def test_always_false_entry_filter(self, mock_user_agent, mock_master_idx_file):
    """A filter that rejects every entry must leave no URLs behind."""
    def reject_everything(_):
        return False

    filings_for_quarter = QuarterlyFilings(
        year=1993,
        quarter=4,
        entry_filter=reject_everything,
        user_agent=mock_user_agent,
    )
    found = filings_for_quarter.get_urls()
    assert len(found) == 0
Example #2
0
 def __init__(self,
              start_date: date,
              end_date: date,
              user_agent: Union[str, None] = None,
              client=None,
              entry_filter=lambda _: True,
              balancing_point=30,
              **kwargs):
     """Store the date range and build the quarterly and daily helpers.

     Args:
         start_date (datetime.date): Start of the range; also used to seed
             the year/quarter and date of the two helper objects.
         end_date (datetime.date): End of the range.
         user_agent (Union[str, None]): Forwarded to both helper objects.
         client: Forwarded to both helper objects.
         entry_filter (function): Stored on the instance and forwarded to
             both helper objects. Defaults to ``lambda _: True``.
         balancing_point (int): Stored on the instance for ``_recompute``.
             Defaults to 30.
         kwargs: Extra keyword arguments forwarded to both helper objects.
     """
     # Plain state first; the helpers below read it back through ``self``.
     self.entry_filter = entry_filter
     self.start_date = start_date
     self.end_date = end_date
     self.user_agent = user_agent
     self.balancing_point = balancing_point

     # Options that both helper objects receive unchanged.
     shared_options = dict(user_agent=user_agent,
                           client=client,
                           entry_filter=self.entry_filter,
                           **kwargs)
     self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                       quarter=get_quarter(self.start_date),
                                       **shared_options)
     self.daily = DailyFilings(date=self.start_date, **shared_options)
     self._recompute()
Example #3
0
    def get_urls(self):
        """Collect the URLs of every quarter and day in the precomputed lists.

        Returns:
            dict: One dictionary in which the list stored under each key is the
            concatenation of the lists returned for that key by the individual
            quarterly and daily lookups, in lookup order.
        """
        collected = []

        for year, quarter, quarter_filter in self.quarterly_date_list:
            # An entry is kept only when both the per-quarter filter and the
            # user-supplied filter accept it.
            def _keep(entry):
                return quarter_filter(entry) and self.entry_filter(entry)

            quarterly = QuarterlyFilings(year=year,
                                         quarter=quarter,
                                         user_agent=self.user_agent,
                                         client=self.client,
                                         entry_filter=_keep)
            collected.append(quarterly.get_urls())

        for day in self.daily_date_list:
            daily = DailyFilings(date=day,
                                 user_agent=self.user_agent,
                                 client=self.client,
                                 entry_filter=self.entry_filter)
            try:
                day_urls = daily.get_urls()
            except EDGARQueryError:
                # No URLs available for this day; move on to the next one.
                continue
            collected.append(day_urls)

        # Merge after everything has been fetched, concatenating per key.
        merged = {}
        for url_map in collected:
            for key, urls in url_map.items():
                merged[key] = merged.get(key, []) + urls
        return merged
Example #4
0
 def test_save(self, tmp_data_directory, mock_filing_data,
               mock_quarterly_quarter_directory, mock_master_idx_file,
               mock_filing_response, subdir, file):
     """Saving Q4 1993 must create the expected file under 1993/QTR4."""
     QuarterlyFilings(year=1993, quarter=4).save(tmp_data_directory)
     quarter_subdir = os.path.join("1993", "QTR4", subdir)
     expected_file = os.path.join(tmp_data_directory, quarter_subdir, file)
     assert os.path.exists(expected_file)
Example #5
0
    def save(self,
             directory,
             dir_pattern=None,
             file_pattern="{accession_number}",
             download_all=False,
             daily_date_format="%Y%m%d"):
        """Save every filing covered by the precomputed quarter and day lists.

        Filings are kept only when they pass the filter given at
        initialization.

        Args:
            directory (str): Directory where filings should be stored.
            dir_pattern (str, optional): Format string for subdirectories.
                Defaults to None.
            file_pattern (str, optional): Format string for files.
                Defaults to "{accession_number}".
            download_all (bool, optional): Forwarded unchanged to the quarterly
                and daily ``save`` calls. Defaults to False.
            daily_date_format (str, optional): Passed as ``date_format`` to the
                daily ``save`` calls only. Defaults to "%Y%m%d".
        """
        # Arguments that the quarterly and the daily save calls have in common.
        shared_save_args = dict(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all)

        for year, quarter, quarter_filter in self.quarterly_date_list:
            # Keep an entry only if the per-quarter filter and the user filter
            # both accept it.
            def _keep(entry):
                return quarter_filter(entry) and self.entry_filter(entry)

            quarterly = QuarterlyFilings(year=year,
                                         quarter=quarter,
                                         user_agent=self.client.user_agent,
                                         client=self.client,
                                         entry_filter=_keep)
            quarterly.save(**shared_save_args)

        for day in self.daily_date_list:
            daily = DailyFilings(date=day,
                                 user_agent=self.client.user_agent,
                                 client=self.client,
                                 entry_filter=self.entry_filter)
            try:
                daily.save(date_format=daily_date_format, **shared_save_args)
            except (EDGARQueryError, NoFilingsError):
                # Nothing to save for this day; carry on with the next one.
                continue
Example #6
0
 def __init__(self,
              start_date: date,
              end_date: date,
              client=None,
              entry_filter=lambda _: True,
              balancing_point=30):
     """Store the date range and build the quarterly and daily helpers.

     Args:
         start_date (datetime.date): Start of the range; also used to seed
             the year/quarter and date of the two helper objects.
         end_date (datetime.date): End of the range.
         client: Forwarded to both helper objects.
         entry_filter (function): Stored on the instance and forwarded to
             both helper objects. Defaults to ``lambda _: True``.
         balancing_point (int): Stored on the instance for ``_recompute``.
             Defaults to 30.
     """
     # Plain state first; the helpers below read it back through ``self``.
     self.entry_filter = entry_filter
     self.start_date = start_date
     self.end_date = end_date
     self.balancing_point = balancing_point

     # Options that both helper objects receive unchanged.
     shared_options = dict(client=client, entry_filter=self.entry_filter)
     self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                       quarter=get_quarter(self.start_date),
                                       **shared_options)
     self.daily = DailyFilings(date=self.start_date, **shared_options)
     self._recompute()
Example #7
0
 def test_idx_filename_is_always_the_same(self, year, quarter):
     """The index file name is "master.idx" for every year/quarter pair."""
     expected_name = "master.idx"
     quarterly_filing = QuarterlyFilings(year=year, quarter=quarter)
     assert quarterly_filing.idx_filename == expected_name
Example #8
0
 def test_good_quarters(self):
     """Quarters 1 through 4 are accepted and stored unchanged."""
     for valid_quarter in (1, 2, 3, 4):
         quarterly_filing = QuarterlyFilings(year=2019, quarter=valid_quarter)
         assert quarterly_filing.quarter == valid_quarter
Example #9
0
 def test_bad_quarter(self, bad_quarter, expected_error):
     """Constructing with an invalid quarter raises the expected error."""
     with pytest.raises(expected_error):
         QuarterlyFilings(year=2020, quarter=bad_quarter)
Example #10
0
 def test_good_year(self):
     """Every year from 1993 through the current year is accepted."""
     current_year = date.today().year
     for valid_year in range(1993, current_year + 1):
         quarterly_filing = QuarterlyFilings(year=valid_year, quarter=1)
         assert quarterly_filing.year == valid_year
Example #11
0
def filings(
    cik_lookup=None,
    filing_type=None,
    start_date=None,
    end_date=date.today(),
    count=None,
    client=None,
    entry_filter=lambda _: True,
):
    """Utility method to get best filing object.

    The arguments are checked in this order:

    1. ``cik_lookup`` given: a ``CompanyFilings`` object is returned.
    2. ``start_date`` is a date and ``end_date`` is None or equal to
       ``start_date``: a ``DailyFilings`` object is returned.
    3. ``start_date`` and ``end_date`` are both dates: a ``QuarterlyFilings``
       object is returned when they equal the first and last day of the
       quarter containing ``start_date``, otherwise a ``ComboFilings`` object.

    Args:
        cik_lookup (str): Central Index Key (CIK) for company of interest.
        filing_type (secedgar.core.filing_types.FilingType, optional): Valid filing type
            enum. Defaults to None. If None, then all filing types for CIKs will be returned.
            Without ``cik_lookup``, entries are kept only when their ``form_type``
            compares equal to ``filing_type`` and ``entry_filter`` accepts them.
        start_date (datetime.date, optional): First date to fetch filings for.
        end_date (datetime.date, optional): Last date to fetch filings for.
        count (int, optional): Number of filings to fetch. Only supported together
            with ``cik_lookup``. Defaults to all filings available.
        client (secedgar.client._base, optional): Client to use. Defaults to
            ``secedgar.client.NetworkClient`` if None given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to ``lambda _: True``.
            See :class:`secedgar.core.DailyFilings` for more detail.

    Raises:
        FilingTypeError: If ``filing_type`` is given but is not a ``FilingType``.
        NotImplementedError: If ``count`` is given without ``cik_lookup``.
        ValueError: If neither ``cik_lookup`` nor a usable ``start_date`` is given.

    .. code-block:: python

        from datetime import date
        from secedgar.core import filings, FilingType

        engine = filings(start_date=date(2020, 12, 10), end_date=date(2020, 12, 10),
            filing_type=FilingType.FILING_4)
    """
    # NOTE(review): the ``end_date`` default above is evaluated once, when the
    # function is defined, so a long-running process keeps that day's date.
    if filing_type is not None and not isinstance(filing_type, FilingType):
        raise FilingTypeError

    if cik_lookup:
        return CompanyFilings(
            cik_lookup,
            filing_type=filing_type,
            start_date=start_date,
            end_date=end_date,
            count=count,
            client=client,
        )

    # Start from the caller's filter and only wrap it when a filing type was
    # requested. The wrapper closes over the ``entry_filter`` parameter, which
    # is never rebound, so it cannot end up calling itself.
    combined_filter = entry_filter
    if filing_type is not None:

        def combined_filter(x):
            return x.form_type == filing_type and entry_filter(x)

    if count is not None:
        raise NotImplementedError(
            "Count has not yet been implemented for Daily, quarterly & Combo Filings."
        )

    if (end_date is None or end_date == start_date) and isinstance(
            start_date, date):
        return DailyFilings(date=start_date,
                            client=client,
                            entry_filter=combined_filter)

    if isinstance(start_date, date) and isinstance(end_date, date):
        current_quarter = get_quarter(start_date)
        current_year = start_date.year
        start_quarter_date = date(current_year, get_month(current_quarter), 1)
        next_year, next_quarter = add_quarter(current_year, current_quarter)
        # Last day of the quarter = day before the next quarter starts.
        end_quarter_date = date(next_year, get_month(next_quarter),
                                1) - timedelta(days=1)
        if start_quarter_date == start_date and end_date == end_quarter_date:
            return QuarterlyFilings(current_year,
                                    current_quarter,
                                    client=client,
                                    entry_filter=combined_filter)
        return ComboFilings(start_date,
                            end_date,
                            client=client,
                            entry_filter=combined_filter)

    raise ValueError(
        """Invalid arguments. You must provide 'cik_lookup' OR 'start_date' \
OR ('start_date' and 'end_date').""")
Example #12
0
 def test_bad_quarter(self, mock_user_agent, bad_quarter, expected_error):
     """Constructing with an invalid quarter raises the expected error."""
     with pytest.raises(expected_error):
         QuarterlyFilings(year=2020,
                          quarter=bad_quarter,
                          user_agent=mock_user_agent)
Example #13
0
class ComboFilings:
    """Class for retrieving all filings between specified dates.

    Args:
        start_date (datetime.date): First date (inclusive) for which to fetch filings.
        end_date (datetime.date): Last date (inclusive) for which to fetch filings.
        user_agent (Union[str, NoneType]): Value used for HTTP header "User-Agent" for all requests.
            If given None, a valid client with user_agent must be given.
            See the SEC's statement on
            `fair access <https://www.sec.gov/os/accessing-edgar-data>`_
            for more information.
        client (Union[NoneType, secedgar.client.NetworkClient], optional): Client to use for
            fetching data. If None is given, a user_agent must be given to pass to
            :class:`secedgar.client.NetworkClient`.
            Defaults to ``secedgar.client.NetworkClient`` if none is given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to `lambda _: True`.
            The ``FilingEntry`` object exposes 7 variables which can be
            used to filter which filings to keep. These are "cik", "company_name",
            "form_type", "date_filed", "file_name", "path", and "num_previously_valid".
        balancing_point (int): Number of days from which to change lookup method from using
            ``DailyFilings`` to ``QuarterlyFilings``. If ``QuarterlyFilings`` is used, an
            additional filter will be added to limit which days are included.
            Defaults to 30.
        kwargs: Any keyword arguments to pass to ``NetworkClient`` if no client is specified.

    .. versionadded:: 0.4.0

    Examples:
        To download all filings from January 6, 2020 until November 5, 2020, you could do following:

        .. code-block:: python

            from datetime import date
            from secedgar import ComboFilings

            combo_filings = ComboFilings(start_date=date(2020, 1, 6),
                                         end_date=date(2020, 11, 5),
                                         user_agent="Name (email)")
            combo_filings.save('/my_directory')
    """
    def __init__(self,
                 start_date: date,
                 end_date: date,
                 user_agent: Union[str, None] = None,
                 client=None,
                 entry_filter=lambda _: True,
                 balancing_point=30,
                 **kwargs):
        self.entry_filter = entry_filter
        self.start_date = start_date
        self.end_date = end_date
        self.user_agent = user_agent
        # One quarterly and one daily helper are created here and re-pointed at
        # each quarter/day by ``get_urls`` and ``save``.
        self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                          quarter=get_quarter(self.start_date),
                                          user_agent=user_agent,
                                          client=client,
                                          entry_filter=self.entry_filter,
                                          **kwargs)
        self.daily = DailyFilings(date=self.start_date,
                                  user_agent=user_agent,
                                  client=client,
                                  entry_filter=self.entry_filter,
                                  **kwargs)
        self.balancing_point = balancing_point
        self._recompute()

    @staticmethod
    def _filed_date(entry):
        """Return the ``date_filed`` of an index entry as a ``datetime.date``.

        The value is read as an attribute, falling back to key access. A
        ``date``/``datetime`` value is reduced to its calendar date; anything
        else is parsed from its string form.
        """
        try:
            raw = entry.date_filed
        except AttributeError:
            raw = entry["date_filed"]
        if isinstance(raw, date):
            # Also covers datetime (a date subclass): keep only the day part.
            return date(raw.year, raw.month, raw.day)
        # NOTE(review): assumes string dates look like YYYY-MM-DD or YYYYMMDD;
        # confirm against the index parser that builds the entries.
        digits = str(raw).strip().replace("-", "")
        return date(int(digits[:4]), int(digits[4:6]), int(digits[6:8]))

    def _filed_in_range(self, entry):
        """Return True if the entry was filed within [start_date, end_date]."""
        return self.start_date <= self._filed_date(entry) <= self.end_date

    def _recompute(self):
        """Recompute the best list of quarters and days to use based on the start and end date.

        Fills ``quarterly_date_list`` with ``(year, quarter, filter)`` tuples
        and ``daily_date_list`` with individual dates. A partially covered
        quarter is fetched through the quarterly index (with a date filter)
        only when more than ``balancing_point`` days of it are needed;
        otherwise its days are fetched one by one.
        """
        current_date = self.start_date
        self.quarterly_date_list = []
        self.daily_date_list = []
        one_day = timedelta(days=1)
        while current_date <= self.end_date:
            current_quarter = get_quarter(current_date)
            current_year = current_date.year
            next_year, next_quarter = add_quarter(current_year,
                                                  current_quarter)
            next_start_quarter_date = date(next_year, get_month(next_quarter),
                                           1)
            current_start_quarter_date = date(current_year,
                                              get_month(current_quarter), 1)
            at_quarter_start = current_start_quarter_date == current_date

            days_till_next_quarter = (next_start_quarter_date -
                                      current_date).days
            days_till_end = (self.end_date - current_date).days

            if days_till_next_quarter <= days_till_end:
                # The range continues past the end of this quarter.
                if at_quarter_start:
                    self.quarterly_date_list.append(
                        (current_year, current_quarter, lambda x: True))
                    current_date = next_start_quarter_date
                elif days_till_next_quarter > self.balancing_point:
                    # Only the tail of the quarter is wanted: filter by date.
                    self.quarterly_date_list.append(
                        (current_year, current_quarter, self._filed_in_range))
                    current_date = next_start_quarter_date
                else:
                    while current_date < next_start_quarter_date:
                        self.daily_date_list.append(current_date)
                        current_date += one_day
            else:
                # The range ends inside this quarter.
                if days_till_end > self.balancing_point:
                    ends_on_last_day = days_till_next_quarter - 1 == days_till_end
                    if at_quarter_start and ends_on_last_day:
                        quarter_filter = lambda x: True  # noqa: E731
                    else:
                        # The range may also start mid-quarter (first pass),
                        # so bound the filing date on both sides.
                        quarter_filter = self._filed_in_range
                    self.quarterly_date_list.append(
                        (current_year, current_quarter, quarter_filter))
                else:
                    while current_date <= self.end_date:
                        self.daily_date_list.append(current_date)
                        current_date += one_day
                # Everything up to and including end_date is now covered, so
                # step past it; otherwise end_date would be queued again as a
                # daily lookup on the next pass.
                current_date = self.end_date + one_day

    def _point_quarterly_at(self, year, quarter, quarter_filter):
        """Aim the shared quarterly helper at one quarter with a combined filter."""
        self.quarterly.year = year
        self.quarterly.quarter = quarter
        # The per-quarter date filter runs first, then the user's filter.
        self.quarterly.entry_filter = (
            lambda x: quarter_filter(x) and self.entry_filter(x))

    def get_urls(self):
        """Get all urls between ``start_date`` and ``end_date``.

        Returns:
            dict: The dictionaries returned by the quarterly and daily lookups
            merged into one, with the lists stored under the same key joined
            in lookup order.
        """
        def reducer(accumulator, dictionary):
            for key, value in dictionary.items():
                # Extend in place rather than rebuilding the list on each merge.
                accumulator.setdefault(key, []).extend(value)
            return accumulator

        list_of_dicts = []
        for (year, quarter, f) in self.quarterly_date_list:
            self._point_quarterly_at(year, quarter, f)
            list_of_dicts.append(self.quarterly.get_urls())

        for d in self.daily_date_list:
            self.daily.date = d
            try:
                list_of_dicts.append(self.daily.get_urls())
            except EDGARQueryError:
                # Best effort: a day without a usable index is skipped.
                pass

        complete_dictionary = reduce(reducer, list_of_dicts, {})
        return complete_dictionary

    def save(self,
             directory,
             dir_pattern=None,
             file_pattern="{accession_number}",
             download_all=False,
             daily_date_format="%Y%m%d"):
        """Save all filings between ``start_date`` and ``end_date``.

        Only filings that satisfy args given at initialization will
        be saved.

        Args:
            directory (str): Directory where filings should be stored.
            dir_pattern (str, optional): Format string for subdirectories. Defaults to None.
            file_pattern (str, optional): Format string for files. Defaults to "{accession_number}".
            download_all (bool, optional): Type of downloading system, if true downloads
                all data for each day, if false downloads each file in index.
                Defaults to False.
            daily_date_format (str, optional): Format string to use for the `{date}` pattern.
                Defaults to "%Y%m%d".
        """
        for (year, quarter, f) in self.quarterly_date_list:
            self._point_quarterly_at(year, quarter, f)
            self.quarterly.save(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all)

        for d in self.daily_date_list:
            self.daily.date = d
            try:
                self.daily.save(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all,
                                date_format=daily_date_format)
            except (EDGARQueryError, NoFilingsError):
                # Best effort: a day with nothing to save is skipped.
                pass
Example #14
0
 def test_good_year(self, mock_user_agent):
     """Every year from 1993 through the current year is accepted."""
     current_year = date.today().year
     for valid_year in range(1993, current_year + 1):
         quarterly_filing = QuarterlyFilings(year=valid_year,
                                             quarter=1,
                                             user_agent=mock_user_agent)
         assert quarterly_filing.year == valid_year
Example #15
0
 def test_good_quarters(self, mock_user_agent):
     """Quarters 1 through 4 are accepted and stored unchanged."""
     for valid_quarter in (1, 2, 3, 4):
         quarterly_filing = QuarterlyFilings(year=2019,
                                             quarter=valid_quarter,
                                             user_agent=mock_user_agent)
         assert quarterly_filing.quarter == valid_quarter
Example #16
0
def filings(cik_lookup=None,
            filing_type=None,
            user_agent=None,
            start_date=None,
            end_date=date.today(),
            count=None,
            client=None,
            entry_filter=lambda _: True,
            **kwargs):
    """Utility method to get best filing object.

    Args:
        cik_lookup (str): Central Index Key (CIK) for company of interest.
        filing_type (secedgar.core.filing_types.FilingType, optional): Valid filing type
            enum. Defaults to None. If None, then all filing types for CIKs will be returned.
        user_agent (str, optional): Passed through unchanged to the filing object
            that is created.
        start_date (datetime.date, optional): First date to fetch filings for.
        end_date (datetime.date, optional): Last date to fetch filings for.
        count (int, optional): Number of filings to fetch. Only supported together
            with ``cik_lookup``; otherwise ``NotImplementedError`` is raised.
            Defaults to all filings available.
        client (secedgar.client.NetworkClient, optional): Client to use. Defaults to
            ``secedgar.client.NetworkClient`` if None given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to ``lambda _: True``.
            See :class:`secedgar.core.DailyFilings` for more detail.
        kwargs: Any keyword arguments to pass to ``NetworkClient`` if no client is specified.

    Raises:
        FilingTypeError: If ``filing_type`` is given but is not a ``FilingType``.
        NotImplementedError: If ``count`` is given without ``cik_lookup``.
        ValueError: If neither ``cik_lookup`` nor a usable ``start_date`` is given.

    Examples:
        Using the ``filings`` function from secedgar is the easiest way to retrieve filings.

        Depending on the arguments given, secedgar will return an object that will get you
        the information you want from EDGAR.

        There are 4 main classes which can be returned.

            - :class:`secedgar.ComboFilings` for fetching filings over multiple days
              that do not fall exactly into a quarter
            - :class:`secedgar.CompanyFilings` for fetching a particular
              filing type for one or more companies
            - :class:`secedgar.DailyFilings` for fetching all filings
              from a specific date
            - :class:`secedgar.QuarterlyFilings` for fetching all filings
              from a specific quarter

        To get all filings over a time span, you could use something like below.

        .. code-block:: python

            from datetime import date
            from secedgar import filings, FilingType

            # secedgar creates correct filing object for given arguments
            my_filings = filings(start_date=date(2020, 12, 10),
                                 end_date=date(2020, 12, 15),
                                 filing_type=FilingType.FILING_4,
                                 user_agent="Name (email)")

            # easy access to methods shared across all 4 different filing classes
            my_filings_urls = my_filings.get_urls()
            my_filings.save("/path/to/directory")

        To get a single filing type for one or more companies, you could use this:

        .. code-block:: python

            from secedgar import filings, FilingType

            # similar to above, but fetches filings for specific tickers
            company_filings = filings(cik_lookup=["aapl", "fb"],
                                      filing_type=FilingType.FILING_10Q,
                                      user_agent="Name (email)")
            company_filings_urls = company_filings.get_urls()
            company_filings.save("/path/to/directory")

        To get filings for a single day, you could use something like this:

        .. code-block:: python

            from datetime import date
            from secedgar import filings

            # all filings for January 3, 2020
            daily_filings = filings(start_date=date(2020, 1, 3),
                                    end_date=date(2020, 1, 3),
                                    user_agent="Name (email)")
            daily_filings.save("/path/to/directory")

            # limit which daily filings to use - saves only form 4 filings
            limit_to_form4 = lambda f: f.form_type.lower() == "4"
            daily_filings_limited = filings(start_date=date(2020, 1, 3),
                                            end_date=date(2020, 1, 3),
                                            user_agent="Name (email)",
                                            entry_filter=limit_to_form4)
            daily_filings_limited.save("/path/to/other/directory")

        For getting filings from a specific quarter, the function call would look like this:

        .. code-block:: python

            from datetime import date
            from secedgar import filings

            # all quarterly filings
            quarterly_filings = filings(start_date=date(2020, 1, 1),
                                        end_date=date(2020, 3, 31),
                                        user_agent="Name (email)")
            quarterly_filings.save("/path/to/directory")

            # limit which quarterly filings to use
            # saves only 10-K and 10-Q filings from quarter
            limit_to_10k_10q = lambda f: f.form_type.lower() in ("10-k", "10-q")
            quarterly_filings_limited = filings(start_date=date(2020, 1, 1),
                                                end_date=date(2020, 3, 31),
                                                user_agent="Name (email)",
                                                entry_filter=limit_to_10k_10q)
            quarterly_filings_limited.save("/path/to/other/directory")

    """
    has_filing_type = filing_type is not None
    if has_filing_type and not isinstance(filing_type, FilingType):
        raise FilingTypeError

    # A CIK lookup always wins: company filings handle type, dates and count.
    if cik_lookup:
        return CompanyFilings(cik_lookup,
                              filing_type=filing_type,
                              user_agent=user_agent,
                              start_date=start_date,
                              end_date=end_date,
                              count=count,
                              client=client,
                              **kwargs)

    if has_filing_type:
        # Narrow the caller's filter to entries of the requested filing type.
        def _entry_filter(entry):
            return entry.form_type == filing_type and entry_filter(entry)
    else:
        _entry_filter = entry_filter

    if count is not None:
        raise NotImplementedError(
            "Count has not yet been implemented for Daily, quarterly & Combo Filings."
        )

    # Options that the daily, quarterly and combo objects all receive.
    index_options = dict(user_agent=user_agent,
                         client=client,
                         entry_filter=_entry_filter,
                         **kwargs)
    start_is_date = isinstance(start_date, date)

    if start_is_date and (end_date is None or end_date == start_date):
        return DailyFilings(date=start_date, **index_options)

    if start_is_date and isinstance(end_date, date):
        quarter = get_quarter(start_date)
        year = start_date.year
        quarter_first_day = date(year, get_month(quarter), 1)
        following_year, following_quarter = add_quarter(year, quarter)
        # Last day of the quarter = day before the following quarter starts.
        following_first_day = date(following_year,
                                   get_month(following_quarter), 1)
        quarter_last_day = following_first_day - timedelta(days=1)
        if quarter_first_day == start_date and end_date == quarter_last_day:
            return QuarterlyFilings(year=year,
                                    quarter=quarter,
                                    **index_options)
        return ComboFilings(start_date=start_date,
                            end_date=end_date,
                            **index_options)

    raise ValueError(
        """Invalid arguments. You must provide 'cik_lookup' OR 'start_date' \
OR ('start_date' and 'end_date').""")
Example #17
0
 def test_clean_path(self, mock_user_agent, original_path, clean_path):
     """``clean_directory_path`` maps each original path to its clean form."""
     filing = QuarterlyFilings(year=2000,
                               quarter=1,
                               user_agent=mock_user_agent)
     cleaned = filing.clean_directory_path(original_path)
     assert cleaned == clean_path
Example #18
0
 def test_idx_filename_is_always_the_same(self, mock_user_agent, year, quarter):
     """The index file name is "master.idx" for every year/quarter pair."""
     expected_name = "master.idx"
     quarterly_filing = QuarterlyFilings(year=year,
                                         quarter=quarter,
                                         user_agent=mock_user_agent)
     assert quarterly_filing.idx_filename == expected_name
Example #19
0
 def test_clean_path(self, original_path, clean_path):
     """``clean_directory_path`` maps each original path to its clean form."""
     filing = QuarterlyFilings(year=2000, quarter=1)
     cleaned = filing.clean_directory_path(original_path)
     assert cleaned == clean_path
Example #20
0
 def test_user_agent_passed_to_client(self, mock_user_agent):
     """The user agent given to the constructor ends up on the client."""
     filing = QuarterlyFilings(year=2020,
                               quarter=1,
                               user_agent=mock_user_agent)
     client_user_agent = filing.client.user_agent
     assert client_user_agent == mock_user_agent