def test_always_false_entry_filter(self, mock_user_agent, mock_master_idx_file):
    """A filter that rejects every entry must leave no URLs behind."""
    filings_for_quarter = QuarterlyFilings(
        year=1993,
        quarter=4,
        entry_filter=lambda _: False,
        user_agent=mock_user_agent,
    )
    found_urls = filings_for_quarter.get_urls()
    assert len(found_urls) == 0
def __init__(self,
             start_date: date,
             end_date: date,
             user_agent: Union[str, None] = None,
             client=None,
             entry_filter=lambda _: True,
             balancing_point=30,
             **kwargs):
    """Store the date range and build the quarterly/daily helper objects.

    Args:
        start_date (datetime.date): First date of the range.
        end_date (datetime.date): Last date of the range.
        user_agent (Union[str, None]): Forwarded to both helper filing objects.
        client: Forwarded to both helper filing objects.
        entry_filter (function): Predicate forwarded to both helper filing objects.
        balancing_point (int): Stored for use by ``_recompute``.
        kwargs: Extra keyword arguments forwarded to both helper filing objects.
    """
    self.entry_filter = entry_filter
    self.start_date = start_date
    self.end_date = end_date
    self.user_agent = user_agent

    # Both helpers are configured identically apart from their date arguments.
    shared_options = dict(user_agent=user_agent,
                          client=client,
                          entry_filter=self.entry_filter,
                          **kwargs)
    self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                      quarter=get_quarter(self.start_date),
                                      **shared_options)
    self.daily = DailyFilings(date=self.start_date, **shared_options)

    self.balancing_point = balancing_point
    self._recompute()
def get_urls(self):
    """Collect the URLs of every filing between ``start_date`` and ``end_date``.

    Returns:
        dict: Merge of the dictionaries returned by each quarterly and daily
            lookup; lists that share a key are concatenated.
    """
    def _merge_into(merged, url_map):
        # Concatenate (rather than overwrite) lists that share a key.
        for name, url_list in url_map.items():
            merged[name] = merged.get(name, []) + url_list
        return merged

    url_maps = []

    # Quarterly lookups: the per-quarter date filter is combined with the
    # user supplied entry filter.
    for year, quarter, date_filter in self.quarterly_date_list:
        quarterly = QuarterlyFilings(
            year=year,
            quarter=quarter,
            user_agent=self.user_agent,
            client=self.client,
            entry_filter=lambda entry: date_filter(entry) and self.entry_filter(entry))
        url_maps.append(quarterly.get_urls())

    # Daily lookups: a day for which the query fails is skipped.
    for day in self.daily_date_list:
        daily = DailyFilings(date=day,
                             user_agent=self.user_agent,
                             client=self.client,
                             entry_filter=self.entry_filter)
        try:
            day_urls = daily.get_urls()
        except EDGARQueryError:
            continue
        url_maps.append(day_urls)

    return reduce(_merge_into, url_maps, {})
def test_save(self,
              tmp_data_directory,
              mock_filing_data,
              mock_quarterly_quarter_directory,
              mock_master_idx_file,
              mock_filing_response,
              subdir,
              file):
    """Saving a quarter must create the expected file under ``1993/QTR4``."""
    filings_for_quarter = QuarterlyFilings(year=1993, quarter=4)
    filings_for_quarter.save(tmp_data_directory)

    quarter_subdir = os.path.join("1993", "QTR4", subdir)
    expected_path = os.path.join(tmp_data_directory, quarter_subdir, file)
    assert os.path.exists(expected_path)
def save(self,
         directory,
         dir_pattern=None,
         file_pattern="{accession_number}",
         download_all=False,
         daily_date_format="%Y%m%d"):
    """Save all filings between ``start_date`` and ``end_date``.

    Only filings that satisfy args given at initialization will be saved.

    Args:
        directory (str): Directory where filings should be stored.
        dir_pattern (str, optional): Format string for subdirectories. Defaults to None.
        file_pattern (str, optional): Format string for files.
            Defaults to "{accession_number}".
        download_all (bool, optional): Type of downloading system, if true downloads
            all data for each day, if false downloads each file in index.
            Defaults to False.
        daily_date_format (str, optional): Format string to use for the `{date}` pattern.
            Defaults to "%Y%m%d".
    """
    # Options that are passed unchanged to every quarterly and daily save.
    save_options = dict(directory=directory,
                        dir_pattern=dir_pattern,
                        file_pattern=file_pattern,
                        download_all=download_all)

    for year, quarter, date_filter in self.quarterly_date_list:
        quarterly = QuarterlyFilings(
            year=year,
            quarter=quarter,
            user_agent=self.client.user_agent,
            client=self.client,
            entry_filter=lambda entry: date_filter(entry) and self.entry_filter(entry))
        quarterly.save(**save_options)

    for day in self.daily_date_list:
        daily = DailyFilings(date=day,
                             user_agent=self.client.user_agent,
                             client=self.client,
                             entry_filter=self.entry_filter)
        try:
            daily.save(date_format=daily_date_format, **save_options)
        except (EDGARQueryError, NoFilingsError):
            # Nothing to save for this day; move on to the next one.
            continue
def __init__(self,
             start_date: date,
             end_date: date,
             client=None,
             entry_filter=lambda _: True,
             balancing_point=30):
    """Store the date range and build the quarterly/daily helper objects.

    Args:
        start_date (datetime.date): First date of the range.
        end_date (datetime.date): Last date of the range.
        client: Forwarded to both helper filing objects.
        entry_filter (function): Predicate forwarded to both helper filing objects.
        balancing_point (int): Stored for use by ``_recompute``.
    """
    self.entry_filter = entry_filter
    self.start_date = start_date
    self.end_date = end_date

    # Both helpers share the same client and entry filter.
    shared_options = dict(client=client, entry_filter=self.entry_filter)
    self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                      quarter=get_quarter(self.start_date),
                                      **shared_options)
    self.daily = DailyFilings(date=self.start_date, **shared_options)

    self.balancing_point = balancing_point
    self._recompute()
def test_idx_filename_is_always_the_same(self, year, quarter):
    """The index file name is ``master.idx`` for the given year and quarter."""
    expected_name = "master.idx"
    filings_for_quarter = QuarterlyFilings(year=year, quarter=quarter)
    assert filings_for_quarter.idx_filename == expected_name
def test_good_quarters(self):
    """Quarters 1 through 4 are accepted and stored unchanged."""
    for valid_quarter in (1, 2, 3, 4):
        filings_for_quarter = QuarterlyFilings(year=2019, quarter=valid_quarter)
        assert filings_for_quarter.quarter == valid_quarter
def test_bad_quarter(self, bad_quarter, expected_error):
    """Constructing with ``bad_quarter`` must raise ``expected_error``."""
    with pytest.raises(expected_error):
        QuarterlyFilings(year=2020, quarter=bad_quarter)
def test_good_year(self):
    """Every year from 1993 through the current year is accepted."""
    first_year = 1993
    this_year = date.today().year
    for valid_year in range(first_year, this_year + 1):
        filings_for_quarter = QuarterlyFilings(year=valid_year, quarter=1)
        assert filings_for_quarter.year == valid_year
def filings(
    cik_lookup=None,
    filing_type=None,
    start_date=None,
    end_date=date.today(),
    count=None,
    client=None,
    entry_filter=lambda _: True,
):
    """Utility method to get best filing object.

    Args:
        cik_lookup (str): Central Index Key (CIK) for company of interest.
        start_date (datetime.date, optional): First date to fetch filings for.
        end_date (datetime.date, optional): Last date to fetch filings for.
            Note that the default is evaluated once, when this function is defined.
        filing_type (secedgar.core.filing_types.FilingType, optional): Valid filing type
            enum. Defaults to None. If None, then all filing types for CIKs will be returned.
        count (int, optional): Number of filings to fetch. Will fetch up to `count` if that
            many filings are available. Defaults to all filings available. Only supported
            together with ``cik_lookup``.
        client (secedgar.client._base, optional): Client to use. Defaults to
            ``secedgar.client.NetworkClient`` if None given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to ``lambda _: True``.
            See :class:`secedgar.core.DailyFilings` for more detail.

    Returns:
        ``CompanyFilings`` if ``cik_lookup`` is given; otherwise ``DailyFilings`` for a
        single day, ``QuarterlyFilings`` when the dates span exactly one quarter, and
        ``ComboFilings`` for any other date range.

    Raises:
        FilingTypeError: If ``filing_type`` is given but is not a ``FilingType``.
        NotImplementedError: If ``count`` is given without ``cik_lookup``.
        ValueError: If neither ``cik_lookup`` nor a usable ``start_date`` is given.

    .. code-block:: python

        from datetime import date
        from secedgar.core import filings, FilingType

        engine = filings(start_date=date(2020, 12, 10),
                         end_date=date(2020, 12, 10),
                         filing_type=FilingType.FILING_4)
    """
    if filing_type is not None and not isinstance(filing_type, FilingType):
        raise FilingTypeError

    if cik_lookup:
        return CompanyFilings(
            cik_lookup,
            filing_type=filing_type,
            start_date=start_date,
            end_date=end_date,
            count=count,
            client=client,
        )

    # Keep the caller's filter under its own name. The combined filter gets a
    # separate name so it can never end up calling itself.
    combined_filter = entry_filter
    if filing_type is not None:
        # NOTE(review): this compares ``form_type`` directly with the
        # ``FilingType`` member; confirm the two compare equal as intended.
        def combined_filter(x):
            return x.form_type == filing_type and entry_filter(x)

    if count is not None:
        raise NotImplementedError(
            "Count has not yet been implemented for Daily, quarterly & Combo Filings."
        )

    if (end_date is None or end_date == start_date) and isinstance(start_date, date):
        return DailyFilings(date=start_date, client=client, entry_filter=combined_filter)

    if isinstance(start_date, date) and isinstance(end_date, date):
        current_quarter = get_quarter(start_date)
        current_year = start_date.year
        start_quarter_date = date(current_year, get_month(current_quarter), 1)
        next_year, next_quarter = add_quarter(current_year, current_quarter)
        # Last day of the quarter = day before the next quarter starts.
        end_quarter_date = date(next_year, get_month(next_quarter), 1) - timedelta(days=1)
        if start_quarter_date == start_date and end_date == end_quarter_date:
            return QuarterlyFilings(current_year,
                                    current_quarter,
                                    client=client,
                                    entry_filter=combined_filter)
        return ComboFilings(start_date,
                            end_date,
                            client=client,
                            entry_filter=combined_filter)

    raise ValueError(
        "Invalid arguments. You must provide 'cik_lookup' OR 'start_date' "
        "OR ('start_date' and 'end_date').")
def test_bad_quarter(self, mock_user_agent, bad_quarter, expected_error):
    """Constructing with ``bad_quarter`` must raise ``expected_error``."""
    with pytest.raises(expected_error):
        QuarterlyFilings(year=2020,
                         quarter=bad_quarter,
                         user_agent=mock_user_agent)
class ComboFilings:
    """Class for retrieving all filings between specified dates.

    Args:
        start_date (datetime.date): First date (inclusive) to fetch filings for.
        end_date (datetime.date): Last date (inclusive) to fetch filings for.
        user_agent (Union[str, NoneType]): Value used for HTTP header "User-Agent" for all requests.
            If given None, a valid client with user_agent must be given.
            See the SEC's statement on
            `fair access <https://www.sec.gov/os/accessing-edgar-data>`_
            for more information.
        client (Union[NoneType, secedgar.client.NetworkClient], optional): Client to use for
            fetching data. If None is given, a user_agent must be given to pass to
            :class:`secedgar.client.NetworkClient`.
            Defaults to ``secedgar.client.NetworkClient`` if none is given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to `lambda _: True`.
            The ``FilingEntry`` object exposes 7 variables which can be
            used to filter which filings to keep. These are "cik", "company_name",
            "form_type", "date_filed", "file_name", "path", and "num_previously_valid".
        balancing_point (int): Number of days from which to change lookup method from using
            ``DailyFilings`` to ``QuarterlyFilings``. If ``QuarterlyFilings`` is used,
            an additional filter will be added to limit which days are included.
            Defaults to 30.
        kwargs: Any keyword arguments to pass to ``NetworkClient`` if no client is specified.

    .. versionadded:: 0.4.0

    Examples:
        To download all filings from January 6, 2020 until November 5, 2020, you could do following:

        .. code-block:: python

            from datetime import date
            from secedgar import ComboFilings

            combo_filings = ComboFilings(start_date=date(2020, 1, 6),
                                         end_date=date(2020, 11, 5),
                                         user_agent="Name (email)")
            combo_filings.save('/my_directory')
    """

    def __init__(self,
                 start_date: date,
                 end_date: date,
                 user_agent: Union[str, None] = None,
                 client=None,
                 entry_filter=lambda _: True,
                 balancing_point=30,
                 **kwargs):
        self.entry_filter = entry_filter
        self.start_date = start_date
        self.end_date = end_date
        self.user_agent = user_agent
        # These two objects are re-pointed at each quarter/day by
        # ``get_urls`` and ``save`` instead of being rebuilt.
        self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                          quarter=get_quarter(self.start_date),
                                          user_agent=user_agent,
                                          client=client,
                                          entry_filter=self.entry_filter,
                                          **kwargs)
        self.daily = DailyFilings(date=self.start_date,
                                  user_agent=user_agent,
                                  client=client,
                                  entry_filter=self.entry_filter,
                                  **kwargs)
        self.balancing_point = balancing_point
        self._recompute()

    @staticmethod
    def _date_filed(entry):
        """Return the ``date_filed`` of a filing entry as a ``datetime.date``.

        The value is read as an attribute first and by key as a fallback. Date
        objects are accepted as-is; strings are assumed to look like
        ``YYYY-MM-DD`` or ``YYYYMMDD`` (TODO confirm against the index parser).

        Raises:
            ValueError: If the value cannot be interpreted as a date.
        """
        try:
            raw = entry.date_filed
        except AttributeError:
            raw = entry["date_filed"]

        if isinstance(raw, date):
            # ``datetime`` subclasses ``date``; rebuild a plain date so that
            # comparisons against ``start_date``/``end_date`` are well defined.
            return date(raw.year, raw.month, raw.day)

        digits = str(raw).strip().replace("-", "")
        if len(digits) != 8 or not digits.isdigit():
            raise ValueError("Unrecognized date_filed value: {!r}".format(raw))
        return date(int(digits[:4]), int(digits[4:6]), int(digits[6:8]))

    def _in_range(self, entry):
        """Return True if ``entry`` was filed between ``start_date`` and ``end_date``."""
        return self.start_date <= self._date_filed(entry) <= self.end_date

    def _combined_filter(self, date_filter):
        """Return a filter that requires both ``date_filter`` and ``entry_filter``."""
        return lambda entry: date_filter(entry) and self.entry_filter(entry)

    def _recompute(self):
        """Recompute the best list of quarters and days to use based on the start and end date.

        Fills ``quarterly_date_list`` with ``(year, quarter, filter)`` tuples and
        ``daily_date_list`` with individual dates. Quarters that are only partly
        inside the requested range get a filter restricting entries to
        ``[start_date, end_date]``.
        """
        def _keep_all(_):
            return True

        one_day = timedelta(days=1)
        current_date = self.start_date
        self.quarterly_date_list = []
        self.daily_date_list = []

        while current_date <= self.end_date:
            current_quarter = get_quarter(current_date)
            current_year = current_date.year
            quarter_start = date(current_year, get_month(current_quarter), 1)
            next_year, next_quarter = add_quarter(current_year, current_quarter)
            next_quarter_start = date(next_year, get_month(next_quarter), 1)

            days_till_next_quarter = (next_quarter_start - current_date).days
            days_till_end = (self.end_date - current_date).days
            starts_on_quarter_start = current_date == quarter_start

            if days_till_next_quarter <= days_till_end:
                # The requested range continues past the end of this quarter.
                last_needed = next_quarter_start - one_day
                covers_whole_quarter = starts_on_quarter_start
                use_quarterly = (covers_whole_quarter
                                 or days_till_next_quarter > self.balancing_point)
            else:
                # The requested range ends inside this quarter.
                last_needed = self.end_date
                covers_whole_quarter = (starts_on_quarter_start
                                        and days_till_next_quarter - 1 == days_till_end)
                use_quarterly = days_till_end > self.balancing_point

            if use_quarterly:
                # Only a fully covered quarter may skip the date check; a partly
                # covered one must exclude filings outside the requested range.
                date_filter = _keep_all if covers_whole_quarter else self._in_range
                self.quarterly_date_list.append(
                    (current_year, current_quarter, date_filter))
            else:
                day = current_date
                while day <= last_needed:
                    self.daily_date_list.append(day)
                    day += one_day

            # Everything needed from this quarter is now scheduled. Jumping to
            # the next quarter also ends the loop once ``end_date`` is covered,
            # so ``end_date`` is never scheduled twice.
            current_date = next_quarter_start

    def get_urls(self):
        """Get all urls between ``start_date`` and ``end_date``.

        Returns:
            dict: Merge of the dictionaries returned by each quarterly and daily
                lookup; lists that share a key are concatenated.
        """
        # Use functools.reduce for speed
        # see https://stackoverflow.com/questions/10461531/merge-and-sum-of-two-dictionaries
        def reducer(accumulator, dictionary):
            for key, value in dictionary.items():
                accumulator[key] = accumulator.get(key, []) + value
            return accumulator

        list_of_dicts = []
        for (year, quarter, date_filter) in self.quarterly_date_list:
            self.quarterly.year = year
            self.quarterly.quarter = quarter
            self.quarterly.entry_filter = self._combined_filter(date_filter)
            list_of_dicts.append(self.quarterly.get_urls())

        for day in self.daily_date_list:
            self.daily.date = day
            try:
                list_of_dicts.append(self.daily.get_urls())
            except EDGARQueryError:
                # Deliberate best effort: skip days whose query fails.
                continue

        complete_dictionary = reduce(reducer, list_of_dicts, {})
        return complete_dictionary

    def save(self,
             directory,
             dir_pattern=None,
             file_pattern="{accession_number}",
             download_all=False,
             daily_date_format="%Y%m%d"):
        """Save all filings between ``start_date`` and ``end_date``.

        Only filings that satisfy args given at initialization will
        be saved.

        Args:
            directory (str): Directory where filings should be stored.
            dir_pattern (str, optional): Format string for subdirectories. Defaults to None.
            file_pattern (str, optional): Format string for files.
                Defaults to "{accession_number}".
            download_all (bool, optional): Type of downloading system, if true downloads
                all data for each day, if false downloads each file in index.
                Defaults to False.
            daily_date_format (str, optional): Format string to use for the `{date}` pattern.
                Defaults to "%Y%m%d".
        """
        for (year, quarter, date_filter) in self.quarterly_date_list:
            self.quarterly.year = year
            self.quarterly.quarter = quarter
            self.quarterly.entry_filter = self._combined_filter(date_filter)
            self.quarterly.save(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all)

        for day in self.daily_date_list:
            self.daily.date = day
            try:
                self.daily.save(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all,
                                date_format=daily_date_format)
            except (EDGARQueryError, NoFilingsError):
                # Deliberate best effort: skip days with nothing to save.
                continue
def test_good_year(self, mock_user_agent):
    """Every year from 1993 through the current year is accepted."""
    first_year = 1993
    this_year = date.today().year
    for valid_year in range(first_year, this_year + 1):
        filings_for_quarter = QuarterlyFilings(year=valid_year,
                                               quarter=1,
                                               user_agent=mock_user_agent)
        assert filings_for_quarter.year == valid_year
def test_good_quarters(self, mock_user_agent):
    """Quarters 1 through 4 are accepted and stored unchanged."""
    for valid_quarter in (1, 2, 3, 4):
        filings_for_quarter = QuarterlyFilings(year=2019,
                                               quarter=valid_quarter,
                                               user_agent=mock_user_agent)
        assert filings_for_quarter.quarter == valid_quarter
def filings(cik_lookup=None,
            filing_type=None,
            user_agent=None,
            start_date=None,
            end_date=date.today(),
            count=None,
            client=None,
            entry_filter=lambda _: True,
            **kwargs):
    """Utility method to get best filing object.

    Args:
        cik_lookup (str): Central Index Key (CIK) for company of interest.
        filing_type (secedgar.core.filing_types.FilingType, optional): Valid filing type
            enum. Defaults to None. If None, then all filing types for CIKs will be returned.
        user_agent (str, optional): Value passed as ``user_agent`` to the filing object
            that is returned. Defaults to None.
        start_date (datetime.date, optional): First date to fetch filings for.
        end_date (datetime.date, optional): Last date to fetch filings for.
            Note that the default is evaluated once, when this function is defined.
        count (int, optional): Number of filings to fetch. Will fetch up to `count` if that
            many filings are available. Defaults to all filings available. Only supported
            together with ``cik_lookup``.
        client (secedgar.client.NetworkClient, optional): Client to use. Defaults to
            ``secedgar.client.NetworkClient`` if None given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to ``lambda _: True``.
            See :class:`secedgar.core.DailyFilings` for more detail.
        kwargs: Any keyword arguments to pass to ``NetworkClient`` if no client is specified.

    Returns:
        ``CompanyFilings`` if ``cik_lookup`` is given; otherwise ``DailyFilings`` for a
        single day, ``QuarterlyFilings`` when the dates span exactly one quarter, and
        ``ComboFilings`` for any other date range.

    Raises:
        FilingTypeError: If ``filing_type`` is given but is not a ``FilingType``.
        NotImplementedError: If ``count`` is given without ``cik_lookup``.
        ValueError: If neither ``cik_lookup`` nor a usable ``start_date`` is given.

    Examples:
        Using the ``filings`` function from secedgar is the easiest way to retrieve filings.

        Depending on the arguments given, secedgar will return an object that will get you
        the information you want from EDGAR.

        There are 4 main classes which can be returned.

            - :class:`secedgar.ComboFilings` for fetching filings over multiple days
              that does not fall exactly into a quarter
            - :class:`secedgar.CompanyFilings` for fetching a particular
              filing type for one or more companies
            - :class:`secedgar.DailyFilings` for fetching all filings
              from a specific date
            - :class:`secedgar.QuarterlyFilings` for fetching all filings
              from a specific quarter

        To get all filings over a time span, you could use something like below.

        .. code-block:: python

            from datetime import date
            from secedgar import filings, FilingType

            # secedgar creates correct filing object for given arguments
            # this will fetch all form 4 filings found over the time span
            # (``count`` is only supported together with ``cik_lookup``)
            my_filings = filings(start_date=date(2020, 12, 10),
                                 end_date=date(2020, 12, 15),
                                 filing_type=FilingType.FILING_4,
                                 user_agent="Name (email)")

            # easy access to methods shared across all 4 different filing classes
            my_filings_urls = my_filings.get_urls()
            my_filings.save("/path/to/directory")

        To get a single filing type for one or more companies, you could use this:

        .. code-block:: python

            from secedgar import filings, FilingType

            # similar to above, but fetches filings for specific tickers
            company_filings = filings(cik_lookup=["aapl", "fb"],
                                      filing_type=FilingType.FILING_10Q,
                                      user_agent="Name (email)")
            company_filings_urls = company_filings.get_urls()
            company_filings.save("/path/to/directory")

        To get filings for a single day, you could use something like this:

        .. code-block:: python

            from datetime import date
            from secedgar import filings

            # all filings for January 3, 2020
            daily_filings = filings(start_date=date(2020, 1, 3),
                                    end_date=date(2020, 1, 3),
                                    user_agent="Name (email)")
            daily_filings.save("/path/to/directory")

            # limit which daily filings to use - saves only form 4 filings
            limit_to_form4 = lambda f: f.form_type.lower() == "4"
            daily_filings_limited = filings(start_date=date(2020, 1, 3),
                                            end_date=date(2020, 1, 3),
                                            user_agent="Name (email)",
                                            entry_filter=limit_to_form4)
            daily_filings_limited.save("/path/to/other/directory")

        For getting filings from a specific quarter, the function call would look like this:

        .. code-block:: python

            from datetime import date
            from secedgar import filings

            # all quarterly filings
            quarterly_filings = filings(start_date=date(2020, 1, 1),
                                        end_date=date(2020, 3, 31),
                                        user_agent="Name (email)")
            quarterly_filings.save("/path/to/directory")

            # limit which quarterly filings to use
            # saves only 10-K and 10-Q filings from quarter
            limit_to_10k_10q = lambda f: f.form_type.lower() in ("10-k", "10-q")
            quarterly_filings_limited = filings(start_date=date(2020, 1, 1),
                                                end_date=date(2020, 3, 31),
                                                user_agent="Name (email)",
                                                entry_filter=limit_to_10k_10q)
            quarterly_filings_limited.save("/path/to/other/directory")

    """
    if filing_type is not None and not isinstance(filing_type, FilingType):
        raise FilingTypeError

    if cik_lookup:
        return CompanyFilings(cik_lookup,
                              filing_type=filing_type,
                              user_agent=user_agent,
                              start_date=start_date,
                              end_date=end_date,
                              count=count,
                              client=client,
                              **kwargs)

    # Start from the caller's filter. The combined filter keeps its own name so
    # the inner function always calls the caller's original predicate.
    _entry_filter = entry_filter
    if filing_type is not None:
        # NOTE(review): this compares ``form_type`` directly with the
        # ``FilingType`` member; confirm the two compare equal as intended.
        def _entry_filter(x):
            return x.form_type == filing_type and entry_filter(x)

    if count is not None:
        raise NotImplementedError(
            "Count has not yet been implemented for Daily, quarterly & Combo Filings."
        )

    if (end_date is None or end_date == start_date) and isinstance(start_date, date):
        return DailyFilings(date=start_date,
                            user_agent=user_agent,
                            client=client,
                            entry_filter=_entry_filter,
                            **kwargs)

    if isinstance(start_date, date) and isinstance(end_date, date):
        current_quarter = get_quarter(start_date)
        current_year = start_date.year
        start_quarter_date = date(current_year, get_month(current_quarter), 1)
        next_year, next_quarter = add_quarter(current_year, current_quarter)
        # Last day of the quarter = day before the next quarter starts.
        end_quarter_date = date(next_year, get_month(next_quarter), 1) - timedelta(days=1)
        if start_quarter_date == start_date and end_date == end_quarter_date:
            return QuarterlyFilings(year=current_year,
                                    quarter=current_quarter,
                                    client=client,
                                    user_agent=user_agent,
                                    entry_filter=_entry_filter,
                                    **kwargs)
        return ComboFilings(start_date=start_date,
                            end_date=end_date,
                            user_agent=user_agent,
                            client=client,
                            entry_filter=_entry_filter,
                            **kwargs)

    raise ValueError(
        "Invalid arguments. You must provide 'cik_lookup' OR 'start_date' "
        "OR ('start_date' and 'end_date').")
def test_clean_path(self, mock_user_agent, original_path, clean_path):
    """``clean_directory_path`` must turn ``original_path`` into ``clean_path``."""
    filings_for_quarter = QuarterlyFilings(year=2000,
                                           quarter=1,
                                           user_agent=mock_user_agent)
    cleaned = filings_for_quarter.clean_directory_path(original_path)
    assert cleaned == clean_path
def test_idx_filename_is_always_the_same(self, mock_user_agent, year, quarter):
    """The index file name is ``master.idx`` for the given year and quarter."""
    expected_name = "master.idx"
    filings_for_quarter = QuarterlyFilings(year=year,
                                           quarter=quarter,
                                           user_agent=mock_user_agent)
    assert filings_for_quarter.idx_filename == expected_name
def test_clean_path(self, original_path, clean_path):
    """``clean_directory_path`` must turn ``original_path`` into ``clean_path``."""
    filings_for_quarter = QuarterlyFilings(year=2000, quarter=1)
    cleaned = filings_for_quarter.clean_directory_path(original_path)
    assert cleaned == clean_path
def test_user_agent_passed_to_client(self, mock_user_agent):
    """The user agent given at construction must be the client's user agent."""
    filings_for_quarter = QuarterlyFilings(year=2020,
                                           quarter=1,
                                           user_agent=mock_user_agent)
    client_user_agent = filings_for_quarter.client.user_agent
    assert client_user_agent == mock_user_agent