def test_save_with_single_level_cik_dir_pattern(
        self, tmp_data_directory, mock_daily_quarter_directory,
        mock_daily_idx_file, mock_filing_response, cik, file):
    """Check that ``dir_pattern="{cik}"`` yields ``<directory>/<cik>/<file>``."""
    filings_for_day = DailyFilings(date(2018, 12, 31))
    filings_for_day.save(tmp_data_directory, dir_pattern="{cik}")
    # With a single-level pattern the file is expected directly under the CIK folder.
    expected_path = os.path.join(tmp_data_directory, cik, file)
    assert os.path.exists(expected_path)
def get_urls(self):
    """Collect the URLs of every filing between ``start_date`` and ``end_date``.

    Returns:
        dict: The per-quarter and per-day URL dictionaries merged into one,
        with the lists of values that share a key concatenated in lookup order.
    """
    def _merge(merged, current):
        # Fold one URL dictionary into the running result, joining the lists
        # that live under the same key.
        for key, value in current.items():
            merged[key] = merged.get(key, []) + value
        return merged

    url_maps = []

    # Quarters first: each one carries an extra filter limiting the days kept.
    for year, quarter, day_filter in self.quarterly_date_list:
        quarterly = QuarterlyFilings(
            year=year,
            quarter=quarter,
            user_agent=self.user_agent,
            client=self.client,
            entry_filter=lambda entry: day_filter(entry) and self.entry_filter(entry))
        url_maps.append(quarterly.get_urls())

    # Then the individual days.
    for day in self.daily_date_list:
        daily = DailyFilings(date=day,
                             user_agent=self.user_agent,
                             client=self.client,
                             entry_filter=self.entry_filter)
        try:
            day_urls = daily.get_urls()
        except EDGARQueryError:
            # continue if no URLs available for given day
            continue
        url_maps.append(day_urls)

    return reduce(_merge, url_maps, {})
def __init__(self,
             start_date: date,
             end_date: date,
             user_agent: Union[str, None] = None,
             client=None,
             entry_filter=lambda _: True,
             balancing_point=30,
             **kwargs):
    """Store the date range and filter, build the helper objects, then call ``_recompute``.

    Args:
        start_date (datetime.date): Start of the date range.
        end_date (datetime.date): End of the date range.
        user_agent (Union[str, None]): Forwarded to both helper objects.
        client: Forwarded to both helper objects. Defaults to None.
        entry_filter (function): Filter stored on the instance and forwarded
            to both helper objects. Defaults to ``lambda _: True``.
        balancing_point (int): Stored on the instance. Defaults to 30.
        kwargs: Extra keyword arguments forwarded to both helper objects.
    """
    self.entry_filter = entry_filter
    self.start_date, self.end_date = start_date, end_date
    self.user_agent = user_agent

    # Everything both helper objects receive in common.
    shared_options = dict(user_agent=user_agent,
                          client=client,
                          entry_filter=self.entry_filter,
                          **kwargs)
    first_day = self.start_date
    self.quarterly = QuarterlyFilings(year=first_day.year,
                                      quarter=get_quarter(first_day),
                                      **shared_options)
    self.daily = DailyFilings(date=first_day, **shared_options)

    self.balancing_point = balancing_point
    self._recompute()
def test_get_listings_directory(self, mock_user_agent,
                                mock_daily_quarter_directory):
    """The listings directory response is OK and mentions the 2018-12-31 master index."""
    filings_for_day = DailyFilings(date(2018, 12, 31),
                                   user_agent=mock_user_agent)
    response = filings_for_day._get_listings_directory()
    assert response.status_code == 200
    assert "master.20181231.idx" in response.text
def test_save_default(self, tmp_data_directory, mock_daily_quarter_directory,
                      mock_daily_idx_file, mock_filing_response, cik, file):
    """With default arguments, files are expected under ``<directory>/20181231/<cik>/``."""
    filings_for_day = DailyFilings(date(2018, 12, 31))
    filings_for_day.save(tmp_data_directory)
    expected_path = os.path.join(tmp_data_directory, "20181231", cik, file)
    assert os.path.exists(expected_path)
def test_save_with_single_level_date_dir_pattern(
        self, tmp_data_directory, mock_daily_quarter_directory,
        mock_daily_idx_file, mock_filing_response, file):
    """Check that ``dir_pattern="{date}"`` honours the given ``date_format``."""
    filings_for_day = DailyFilings(date(2018, 12, 31))
    filings_for_day.save(tmp_data_directory,
                         dir_pattern="{date}",
                         date_format="%Y-%m-%d")
    # The folder name is the filing date rendered with the custom format.
    expected_path = os.path.join(tmp_data_directory, "2018-12-31", file)
    assert os.path.exists(expected_path)
def test_save_with_multi_level_dir_pattern_date_not_first(
        self, tmp_data_directory, mock_daily_quarter_directory,
        mock_daily_idx_file, mock_filing_response, cik, file):
    """Check a two-level ``"{cik}/{date}"`` pattern where the date comes second."""
    filings_for_day = DailyFilings(date(2018, 12, 31))
    filings_for_day.save(tmp_data_directory,
                         dir_pattern="{cik}/{date}",
                         date_format="%Y-%m-%d")
    expected_path = os.path.join(tmp_data_directory, cik, "2018-12-31", file)
    assert os.path.exists(expected_path)
def save(self,
         directory,
         dir_pattern=None,
         file_pattern="{accession_number}",
         download_all=False,
         daily_date_format="%Y%m%d"):
    """Save every filing between ``start_date`` and ``end_date``.

    Only filings accepted by the filter given at initialization are saved.

    Args:
        directory (str): Directory where filings should be stored.
        dir_pattern (str, optional): Format string for subdirectories.
            Defaults to None.
        file_pattern (str, optional): Format string for files.
            Defaults to "{accession_number}".
        download_all (bool, optional): Type of downloading system. If true,
            downloads all data for each day; if false, downloads each file
            in the index. Defaults to False.
        daily_date_format (str, optional): Format string to use for the
            `{date}` pattern. Defaults to "%Y%m%d".
    """
    # Arguments that the quarterly and the daily save calls have in common.
    shared_save_args = dict(directory=directory,
                            dir_pattern=dir_pattern,
                            file_pattern=file_pattern,
                            download_all=download_all)

    # Quarters first, each with its extra day filter combined with ours.
    for year, quarter, day_filter in self.quarterly_date_list:
        quarterly = QuarterlyFilings(
            year=year,
            quarter=quarter,
            user_agent=self.client.user_agent,
            client=self.client,
            entry_filter=lambda entry: day_filter(entry) and self.entry_filter(entry))
        quarterly.save(**shared_save_args)

    # Then each individual day.
    for day in self.daily_date_list:
        daily = DailyFilings(date=day,
                             user_agent=self.client.user_agent,
                             client=self.client,
                             entry_filter=self.entry_filter)
        try:
            daily.save(date_format=daily_date_format, **shared_save_args)
        except (EDGARQueryError, NoFilingsError):
            # continue if no filings for given day
            continue
def __init__(self,
             start_date: date,
             end_date: date,
             client=None,
             entry_filter=lambda _: True,
             balancing_point=30):
    """Store the date range and filter, build the helper objects, then call ``_recompute``.

    Args:
        start_date (datetime.date): Start of the date range.
        end_date (datetime.date): End of the date range.
        client: Forwarded to both helper objects. Defaults to None.
        entry_filter (function): Filter stored on the instance and forwarded
            to both helper objects. Defaults to ``lambda _: True``.
        balancing_point (int): Stored on the instance. Defaults to 30.
    """
    self.entry_filter = entry_filter
    self.start_date, self.end_date = start_date, end_date

    # Both helper objects share the client and the entry filter.
    shared_options = dict(client=client, entry_filter=self.entry_filter)
    first_day = self.start_date
    self.quarterly = QuarterlyFilings(year=first_day.year,
                                      quarter=get_quarter(first_day),
                                      **shared_options)
    self.daily = DailyFilings(date=first_day, **shared_options)

    self.balancing_point = balancing_point
    self._recompute()
def test_idx_filename(self, date, expected_filename, mock_user_agent):
    """``idx_filename`` for the given date equals the expected file name."""
    filings_for_day = DailyFilings(date=date, user_agent=mock_user_agent)
    assert filings_for_day.idx_filename == expected_filename
def test_quarter(self, date, expected, mock_user_agent):
    """``quarter`` for the given date equals the expected value."""
    filings_for_day = DailyFilings(date=date, user_agent=mock_user_agent)
    assert filings_for_day.quarter == expected
def test_get_master_idx_file(self, mock_daily_quarter_directory,
                             mock_daily_idx_file, company_name):
    """The master index fetched for 2018-12-31 contains the company name."""
    master_idx = DailyFilings(date(2018, 12, 31))._get_master_idx_file()
    assert company_name in master_idx
def test_bad_date_format_fails(self, bad_date):
    """Building ``DailyFilings`` from a bad date raises ``TypeError``."""
    with pytest.raises(TypeError):
        _ = DailyFilings(bad_date)
def test_quarter(self, date, expected):
    """``quarter`` for the given date equals the expected value."""
    filings_for_day = DailyFilings(date=date)
    assert filings_for_day.quarter == expected
def test_set_bad_entry_filter(self, bad_entry_filter):
    """Assigning a bad ``entry_filter`` after construction raises ``ValueError``."""
    # Build the object outside the ``raises`` block so that only the
    # assignment under test can satisfy the expectation; a ValueError from
    # the constructor would otherwise make this test pass for the wrong reason.
    d = DailyFilings(date=date(2020, 1, 2))
    with pytest.raises(ValueError):
        d.entry_filter = bad_entry_filter
def test__get_tar_bad_year_quarter(self, year, quarter):
    """Requesting tar URLs for a bad year/quarter combination raises ``ValueError``."""
    with pytest.raises(ValueError):
        # Everything stays inside the block: any of these steps may be the one that raises.
        first_day_of_quarter = date(year, utils.get_month(quarter), 1)
        filings_for_day = DailyFilings(date=first_day_of_quarter)
        filings_for_day._get_tar_urls()
class ComboFilings:
    """Class for retrieving all filings between specified dates.

    Args:
        start_date (datetime.date): First date (inclusive) for which to fetch filings.
        end_date (datetime.date): Last date (inclusive) for which to fetch filings.
        user_agent (Union[str, NoneType]): Value used for HTTP header "User-Agent" for all requests.
            If given None, a valid client with user_agent must be given.
            See the SEC's statement on
            `fair access <https://www.sec.gov/os/accessing-edgar-data>`_
            for more information.
        client (Union[NoneType, secedgar.client.NetworkClient], optional): Client to use for
            fetching data. If None is given, a user_agent must be given to pass to
            :class:`secedgar.client.NetworkClient`.
            Defaults to ``secedgar.client.NetworkClient`` if none is given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to `lambda _: True`.
            The ``FilingEntry`` object exposes 7 variables which can be
            used to filter which filings to keep. These are "cik", "company_name",
            "form_type", "date_filed", "file_name", "path", and "num_previously_valid".
        balancing_point (int): Number of days from which to change lookup method from using
            ``DailyFilings`` to ``QuarterlyFilings``. If ``QuarterlyFilings`` is used,
            an additional filter will be added to limit which days are included.
            Defaults to 30.
        kwargs: Any keyword arguments to pass to ``NetworkClient`` if no client is specified.

    .. versionadded:: 0.4.0

    Examples:
        To download all filings from January 6, 2020 until November 5, 2020, you could do following:

        .. code-block:: python

            from datetime import date
            from secedgar import ComboFilings

            combo_filings = ComboFilings(start_date=date(2020, 1, 6),
                                         end_date=date(2020, 11, 5),
                                         user_agent="Name (email)")
            combo_filings.save('/my_directory')
    """

    def __init__(self,
                 start_date: date,
                 end_date: date,
                 user_agent: Union[str, None] = None,
                 client=None,
                 entry_filter=lambda _: True,
                 balancing_point=30,
                 **kwargs):
        self.entry_filter = entry_filter
        self.start_date = start_date
        self.end_date = end_date
        self.user_agent = user_agent
        # One quarterly and one daily object are created up front and
        # re-pointed at each quarter/day by ``get_urls`` and ``save``.
        self.quarterly = QuarterlyFilings(year=self.start_date.year,
                                          quarter=get_quarter(self.start_date),
                                          user_agent=user_agent,
                                          client=client,
                                          entry_filter=self.entry_filter,
                                          **kwargs)
        self.daily = DailyFilings(date=self.start_date,
                                  user_agent=user_agent,
                                  client=client,
                                  entry_filter=self.entry_filter,
                                  **kwargs)
        self.balancing_point = balancing_point
        self._recompute()

    @staticmethod
    def _entry_date(entry):
        """Return the ``date_filed`` value of ``entry`` as a ``datetime.date``.

        The value is read as an attribute first (the way entries are accessed
        elsewhere in this package) and as a mapping key otherwise.

        NOTE(review): the textual format of ``date_filed`` is not visible from
        this class; "YYYY-MM-DD" and "YYYYMMDD" are both accepted - confirm
        against the index parser.

        Raises:
            ValueError: If the value cannot be interpreted as a date.
        """
        try:
            raw = entry.date_filed
        except AttributeError:
            raw = entry["date_filed"]
        if isinstance(raw, date):
            # Also normalizes datetime instances, which cannot be compared to dates.
            return date(raw.year, raw.month, raw.day)
        digits = str(raw).strip().replace("-", "")
        if len(digits) != 8 or not digits.isdigit():
            raise ValueError("Unrecognized date_filed value: {!r}".format(raw))
        return date(int(digits[:4]), int(digits[4:6]), int(digits[6:]))

    def _in_date_range(self, entry):
        """Return True if ``entry`` was filed between ``start_date`` and ``end_date`` inclusive."""
        return self.start_date <= self._entry_date(entry) <= self.end_date

    def _recompute(self):
        """Recompute the best list of quarters and days to use based on the start and end date.

        Fills ``quarterly_date_list`` with ``(year, quarter, filter)`` tuples and
        ``daily_date_list`` with individual dates. A quarter that is only partly
        inside the requested range gets ``_in_date_range`` as its filter so that
        days outside ``start_date``..``end_date`` are dropped.
        """
        current_date = self.start_date
        self.quarterly_date_list = []
        self.daily_date_list = []
        while current_date <= self.end_date:
            current_quarter = get_quarter(current_date)
            current_year = current_date.year
            next_year, next_quarter = add_quarter(current_year, current_quarter)
            next_start_quarter_date = date(next_year, get_month(next_quarter), 1)

            days_till_next_quarter = (next_start_quarter_date - current_date).days
            days_till_end = (self.end_date - current_date).days
            at_quarter_start = current_date == date(
                current_year, get_month(current_quarter), 1)

            if days_till_next_quarter <= days_till_end:
                # The requested range runs past the end of this quarter.
                if at_quarter_start:
                    self.quarterly_date_list.append(
                        (current_year, current_quarter, lambda x: True))
                    current_date = next_start_quarter_date
                elif days_till_next_quarter > self.balancing_point:
                    self.quarterly_date_list.append(
                        (current_year, current_quarter, self._in_date_range))
                    current_date = next_start_quarter_date
                else:
                    while current_date < next_start_quarter_date:
                        self.daily_date_list.append(current_date)
                        current_date += timedelta(days=1)
            elif days_till_end > self.balancing_point:
                # The requested range ends inside this quarter (possibly on its last day).
                ends_on_quarter_end = days_till_next_quarter - 1 == days_till_end
                if at_quarter_start and ends_on_quarter_end:
                    quarter_filter = lambda x: True  # noqa: E731
                else:
                    # Entered mid-quarter and/or ending early: bound both sides.
                    quarter_filter = self._in_date_range
                self.quarterly_date_list.append(
                    (current_year, current_quarter, quarter_filter))
                # This quarter already covers everything up to ``end_date``; moving
                # past it ends the loop without also queuing ``end_date`` as a day.
                current_date = next_start_quarter_date
            else:
                while current_date <= self.end_date:
                    self.daily_date_list.append(current_date)
                    current_date += timedelta(days=1)

    def get_urls(self):
        """Get all urls between ``start_date`` and ``end_date``.

        Returns:
            dict: The dictionaries returned for each quarter and day merged
            into one, with the lists under a shared key concatenated.
        """
        # Use functools.reduce for speed
        # see https://stackoverflow.com/questions/10461531/merge-and-sum-of-two-dictionaries
        def reducer(accumulator, dictionary):
            for key, value in dictionary.items():
                accumulator[key] = accumulator.get(key, []) + value
            return accumulator

        list_of_dicts = []
        for (year, quarter, f) in self.quarterly_date_list:
            self.quarterly.year = year
            self.quarterly.quarter = quarter
            self.quarterly.entry_filter = lambda x: f(x) and self.entry_filter(x)
            list_of_dicts.append(self.quarterly.get_urls())

        for d in self.daily_date_list:
            self.daily.date = d
            try:
                list_of_dicts.append(self.daily.get_urls())
            except EDGARQueryError:
                # Best effort: skip days for which the lookup fails.
                continue

        complete_dictionary = reduce(reducer, list_of_dicts, {})
        return complete_dictionary

    def save(self,
             directory,
             dir_pattern=None,
             file_pattern="{accession_number}",
             download_all=False,
             daily_date_format="%Y%m%d"):
        """Save all filings between ``start_date`` and ``end_date``.

        Only filings that satisfy args given at initialization will
        be saved.

        Args:
            directory (str): Directory where filings should be stored.
            dir_pattern (str, optional): Format string for subdirectories. Defaults to None.
            file_pattern (str, optional): Format string for files. Defaults to "{accession_number}".
            download_all (bool, optional): Type of downloading system, if true downloads
                all data for each day, if false downloads each file in index.
                Defaults to False.
            daily_date_format (str, optional): Format string to use for the `{date}` pattern.
                Defaults to "%Y%m%d".
        """
        for (year, quarter, f) in self.quarterly_date_list:
            self.quarterly.year = year
            self.quarterly.quarter = quarter
            self.quarterly.entry_filter = lambda x: f(x) and self.entry_filter(x)
            self.quarterly.save(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all)

        for d in self.daily_date_list:
            self.daily.date = d
            try:
                self.daily.save(directory=directory,
                                dir_pattern=dir_pattern,
                                file_pattern=file_pattern,
                                download_all=download_all,
                                date_format=daily_date_format)
            except (EDGARQueryError, NoFilingsError):
                # Best effort: skip days for which nothing can be saved.
                continue
# Marks "end_date not passed" so that today's date is looked up on every call
# instead of being frozen when this module is imported. ``None`` cannot be used
# for this because an explicit ``end_date=None`` selects a single day.
_END_DATE_UNSET = object()


def filings(cik_lookup=None,
            filing_type=None,
            user_agent=None,
            start_date=None,
            end_date=_END_DATE_UNSET,
            count=None,
            client=None,
            entry_filter=lambda _: True,
            **kwargs):
    """Utility method to get best filing object.

    Args:
        cik_lookup (str): Central Index Key (CIK) for company of interest.
        filing_type (secedgar.core.filing_types.FilingType, optional): Valid filing type
            enum. Defaults to None. If None, then all filing types for CIKs will be returned.
        user_agent (str, optional): Passed through to the filing object that is created.
            Defaults to None.
        start_date (datetime.date, optional): Date of daily filing to fetch.
        end_date (datetime.date, optional): Date of daily filing to fetch.
            Defaults to today's date at the time of the call.
        count (int, optional): Number of filings to fetch. Will fetch up to `count` if that
            many filings are available. Defaults to all filings available. Only supported
            together with ``cik_lookup``.
        client (secedgar.client.NetworkClient, optional): Client to use. Defaults to
            ``secedgar.client.NetworkClient`` if None given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to ``lambda _: True``.
            See :class:`secedgar.core.DailyFilings` for more detail.
        kwargs: Any keyword arguments to pass to ``NetworkClient`` if no client is specified.

    Returns:
        A ``CompanyFilings`` object if ``cik_lookup`` is given; otherwise a ``DailyFilings``,
        ``QuarterlyFilings`` or ``ComboFilings`` object depending on the dates.

    Raises:
        FilingTypeError: If ``filing_type`` is given but is not a ``FilingType``.
        NotImplementedError: If ``count`` is given without ``cik_lookup``.
        ValueError: If the arguments do not select any kind of filing object.

    Examples:
        Using the ``filings`` function from secedgar is the easiest way to retrieve filings.

        Depending on the arguments given, secedgar will return an object that will get you
        the information you want from EDGAR.

        There are 4 main classes which can be returned.

            - :class:`secedgar.ComboFilings` for fetching filings over multiple days
              that does not fall exactly into a quarter
            - :class:`secedgar.CompanyFilings` for fetching a particular
              filing type for one or more companies
            - :class:`secedgar.DailyFilings` for fetching all filings
              from a specific date
            - :class:`secedgar.QuarterlyFilings` for fetching all filings
              from a specific quarter

        To get all filings over a time span, you could use something like below.

        .. code-block:: python

            from datetime import date
            from secedgar import filings, FilingType

            # secedgar creates correct filing object for given arguments
            # (``count`` is only supported together with ``cik_lookup``)
            my_filings = filings(start_date=date(2020, 12, 10),
                                 end_date=date(2020, 12, 15),
                                 filing_type=FilingType.FILING_4,
                                 user_agent="Name (email)")

            # easy access to methods shared across all 4 different filing classes
            my_filings_urls = my_filings.get_urls()
            my_filings.save("/path/to/directory")

        To get a single filing type for one or more companies, you could use this:

        .. code-block:: python

            from secedgar import filings, FilingType

            # similar to above, but fetches filings for specific tickers
            company_filings = filings(cik_lookup=["aapl", "fb"],
                                      filing_type=FilingType.FILING_10Q,
                                      user_agent="Name (email)")
            company_filings_urls = company_filings.get_urls()
            company_filings.save("/path/to/directory")

        To get filings for a single day, you could use something like this:

        .. code-block:: python

            from datetime import date
            from secedgar import filings

            # all filings for January 3, 2020
            daily_filings = filings(start_date=date(2020, 1, 3),
                                    end_date=date(2020, 1, 3),
                                    user_agent="Name (email)")
            daily_filings.save("/path/to/directory")

            # limit which daily filings to use - saves only form 4 filings
            limit_to_form4 = lambda f: f.form_type.lower() == "4"
            daily_filings_limited = filings(start_date=date(2020, 1, 3),
                                            end_date=date(2020, 1, 3),
                                            user_agent="Name (email)",
                                            entry_filter=limit_to_form4)
            daily_filings_limited.save("/path/to/other/directory")

        For getting filings from a specific quarter, the function call would look like this:

        .. code-block:: python

            from datetime import date
            from secedgar import filings

            # all quarterly filings
            quarterly_filings = filings(start_date=date(2020, 1, 1),
                                        end_date=date(2020, 3, 31),
                                        user_agent="Name (email)")
            quarterly_filings.save("/path/to/directory")

            # limit which quarterly filings to use
            # saves only 10-K and 10-Q filings from quarter
            limit_to_10k_10q = lambda f: f.form_type.lower() in ("10-k", "10-q")
            quarterly_filings_limited = filings(start_date=date(2020, 1, 1),
                                                end_date=date(2020, 3, 31),
                                                user_agent="Name (email)",
                                                entry_filter=limit_to_10k_10q)
            quarterly_filings_limited.save("/path/to/other/directory")

    """
    if end_date is _END_DATE_UNSET:
        end_date = date.today()

    if filing_type is not None and not isinstance(filing_type, FilingType):
        raise FilingTypeError

    if cik_lookup:
        return CompanyFilings(cik_lookup,
                              filing_type=filing_type,
                              user_agent=user_agent,
                              start_date=start_date,
                              end_date=end_date,
                              count=count,
                              client=client,
                              **kwargs)

    # Define entry filter as original
    _entry_filter = entry_filter

    if filing_type is not None:
        # If filing type also given, add filing types to existing entry filter
        def _entry_filter(x):
            return x.form_type == filing_type and entry_filter(x)

    if count is not None:
        raise NotImplementedError(
            "Count has not yet been implemented for Daily, quarterly & Combo Filings."
        )

    # A single day: explicit ``end_date=None`` or identical start and end dates.
    if (end_date is None or end_date == start_date) and isinstance(
            start_date, date):
        return DailyFilings(date=start_date,
                            user_agent=user_agent,
                            client=client,
                            entry_filter=_entry_filter,
                            **kwargs)

    if isinstance(start_date, date) and isinstance(end_date, date):
        current_quarter = get_quarter(start_date)
        current_year = start_date.year
        start_quarter_date = date(current_year, get_month(current_quarter), 1)
        next_year, next_quarter = add_quarter(current_year, current_quarter)
        # Last day of the quarter = day before the first day of the next one.
        end_quarter_date = date(next_year, get_month(next_quarter),
                                1) - timedelta(days=1)
        if start_quarter_date == start_date and end_date == end_quarter_date:
            return QuarterlyFilings(year=current_year,
                                    quarter=current_quarter,
                                    client=client,
                                    user_agent=user_agent,
                                    entry_filter=_entry_filter,
                                    **kwargs)
        return ComboFilings(start_date=start_date,
                            end_date=end_date,
                            user_agent=user_agent,
                            client=client,
                            entry_filter=_entry_filter,
                            **kwargs)

    raise ValueError(
        """Invalid arguments. You must provide 'cik_lookup' OR 'start_date' \
OR ('start_date' and 'end_date').""")
# Marks "end_date not passed" so that today's date is looked up on every call
# instead of being frozen when this module is imported. ``None`` cannot be used
# for this because an explicit ``end_date=None`` selects a single day.
_END_DATE_UNSET = object()


def filings(
    cik_lookup=None,
    filing_type=None,
    start_date=None,
    end_date=_END_DATE_UNSET,
    count=None,
    client=None,
    entry_filter=lambda _: True,
):
    """Utility method to get best filing object.

    Args:
        cik_lookup (str): Central Index Key (CIK) for company of interest.
        start_date (datetime.date, optional): Date of daily filing to fetch.
        end_date (datetime.date, optional): Date of daily filing to fetch.
            Defaults to today's date at the time of the call.
        filing_type (secedgar.core.filing_types.FilingType, optional): Valid filing type
            enum. Defaults to None. If None, then all filing types for CIKs will be returned.
        count (int, optional): Number of filings to fetch. Will fetch up to `count` if that
            many filings are available. Defaults to all filings available. Only supported
            together with ``cik_lookup``.
        client (secedgar.client._base, optional): Client to use. Defaults to
            ``secedgar.client.NetworkClient`` if None given.
        entry_filter (function, optional): A boolean function to determine
            if the FilingEntry should be kept. Defaults to ``lambda _: True``.
            See :class:`secedgar.core.DailyFilings` for more detail.

    Returns:
        A ``CompanyFilings`` object if ``cik_lookup`` is given; otherwise a ``DailyFilings``,
        ``QuarterlyFilings`` or ``ComboFilings`` object depending on the dates.

    Raises:
        FilingTypeError: If ``filing_type`` is given but is not a ``FilingType``.
        NotImplementedError: If ``count`` is given without ``cik_lookup``.
        ValueError: If the arguments do not select any kind of filing object.

    .. code-block:: python

        from datetime import date
        from secedgar.core import filings, FilingType

        engine = filings(start_date=date(2020, 12, 10),
                         end_date=date(2020, 12, 10),
                         filing_type=FilingType.FILING_4)

    """
    if end_date is _END_DATE_UNSET:
        end_date = date.today()

    if filing_type is not None and not isinstance(filing_type, FilingType):
        raise FilingTypeError

    if cik_lookup:
        return CompanyFilings(
            cik_lookup,
            filing_type=filing_type,
            start_date=start_date,
            end_date=end_date,
            count=count,
            client=client,
        )

    # Keep the caller's filter under a name that is never rebound, so the
    # combined filter below can never end up referring to itself.
    caller_entry_filter = entry_filter
    combined_filter = caller_entry_filter
    if filing_type is not None:

        def combined_filter(x):
            return x.form_type == filing_type and caller_entry_filter(x)

    if count is not None:
        raise NotImplementedError(
            "Count has not yet been implemented for Daily, quarterly & Combo Filings."
        )

    # A single day: explicit ``end_date=None`` or identical start and end dates.
    if (end_date is None or end_date == start_date) and isinstance(
            start_date, date):
        return DailyFilings(date=start_date,
                            client=client,
                            entry_filter=combined_filter)

    if isinstance(start_date, date) and isinstance(end_date, date):
        current_quarter = get_quarter(start_date)
        current_year = start_date.year
        start_quarter_date = date(current_year, get_month(current_quarter), 1)
        next_year, next_quarter = add_quarter(current_year, current_quarter)
        # Last day of the quarter = day before the first day of the next one.
        end_quarter_date = date(next_year, get_month(next_quarter),
                                1) - timedelta(days=1)
        if start_quarter_date == start_date and end_date == end_quarter_date:
            return QuarterlyFilings(current_year,
                                    current_quarter,
                                    client=client,
                                    entry_filter=combined_filter)
        return ComboFilings(start_date,
                            end_date,
                            client=client,
                            entry_filter=combined_filter)

    raise ValueError(
        """Invalid arguments. You must provide 'cik_lookup' OR 'start_date' \
OR ('start_date' and 'end_date').""")
def test_good_date_setter(self, date):
    """A valid date passed at construction is exposed unchanged as ``date``."""
    filings_for_day = DailyFilings(date=date)
    assert filings_for_day.date == date
def test__get_tar_valid(self):
    """The first tar URL for 2020-01-02 starts with ``http`` and names that day's archive."""
    filings_for_day = DailyFilings(date=date(2020, 1, 2))
    tar_urls = filings_for_day._get_tar_urls()
    assert tar_urls[0].endswith('20200102.nc.tar.gz')
    assert tar_urls[0].startswith('http')
def test_bad_date_on_init(self, bad_date):
    """Passing a bad ``date`` keyword to the constructor raises ``TypeError``."""
    with pytest.raises(TypeError):
        DailyFilings(date=bad_date)
def test_set_good_entry_filter(self, good_entry_filter):
    """A good entry filter can be assigned and is callable afterwards."""
    filings_for_day = DailyFilings(date=date(2020, 1, 2))
    filings_for_day.entry_filter = good_entry_filter
    assert callable(filings_for_day.entry_filter)
def test_bad_date_setter_after_init(self, bad_date):
    """Assigning a bad date to an existing object raises ``TypeError``."""
    # Start from a valid object so only the assignment can raise.
    filings_for_day = DailyFilings(date=date(2020, 1, 1))
    with pytest.raises(TypeError):
        filings_for_day.date = bad_date
def test_idx_filename(self, date, expected_filename):
    """``idx_filename`` for the given date equals the expected file name."""
    filings_for_day = DailyFilings(date=date)
    assert filings_for_day.idx_filename == expected_filename
def test_no_params(self):
    """Params should always be empty."""
    filings_for_day = DailyFilings(date(2020, 1, 1))
    request_params = filings_for_day.params
    assert not request_params
def test_get_urls(self, mock_daily_quarter_directory, mock_daily_idx_file,
                  key, url):
    """The expected URL is listed under the expected key for 2018-12-31."""
    filings_for_day = DailyFilings(date(2018, 12, 31))
    urls_by_key = filings_for_day.get_urls()
    assert url in urls_by_key[key]
def test_master_idx_date_format(self, date_tuple, formatted):
    """``_get_idx_formatted_date`` renders the date built from ``date_tuple`` as ``formatted``."""
    year, *month_and_day = date_tuple
    filings_for_day = DailyFilings(date(year, *month_and_day))
    assert filings_for_day._get_idx_formatted_date() == formatted
def test_path_property(self, year, month, day, quarter):
    """``path`` points at the daily-index folder for the date's year and quarter."""
    expected_path = "Archives/edgar/daily-index/{year}/QTR{quarter}/".format(
        year=year, quarter=quarter)
    filings_for_day = DailyFilings(date(year, month, day))
    assert filings_for_day.path == expected_path
def test_set_good_client(self, mock_user_agent):
    """A client passed to the constructor is the one exposed as ``client``."""
    network_client = NetworkClient(user_agent=mock_user_agent)
    filings_for_day = DailyFilings(date=date(2021, 1, 1),
                                   client=network_client)
    assert filings_for_day.client == network_client