def test_large_number_of_filings(formatted_earliest_after_date):
    """Request fewer, more, and exactly as many AAPL 8-K URLs as are available."""
    form = "8-K"
    symbol = "AAPL"
    upper_bound = date(2019, 11, 15).strftime(DATE_FORMAT_TOKENS)
    with_amends = False

    def fetch(limit, lower_bound):
        # Only the requested count and the lower date bound vary between calls.
        return get_filing_urls_to_download(
            form, symbol, limit, lower_bound, upper_bound, with_amends
        )

    # requested count < number of filings available
    assert len(fetch(100, formatted_earliest_after_date)) == 100

    # fetch filing URLs over two pages, but retrieve
    # fewer than the total number of filings available
    assert len(fetch(150, formatted_earliest_after_date)) == 150

    # SEC Edgar Search fails to retrieve Apple 8-Ks after 2000 and before 2002
    lower_bound_2002 = date(2002, 1, 1).strftime(DATE_FORMAT_TOKENS)

    # requested count > number of filings available:
    # there are 158 AAPL 8-K filings before 2019-11-15 and after 2002-01-01
    assert len(fetch(200, lower_bound_2002)) == 158

    # requested count == number of filings available
    assert len(fetch(158, lower_bound_2002)) == 158
def test_include_amends():
    """Including amends must add exactly as many URLs as a 10-K/A query returns."""
    symbol = "AAPL"
    limit = 100
    # AAPL has two 10-K/A amends before this date
    upper_bound = "20191201"
    lower_bound = None

    def count(form, with_amends):
        urls = get_filing_urls_to_download(
            form, symbol, limit, lower_bound, upper_bound, with_amends
        )
        return len(urls)

    plain_total = count("10-K", False)
    amended_total = count("10-K", True)
    assert amended_total > plain_total

    # The surplus from include_amends=True should match a direct 10-K/A query.
    surplus = amended_total - plain_total
    direct_amends = count("10-K/A", True)
    assert surplus == direct_amends
def test_large_number_of_filings():
    """Request fewer, more, and exactly as many AAPL 8-K URLs as are available."""
    form = "8-K"
    symbol = "AAPL"
    lower_bound = None
    upper_bound = "20191115"
    with_amends = False

    def count(limit):
        # Only the requested number of filings changes between calls.
        return len(
            get_filing_urls_to_download(
                form, symbol, limit, lower_bound, upper_bound, with_amends
            )
        )

    # requested count < number of filings available
    assert count(100) == 100

    # fetch filing URLs over two pages, but retrieve
    # fewer than the total number of filings available
    assert count(150) == 150

    # requested count > number of filings available:
    # there are 176 AAPL 8-K filings before 20191115
    assert count(200) == 176

    # requested count == number of filings available
    assert count(176) == 176
def test_date_bounds():
    """Shrink the date window one day at a time and check the result counts."""
    form = "8-K"
    symbol = "AAPL"
    with_amends = False
    day = timedelta(days=1)

    def count(limit, lower, upper):
        # Dates are handed over in YYYYMMDD form.
        return len(
            get_filing_urls_to_download(
                form,
                symbol,
                limit,
                lower.strftime("%Y%m%d"),
                upper.strftime("%Y%m%d"),
                with_amends,
            )
        )

    # get all available filings in the date range
    everything = sys.maxsize
    lower = datetime(2017, 9, 12)
    upper = datetime(2019, 11, 15)

    # filings available on after_date and before_date
    assert count(everything, lower, upper) == 20

    # moving the lower bound forward one day drops one filing
    lower = lower + day
    assert count(everything, lower, upper) == 19

    # moving the upper bound back one day drops another
    upper = upper - day
    assert count(everything, lower, upper) == 18

    # requested count < number of filings available
    assert count(5, lower, upper) == 5
def test_search_api_error_handling(formatted_latest_before_date):
    """A date string in the wrong format must raise EdgarSearchApiError."""
    # Edgar Search API requires a date in the format YYYY-MM-DD;
    # this malformed value is supplied in the after-date position.
    malformed_date = "20090827"

    with pytest.raises(EdgarSearchApiError):
        get_filing_urls_to_download(
            "8-K",
            "AAPL",
            1,
            malformed_date,
            formatted_latest_before_date,
            False,
        )
def test_include_amends():
    """Including amends must return strictly more AAPL 10-K URLs than excluding them."""
    symbol = "AAPL"
    form = "10-K"
    limit = 100
    # AAPL has 10-K/A amends before this date
    upper_bound = date(2019, 12, 1).strftime(DATE_FORMAT_TOKENS)
    lower_bound = None

    def count(with_amends):
        urls = get_filing_urls_to_download(
            form, symbol, limit, lower_bound, upper_bound, with_amends
        )
        return len(urls)

    plain_total = count(False)
    amended_total = count(True)

    surplus = amended_total - plain_total
    assert surplus > 0
def test_13f_filings(filing_type, formatted_earliest_after_date, formatted_latest_before_date):
    """Asking for one 13F filing URL must yield exactly one."""
    # Vanguard files 13F-NT, 13F-HR
    vanguard_cik = "0000102909"
    limit = 1
    with_amends = False

    urls = get_filing_urls_to_download(
        filing_type,
        vanguard_cik,
        limit,
        formatted_earliest_after_date,
        formatted_latest_before_date,
        with_amends,
    )

    assert len(urls) == 1
def test_fetch_zero_filings(formatted_earliest_after_date, formatted_latest_before_date):
    """Requesting zero filings must produce an empty result."""
    symbol = "AAPL"
    form = "8-K"
    limit = 0
    with_amends = False

    urls = get_filing_urls_to_download(
        form,
        symbol,
        limit,
        formatted_earliest_after_date,
        formatted_latest_before_date,
        with_amends,
    )

    assert len(urls) == 0
def test_common_filings(filing_type, formatted_earliest_after_date, formatted_latest_before_date):
    """Asking for one AAPL filing URL of the given type must yield exactly one."""
    # AAPL files 4, 8-K, 10-K, 10-Q, SC 13G, SD, DEF 14A
    symbol = "AAPL"
    limit = 1
    with_amends = False

    urls = get_filing_urls_to_download(
        filing_type,
        symbol,
        limit,
        formatted_earliest_after_date,
        formatted_latest_before_date,
        with_amends,
    )

    assert len(urls) == 1
def test_13f_filings(filing_type):
    """Asking for one 13F filing URL with no date bounds must yield exactly one."""
    # Vanguard files 13F-NT, 13F-HR
    vanguard_cik = "0000102909"
    limit = 1
    lower_bound = None
    upper_bound = None
    with_amends = False

    urls = get_filing_urls_to_download(
        filing_type, vanguard_cik, limit, lower_bound, upper_bound, with_amends
    )

    assert len(urls) == 1
def test_s1_filings(formatted_earliest_after_date, formatted_latest_before_date):
    """Asking for one S-1 filing URL for NET must yield exactly one."""
    # Cloudflare filed an S-1 during its IPO
    symbol = "NET"
    form = "S-1"
    limit = 1
    with_amends = False

    urls = get_filing_urls_to_download(
        form,
        symbol,
        limit,
        formatted_earliest_after_date,
        formatted_latest_before_date,
        with_amends,
    )

    assert len(urls) == 1
def test_20f_filings(formatted_earliest_after_date, formatted_latest_before_date):
    """Asking for one 20-F filing URL for BABA must yield exactly one."""
    # Alibaba files 20-F
    symbol = "BABA"
    form = "20-F"
    limit = 1
    with_amends = False

    urls = get_filing_urls_to_download(
        form,
        symbol,
        limit,
        formatted_earliest_after_date,
        formatted_latest_before_date,
        with_amends,
    )

    assert len(urls) == 1
def test_common_filings(filing_type):
    """Asking for one AAPL filing URL with no date bounds must yield exactly one."""
    # AAPL files 4, 8-K, 10-K, 10-Q, SC 13G, SD, DEF 14A
    symbol = "AAPL"
    limit = 1
    lower_bound = None
    upper_bound = None
    with_amends = False

    urls = get_filing_urls_to_download(
        filing_type, symbol, limit, lower_bound, upper_bound, with_amends
    )

    assert len(urls) == 1
def test_10ksb_filings(formatted_earliest_after_date, formatted_latest_before_date):
    """Asking for one 10KSB filing URL must yield exactly one."""
    # Ubiquitech files 10KSB
    ubiquitech_cik = "0001411460"
    form = "10KSB"
    limit = 1
    with_amends = False

    urls = get_filing_urls_to_download(
        form,
        ubiquitech_cik,
        limit,
        formatted_earliest_after_date,
        formatted_latest_before_date,
        with_amends,
    )

    assert len(urls) == 1
def test_10ksb_filings():
    """Asking for one 10KSB filing URL with no date bounds must yield exactly one."""
    # Ubiquitech files 10KSB
    ubiquitech_cik = "0001411460"
    form = "10KSB"
    limit = 1
    lower_bound = None
    upper_bound = None
    with_amends = False

    urls = get_filing_urls_to_download(
        form, ubiquitech_cik, limit, lower_bound, upper_bound, with_amends
    )

    assert len(urls) == 1
def test_s1_filings():
    """Asking for one S-1 filing URL for NET with no date bounds must yield exactly one."""
    # Cloudflare filed an S-1 during its IPO
    symbol = "NET"
    form = "S-1"
    limit = 1
    lower_bound = None
    upper_bound = None
    with_amends = False

    urls = get_filing_urls_to_download(
        form, symbol, limit, lower_bound, upper_bound, with_amends
    )

    assert len(urls) == 1
def test_20f_filings():
    """Asking for one 20-F filing URL for BABA with no date bounds must yield exactly one."""
    # Alibaba files 20-F
    symbol = "BABA"
    form = "20-F"
    limit = 1
    lower_bound = None
    upper_bound = None
    with_amends = False

    urls = get_filing_urls_to_download(
        form, symbol, limit, lower_bound, upper_bound, with_amends
    )

    assert len(urls) == 1
def test_common_filings():
    """Asking for one AAPL filing URL must yield exactly one for each common form."""
    # AAPL files 4, 8-K, 10-K, 10-Q, SC 13G, SD
    symbol = "AAPL"
    forms = ["4", "8-K", "10-K", "10-Q", "SC 13G", "SD"]
    limit = 1
    lower_bound = None
    upper_bound = None
    with_amends = False

    for form in forms:
        urls = get_filing_urls_to_download(
            form, symbol, limit, lower_bound, upper_bound, with_amends
        )
        assert len(urls) == 1
def test_simple_query(formatted_earliest_after_date):
    """A text query over AAPL DEF 14A filings returns the expected hit counts."""
    # Search for "antitrust" in all AAPL proxy statements
    form = "DEF 14A"
    symbol = "AAPL"
    upper_bound = "2021-01-10"
    with_amends = False
    search_term = "antitrust"
    no_limit = sys.maxsize

    hits = get_filing_urls_to_download(
        form,
        symbol,
        no_limit,
        formatted_earliest_after_date,
        upper_bound,
        with_amends,
        search_term,
    )

    # Proxy statements are published in both HTML and PDF form and the EDGAR
    # search API provides each one as its own hit.
    assert len(hits) == 6
    assert get_number_of_unique_filings(hits) == 3
def test_all_supported_filings(filing_type, formatted_earliest_after_date, formatted_latest_before_date):
    """Querying the given filing type for AAPL must not raise EdgarSearchApiError."""
    symbol = "AAPL"
    limit = 1
    with_amends = False

    try:
        urls = get_filing_urls_to_download(
            filing_type,
            symbol,
            limit,
            formatted_earliest_after_date,
            formatted_latest_before_date,
            with_amends,
        )
    except EdgarSearchApiError:
        # pytest.fail raises, so execution never continues past this branch.
        pytest.fail(
            f"EdgarSearchApiError was raised for {filing_type} filing.")

    # AAPL may or may not file certain filings
    assert len(urls) in (0, 1)
def get(self, filing_type, ticker_or_cik, num_filings_to_download=None, after_date=None, before_date=None, include_amends=False, store_files_to_disc=True):
    """Looks up filings and, optionally, saves the filing documents to disk.

    :param filing_type: type of filing to download
    :type filing_type: ``str``
    :param ticker_or_cik: ticker or CIK to download filings for
    :type ticker_or_cik: ``str``
    :param num_filings_to_download: number of filings to download,
        defaults to all available filings
    :type num_filings_to_download: ``int``, optional
    :param after_date: date of form YYYYMMDD in which to download filings after,
        defaults to None
    :type after_date: ``str``, optional
    :param before_date: date of form YYYYMMDD in which to download filings before,
        defaults to today
    :type before_date: ``str``, optional
    :param include_amends: denotes whether or not to include filing amends
        (e.g. 8-K/A), defaults to False
    :type include_amends: ``bool``, optional
    :param store_files_to_disc: when True the filings are passed to
        ``download_filings`` together with ``self.download_folder``; when False
        only the metadata is returned, defaults to True
    :type store_files_to_disc: ``bool``, optional
    :return: one dict per filing found, with the keys ``company_id``,
        ``filing_type``, ``file_name`` and ``url``
    :rtype: ``list`` of ``dict``
    :raises ValueError: if ``filing_type`` is not in ``SUPPORTED_FILINGS``,
        if ``num_filings_to_download`` is less than 1, or if ``after_date``
        is later than ``before_date``

    Usage::

        >>> from sec_edgar_downloader import Downloader
        >>> dl = Downloader()

        # Get all 8-K filings for Apple
        >>> dl.get("8-K", "AAPL")

        # Get all 8-K filings for Apple, including filing amends (8-K/A)
        >>> dl.get("8-K", "AAPL", include_amends=True)

        # Get all 8-K filings for Apple after January 1, 2017 and before March 25, 2017
        >>> dl.get("8-K", "AAPL", after_date="20170101", before_date="20170325")

        # Get the five most recent 10-K filings for Apple
        >>> dl.get("10-K", "AAPL", 5)

        # Get all 10-Q filings for Visa
        >>> dl.get("10-Q", "V")

        # Get all 13F-NT filings for the Vanguard Group
        >>> dl.get("13F-NT", "0000102909")

        # Get all 13F-HR filings for the Vanguard Group
        >>> dl.get("13F-HR", "0000102909")

        # Get all SC 13G filings for Apple
        >>> dl.get("SC 13G", "AAPL")

        # Get all SD filings for Apple
        >>> dl.get("SD", "AAPL")
    """
    if filing_type not in SUPPORTED_FILINGS:
        filing_options = ", ".join(sorted(SUPPORTED_FILINGS))
        raise ValueError(
            f"'{filing_type}' filings are not supported. "
            f"Please choose from the following: {filing_options}."
        )

    # Normalise the identifier: trim whitespace, upper-case it, and drop leading zeros.
    ticker_or_cik = str(ticker_or_cik).strip().upper().lstrip("0")

    if num_filings_to_download is None:
        # obtain all available filings, so we simply
        # need a large number to denote this
        num_filings_to_download = sys.maxsize
    else:
        num_filings_to_download = int(num_filings_to_download)
        if num_filings_to_download < 1:
            # The check accepts 1, so the message must say so as well.
            raise ValueError(
                "Please enter a number greater than or equal to 1 "
                "for the number of filings to download."
            )

    # no sensible default exists for after_date
    if after_date is not None:
        after_date = str(after_date)
        validate_date_format(after_date)

    if before_date is None:
        before_date = date.today().strftime("%Y%m%d")
    else:
        before_date = str(before_date)
        validate_date_format(before_date)

    # Both bounds are strings at this point, so this is a lexicographic
    # comparison; for YYYYMMDD strings that matches chronological order.
    if after_date is not None and after_date > before_date:
        raise ValueError(
            "Invalid after_date and before_date. "
            "Please enter an after_date that is less than the before_date."
        )

    filings_to_fetch = get_filing_urls_to_download(
        filing_type,
        ticker_or_cik,
        num_filings_to_download,
        after_date,
        before_date,
        include_amends,
    )

    if store_files_to_disc:
        download_filings(self.download_folder, ticker_or_cik, filing_type, filings_to_fetch)

    # The metadata is returned whether or not the documents were stored.
    return [
        {
            "company_id": ticker_or_cik,
            "filing_type": filing_type,
            "file_name": filing.filename,
            "url": filing.url,
        }
        for filing in filings_to_fetch
    ]