class Filing(AbstractFiling):
    """Base class for receiving EDGAR filings.

    Attributes:
        cik (str): Central Index Key (CIK) for company of interest.
        filing_type (secedgar.filings.filing_types.FilingType): Valid filing
            type enum.
        start_date (Union[str, datetime.datetime], optional): Date before
            which not to fetch reports. Stands for "date after."
            Defaults to None (will fetch all filings before end_date).
        end_date (Union[str, datetime.datetime], optional): Date after which
            not to fetch reports. Stands for "date before."
            Defaults to None, which is resolved to today's date at the moment
            the object is created.
        client (optional): Client used to make requests. When None, a
            ``NetworkClient`` is created from ``kwargs``; when given, it is
            used as-is and ``kwargs`` are not forwarded to it.
        kwargs: Keyword arguments forwarded to ``NetworkClient`` when no
            client is supplied. A non-None ``count`` entry is also added to
            the request parameters.

    .. versionadded:: 0.1.5
    """

    # TODO: Maybe allow NetworkClient to take in kwargs
    #  (set to None and if None, create NetworkClient with kwargs)
    def __init__(self,
                 cik,
                 filing_type,
                 start_date=None,
                 end_date=None,
                 client=None,
                 **kwargs):
        # Resolve "today" per instance. A call in the signature would be
        # evaluated only once, when the function is defined.
        if end_date is None:
            end_date = datetime.datetime.today()
        self._start_date = start_date
        self._end_date = end_date
        if not isinstance(filing_type, FilingType):
            raise FilingTypeError
        self._filing_type = filing_type
        if not isinstance(cik, CIK):  # make CIK for users if not given
            cik = CIK(cik)
        self._cik = cik
        self._accession_numbers = []
        self._params = {
            'action': 'getcompany',
            'dateb': sanitize_date(self.end_date),
            'output': 'xml',
            'owner': 'include',
            'start': 0,
            'type': self.filing_type.value
        }
        count = kwargs.get('count')
        if count is not None:
            self._params['count'] = count
        if start_date is not None:
            self._params['datea'] = sanitize_date(start_date)
        # Use the supplied client if there is one; otherwise build the
        # default NetworkClient from kwargs.
        self._client = NetworkClient(**kwargs) if client is None else client

    @property
    def path(self):
        """str: Path added to client base."""
        return "cgi-bin/browse-edgar"

    @property
    def params(self):
        """:obj:`dict`: Parameters to include in requests."""
        return self._params

    @property
    def client(self):
        """``secedgar.client.base``: Client to use to make requests."""
        return self._client

    @property
    def start_date(self):
        """Union([datetime.datetime, str]): Date before which no filings are fetched."""
        return self._start_date

    @start_date.setter
    def start_date(self, val):
        self._start_date = val
        self._params['datea'] = sanitize_date(val)

    @property
    def end_date(self):
        """Union([datetime.datetime, str]): Date after which no filings are fetched."""
        return self._end_date

    @end_date.setter
    def end_date(self, val):
        self._end_date = val
        self._params['dateb'] = sanitize_date(val)

    @property
    def filing_type(self):
        """``secedgar.filings.FilingType``: FilingType enum of filing."""
        return self._filing_type

    @filing_type.setter
    def filing_type(self, filing_type):
        if not isinstance(filing_type, FilingType):
            raise FilingTypeError
        self._filing_type = filing_type
        self._params['type'] = filing_type.value

    @property
    def accession_numbers(self):
        """:obj:`list`: Accession numbers held by this object (created empty)."""
        return self._accession_numbers

    @property
    def ciks(self):
        """:obj:`list` of :obj:`str`: List of CIK strings."""
        return self._cik.ciks

    def get_urls(self, **kwargs):
        """Get urls for all CIKs given to Filing object.

        Args:
            kwargs: Anything to be passed to requests when making get request.

        Returns:
            urls (list): List of urls for txt files to download.
        """
        urls = []
        for cik in self.ciks:
            urls.extend(self._get_urls_for_cik(cik, **kwargs))
        return urls

    # TODO: Change this to return accession numbers that are turned into URLs later
    def _get_urls_for_cik(self, cik, **kwargs):
        """Get all urls for a specific company.

        Pages through the results matching the start date, end date,
        filing_type, and count parameters. Sets the ``CIK`` and ``start``
        entries of ``self.params`` as a side effect.

        Args:
            cik (str): CIK for company.
            kwargs: Anything to be passed to requests when making get request.

        Returns:
            txt_urls (list of str): Up to the desired number of URLs for that
            specific company if available.
        """
        self.params['CIK'] = cik
        self.params["start"] = 0  # set start back to 0 before paginating
        count = self.client.count
        links = []
        # TODO: Make paginate utility outside of this class
        while len(links) < count:
            data = self.client.get_soup(self.path, self.params, **kwargs)
            # Parse the page once and reuse the result for the stop check.
            page_links = [link.string for link in data.find_all("filinghref")]
            links.extend(page_links)
            # TODO: Consider making client adopt most efficient count
            self.params["start"] += count
            if not page_links:
                break  # break if no more filings left
        # Replace everything from the last "-" onwards with ".txt".
        txt_urls = [link[:link.rfind("-")] + ".txt" for link in links]
        return txt_urls[:count]

    @staticmethod
    def _get_accession_numbers(links):
        """Gets accession numbers given list of links of the form
        https://www.sec.gov/Archives/edgar/data/<cik>/<first part of accession number before '-'>
        /<accession number>-index.htm

        Args:
            links (list): List of links to extract accession numbers from.

        Returns:
            List of accession numbers for given links.
        """
        return [
            link.split('/')[-1].replace('-index.htm', '') for link in links
        ]

    # TODO: break this method down further
    def save(self, directory):
        """Save files in specified directory.

        Each txt url looks something like:
        https://www.sec.gov/Archives/edgar/data/1018724/000101872419000043/0001018724-19-000043.txt

        Files are written to
        ``<directory>/<first segment of file name>/<filing type>/<file name>``.

        Args:
            directory (str): Path to directory where files should be saved.

        Returns:
            None

        Raises:
            ValueError: If no text urls are available for given filing object.
        """
        urls = self.get_urls()
        if not urls:
            raise ValueError("No filings available.")

        for url in urls:
            doc_name = url.split("/")[-1]
            # The part of the file name before the first "-" names the
            # per-company sub-directory.
            cik = doc_name.split('-')[0]
            data = requests.get(url).text
            path = os.path.join(directory, cik, self.filing_type.value)
            make_path(path)
            with open(os.path.join(path, doc_name), "w") as f:
                f.write(data)
class CIKValidator(object):
    """Validates company tickers and/or company names based on CIK availability.

    Used internally by the CIK class. Not intended for outside use.

    Args:
        lookups (Union[str, list, tuple]): List of tickers and/or company names
            for which to find CIKs.
        client (optional): Client used to make requests. When None, a
            ``NetworkClient`` is created from ``kwargs``; when given, it is
            used as-is.
        **kwargs: Keyword arguments forwarded to ``NetworkClient`` when no
            client is supplied.

    Raises:
        TypeError: If ``lookups`` is empty, is not iterable, or contains
            anything other than strings.

    .. versionadded:: 0.1.5
    """

    def __init__(self, lookups, client=None, **kwargs):
        if isinstance(lookups, str):
            self._lookups = [lookups]  # make single string into list
        else:
            try:
                # Check that iterable only contains strings and is not empty.
                # Iterating a non-iterable raises TypeError as well, so both
                # failure modes end up with the same message below.
                if not lookups or not all(
                        isinstance(o, str) for o in lookups):
                    raise TypeError
                self._lookups = lookups
            except TypeError:
                raise TypeError("CIKs must be given as string or iterable.")
        self._params = {'action': 'getcompany'}
        # Use the supplied client if there is one; otherwise build the
        # default NetworkClient from kwargs.
        self._client = NetworkClient(**kwargs) if client is None else client

    @property
    def path(self):
        """str: Path to add to client base."""
        return "cgi-bin/browse-edgar"

    @property
    def client(self):
        """``secedgar.client.base``: Client to use to fetch requests."""
        return self._client

    @property
    def params(self):
        """:obj:`dict` Search parameters to add to client."""
        return self._params

    def get_ciks(self):
        """Validate lookup values and return corresponding CIKs.

        Lookups that do not resolve to a single valid CIK are left out of the
        result.

        Returns:
            ciks (dict): Dictionary with lookup terms as keys and CIKs as values.
        """
        ciks = dict()
        for lookup in self._lookups:
            try:
                result = self._get_cik(lookup)
                self._validate_cik(result)  # raises error if not valid CIK
            except CIKError:
                # Deliberate skip: when multiple companies were found,
                # _get_cik has already emitted warnings and returned None.
                continue
            ciks[lookup] = result
        return ciks

    def _get_cik(self, lookup):
        """Get cik for lookup value.

        The lookup is first sent as the ``CIK`` parameter. If that raises
        ``EDGARQueryError`` it is retried as the ``company`` parameter.

        Args:
            lookup (str): Value to look up.

        Returns:
            str or None: First whitespace-separated token of the company
            link text, or None (after warning) when no single company
            could be read from the response.

        Raises:
            TypeError: If ``lookup`` is not a string.
            EDGARQueryError: If the fallback query fails, or if the response
                holds neither a single company nor a results table.
        """
        self._validate_lookup(lookup)
        try:
            try:  # try to lookup by CIK
                self._params['CIK'] = lookup
                soup = self._client.get_soup(self.path, self.params)
            except EDGARQueryError:  # fallback to lookup by company name
                # delete this parameter so no conflicts arise
                self._params.pop('CIK', None)
                self._params['company'] = lookup
                soup = self._client.get_soup(self.path, self.params)
            try:  # try to get single CIK for lookup
                span = soup.find('span', {'class': 'companyName'})
                return span.find('a').getText().split()[0]
            except AttributeError:
                # warn and skip if multiple possibilities for CIK found
                warnings.warn(
                    "Lookup '{0}' will be skipped. "
                    "Found multiple companies matching '{0}':".format(lookup))
                warnings.warn('\n'.join(self._get_cik_possibilities(soup)))
                return None
        finally:
            # Always clear the per-lookup parameters, including when the
            # fallback request raises, so the next lookup starts clean.
            self._params.pop('company', None)
            self._params.pop('CIK', None)

    @staticmethod
    def _get_cik_possibilities(soup):
        """Get all CIK possibilities if multiple are listed.

        Args:
            soup (BeautifulSoup): BeautifulSoup object to search through.

        Returns:
            All possible companies that match lookup.

        Raises:
            EDGARQueryError: If the results table is not present.
        """
        try:
            # Exclude table header
            table_rows = soup.find('table', {
                'summary': 'Results'
            }).find_all('tr')[1:]
            # Company names are in second column of table
            return [
                ''.join(row.find_all('td')[1].find_all(text=True))
                for row in table_rows
            ]
        except AttributeError:
            # If there are no CIK possibilities, then no results were returned
            raise EDGARQueryError

    @staticmethod
    def _validate_cik(cik):
        """Check if CIK is 10 digit string.

        Raises:
            CIKError: If ``cik`` is not a string of exactly ten digits.
        """
        if not (isinstance(cik, str) and len(cik) == 10 and cik.isdigit()):
            raise CIKError(cik)

    @staticmethod
    def _validate_lookup(lookup):
        """Ensure that lookup is string.

        Args:
            lookup: Value to lookup.

        Raises:
            TypeError: If lookup is not string.
        """
        if not isinstance(lookup, str):
            raise TypeError(
                "Lookup value must be string. Given type {0}.".format(
                    type(lookup)))