def _validate_response(response):
    """Ensure response from EDGAR is valid.

    A response is accepted only when its status code is 200 and its body
    contains none of the known EDGAR error messages.

    Args:
        response (requests.Response): Response object to inspect. Only its
            ``status_code`` and ``text`` attributes are read.

    Raises:
        EDGARQueryError: If the status code is in the 4xx or 5xx range, if
            the body contains an EDGAR error message, or if the status code
            is anything other than 200.
    """
    error_messages = ("The value you submitted is not valid",
                      "No matching Ticker Symbol.",
                      "No matching CIK.",
                      "No matching companies.")
    status_code = response.status_code

    if 400 <= status_code < 500:
        if status_code == 400:
            # NOTE(review): 400 is "Bad Request"; the wording below is kept
            # unchanged for callers that may match on it.
            raise EDGARQueryError("The query could not be completed. "
                                  "The page does not exist.")
        elif status_code == 429:
            # Rate limiting deserves its own message so the user knows that
            # retrying immediately will not help.
            raise EDGARQueryError(
                "Error: You have hit the rate limit. "
                "SEC has banned your IP for 10 minutes. "
                "Please wait 10 minutes "
                "before making another request. "
                "https://www.sec.gov/privacy.htm#security")
        else:
            raise EDGARQueryError(
                "The query could not be completed. "
                "There was a client-side error with your "
                "request.")
    elif 500 <= status_code < 600:
        raise EDGARQueryError("The query could not be completed. "
                              "There was a server-side error with "
                              "your request.")
    elif any(error_message in response.text
             for error_message in error_messages):
        raise EDGARQueryError()
    # The body check must run before this one so that EDGAR error pages are
    # reported through the branch above; any other non-200 status is rejected
    # here instead of being silently accepted.
    elif status_code != 200:
        raise EDGARQueryError()
def _get_master_idx_file(self, update_cache=False, **kwargs):
    """Return the master index text, downloading it only when required.

    The downloaded text is stored on ``self._master_idx_file`` and reused on
    later calls unless ``update_cache`` is set.

    Args:
        update_cache (bool, optional): Whether the master index should be
            downloaded again on this call even if a copy is already stored.
            Defaults to False.
        kwargs: Keyword arguments to pass to
            ``secedgar.client._base.AbstractClient.get_response``.

    Returns:
        text (str): Idx file text.

    Raises:
        EDGARQueryError: If ``self.idx_filename`` does not appear in the text
            of the listings directory.
    """
    have_stored_copy = self._master_idx_file is not None
    if have_stored_copy and not update_cache:
        return self._master_idx_file

    # Confirm the file is listed before requesting it.
    if self.idx_filename not in self._get_listings_directory().text:
        not_found = ("File {filename} not found. "
                     "There may be no filings for the given day/quarter.")
        raise EDGARQueryError(not_found.format(filename=self.idx_filename))

    master_idx_url = "{0}{1}".format(self.path, self.idx_filename)
    idx_response = self.client.get_response(
        master_idx_url, self.params, **kwargs)
    self._master_idx_file = idx_response.text
    return self._master_idx_file
def _get_master_idx_file(self, update_cache=False, **kwargs):
    """Get master file with all filings from given date.

    The downloaded text is stored on ``self._master_idx_file`` and reused on
    later calls unless ``update_cache`` is set.

    Args:
        update_cache (bool, optional): Whether the master index should be
            downloaded again on this call even if a copy is already stored.
            Defaults to False.
        kwargs: Keyword arguments to pass to `client.get_response`.

    Returns:
        text (str): Idx file as string.

    Raises:
        EDGARQueryError: If no file of the form master.<DATE>.idx is found
            in the text of the quarterly directory.
    """
    if self._master_idx_file is None or update_cache:
        # Format through the date object itself. The previous unbound call
        # ``datetime.datetime.strftime("%y%m%d", self._date)`` had its
        # arguments swapped and raised TypeError.
        # NOTE(review): "%y" yields a two-digit year; confirm this matches
        # the file names in the directory listing.
        formatted_date = self._date.strftime("%y%m%d")
        formatted_file_name = "master.{date}.idx".format(
            date=formatted_date)

        if formatted_file_name not in self._get_quarterly_directory().text:
            not_found = ("File master.{date}.idx not found. "
                         "There may be no filings for this day.")
            raise EDGARQueryError(not_found.format(date=formatted_date))

        master_idx_url = "{path}/master.{date}.idx".format(
            path=self.path, date=formatted_date)
        # NOTE(review): no ``params`` argument is passed here; confirm the
        # client's ``get_response`` accepts a call without it.
        self._master_idx_file = self.client.get_response(
            master_idx_url, **kwargs).text
    return self._master_idx_file
def get_response(self, path, params, **kwargs):
    """Execute HTTP request and return the response if valid.

    The request is attempted up to ``self.retry_count + 1`` times. The first
    response that passes ``self._validate_response`` is stored on
    ``self.response`` and returned immediately. ``self.pause`` seconds are
    slept after every attempt, successful or not.

    Args:
        path (str): A properly-formatted path; it is turned into a URL by
            ``self._prepare_query``.
        params (dict): Dictionary of parameters to pass to request.
        kwargs: Extra keyword arguments forwarded to ``requests.get``.

    Returns:
        response (requests.Response): A `requests.Response` object.

    Raises:
        EDGARQueryError: If the response is still invalid on the last
            attempt. The error raised by validation is propagated so its
            message is preserved.
    """
    prepared_url = self._prepare_query(path)
    response = None
    last_attempt = self.retry_count
    for attempt in range(last_attempt + 1):
        response = requests.get(prepared_url, params=params, **kwargs)
        try:
            self._validate_response(response)
        except EDGARQueryError:
            # Give up only once every retry has been used; re-raise the
            # original error instead of a blank one to keep its message.
            if attempt == last_attempt:
                raise
        else:
            # Stop at the first valid response instead of spending the
            # remaining retries on requests that are no longer needed.
            self.response = response
            return self.response
        finally:
            # Pause after every request, including the final one.
            time.sleep(self.pause)
    # Only reached when the loop body never ran (negative retry count).
    self.response = response
    return self.response
def _fetch_report(self, company_code, cik, priorto, count, filing_type):
    """Fetch filings.

    Queries EDGAR's ``browse-edgar`` endpoint for the company's filings,
    builds the document list from the response text and saves the documents
    through ``self._save_in_directory``. Progress is reported with ``print``.

    Args:
        company_code (str): Code used to help find company filings.
            Often the company's ticker is used.
        cik (Union[str, int]): Central Index Key assigned by SEC.
            See https://www.sec.gov/edgar/searchedgar/cik.htm to search for
            a company's CIK.
        priorto (Union[str, datetime.datetime]): Most recent report to
            consider. Must be in form 'YYYYMMDD' or valid
            ``datetime.datetime`` object.
        count (int): Value sent as the ``count`` query parameter
            (presumably the number of filings requested; confirm against
            the EDGAR endpoint).
        filing_type (str): Choose from list of valid filing types.
            Includes '10-Q', '10-K', '8-K', '13-F', 'SD'.

    Returns:
        None

    Raises:
        EDGARQueryError: If the EDGAR request does not return status 200.
    """
    priorto = sanitize_date(priorto)
    cik = self._check_cik(cik)
    self._make_directory(company_code, cik, priorto, filing_type)

    # generate the url to crawl
    base_url = "http://www.sec.gov/cgi-bin/browse-edgar"
    params = {
        'action': 'getcompany',
        'owner': 'exclude',
        'output': 'xml',
        'CIK': cik,
        'type': filing_type,
        'dateb': priorto,
        'count': count,
    }
    print("started {filing_type} {company_code}".format(
        filing_type=filing_type, company_code=company_code))

    r = requests.get(base_url, params=params)
    if r.status_code != 200:
        raise EDGARQueryError(r.status_code)

    # get doc list data
    docs = self._create_document_list(r.text)

    try:
        self._save_in_directory(company_code, cik, priorto, filing_type,
                                docs)
    except Exception as e:
        # Saving is best-effort: report the problem and carry on.
        print(str(e))
    else:
        # Claim success only when saving did not fail.
        print("Successfully downloaded all the files")
def _validate_response(response):
    """Ensure response from EDGAR is valid.

    A response is accepted only when its status code is 200 and its body
    contains none of the known EDGAR error messages.

    Args:
        response (requests.Response): Response object to inspect. Only its
            ``status_code`` and ``text`` attributes are read.

    Raises:
        EDGARQueryError: If the status code is in the 4xx or 5xx range, if
            the body contains an EDGAR error message, or if the status code
            is anything other than 200.
    """
    error_messages = ("The value you submitted is not valid",
                      "No matching Ticker Symbol.",
                      "No matching CIK.",
                      "No matching companies.")
    status_code = response.status_code

    if 400 <= status_code < 500:
        if status_code == 400:
            # NOTE(review): 400 is "Bad Request"; the wording below is kept
            # unchanged for callers that may match on it.
            raise EDGARQueryError("The query could not be completed. "
                                  "The page does not exist.")
        elif status_code == 429:
            # A space now separates the final sentence from the URL; the
            # adjacent literals previously produced "request.https://...".
            raise EDGARQueryError(
                "Error: You have hit the rate limit. "
                "SEC has banned your IP for 10 minutes. "
                "Please wait 10 minutes "
                "before making another request. "
                "https://www.sec.gov/privacy.htm#security")
        else:
            raise EDGARQueryError(
                "The query could not be completed. "
                "There was a client-side error with your "
                "request.")
    elif 500 <= status_code < 600:
        raise EDGARQueryError("The query could not be completed. "
                              "There was a server-side error with "
                              "your request.")
    elif any(error_message in response.text
             for error_message in error_messages):
        raise EDGARQueryError()
    # Need to check for error messages before checking for 200 status code
    elif status_code != 200:
        raise EDGARQueryError()