コード例 #1
0
    def _validate_response(response):
        """Check an EDGAR response and raise if it signals a failure.

        The HTTP status code is examined first; only when it is outside the
        4xx and 5xx ranges is the response body searched for the error
        messages listed below.

        Args:
            response (requests.response): Response object whose
                ``status_code`` and ``text`` attributes are inspected.

        Raises:
            EDGARQueryError: If the status code is in the 4xx or 5xx range,
                or if the body contains one of the known error messages.
        """
        code = response.status_code

        # HTTP-level failures, each reported with its own message.
        if code == 400:
            raise EDGARQueryError(
                "The query could not be completed. The page does not exist.")
        if 400 < code < 500:
            raise EDGARQueryError(
                "The query could not be completed. "
                "There was a client-side error with your request.")
        if 500 <= code < 600:
            raise EDGARQueryError(
                "The query could not be completed. "
                "There was a server-side error with your request.")

        # The status looked fine, so fall back to scanning the body for the
        # error text that can be embedded in an otherwise normal page.
        known_errors = (
            "The value you submitted is not valid",
            "No matching Ticker Symbol.",
            "No matching CIK.",
            "No matching companies.",
        )
        for known_error in known_errors:
            if known_error in response.text:
                raise EDGARQueryError()
コード例 #2
0
ファイル: _index.py プロジェクト: kmart42/EDGAR_scrape
    def _get_master_idx_file(self, update_cache=False, **kwargs):
        """Return the text of the master index file, downloading it if needed.

        The downloaded text is stored on ``self._master_idx_file`` and reused
        on later calls unless ``update_cache`` is true.

        Args:
            update_cache (bool, optional): Whether to download the index
                again even if a copy is already stored. Defaults to False.
            kwargs: Keyword arguments to pass to
                ``secedgar.client._base.AbstractClient.get_response``.

        Returns:
            text (str): Idx file text.

        Raises:
            EDGARQueryError: If ``self.idx_filename`` does not appear in the
                text of the listings directory.
        """
        cached_text = self._master_idx_file
        if cached_text is not None and not update_cache:
            return cached_text

        # The index can only be fetched when the directory listing names it.
        if self.idx_filename not in self._get_listings_directory().text:
            raise EDGARQueryError("""File {filename} not found.
                                     There may be no filings for the given day/quarter."""
                                  .format(filename=self.idx_filename))

        index_url = "{path}{filename}".format(path=self.path,
                                              filename=self.idx_filename)
        index_response = self.client.get_response(index_url, self.params,
                                                  **kwargs)
        self._master_idx_file = index_response.text
        return self._master_idx_file
コード例 #3
0
    def _get_master_idx_file(self, update_cache=False, **kwargs):
        """Get master file with all filings from given date.

        The downloaded text is stored on ``self._master_idx_file`` and reused
        on later calls unless ``update_cache`` is true.

        Args:
            update_cache (bool, optional): Whether the master index should be
                downloaded again on this method call even if a copy is
                already stored. Defaults to False.
            kwargs: Keyword arguments to pass to `client.get_response`.

        Returns:
            text (str): Idx file as string.

        Raises:
            EDGARQueryError: If no file of the form master.<DATE>.idx
                is found.
        """
        if self._master_idx_file is None or update_cache:
            # Call strftime on the date object itself. Invoking it through the
            # class with the format string first binds the string as the
            # instance and raises TypeError.
            formatted_date = self._date.strftime("%y%m%d")
            formatted_file_name = "master.{date}.idx".format(
                date=formatted_date)
            # Only request the file when the directory listing mentions it.
            if formatted_file_name in self._get_quarterly_directory().text:
                master_idx_url = "{path}/master.{date}.idx".format(
                    path=self.path, date=formatted_date)
                self._master_idx_file = self.client.get_response(
                    master_idx_url, **kwargs).text
            else:
                raise EDGARQueryError("""File master.{date}.idx not found.
                                     There may be no filings for this day.""".
                                      format(date=formatted_date))
        return self._master_idx_file
コード例 #4
0
    def get_response(self, path, params, **kwargs):
        """Execute HTTP request and return the response if valid.

        The request is attempted up to ``self.retry_count + 1`` times. The
        loop stops at the first response that passes validation, and
        ``self.pause`` seconds are slept after every attempt.

        Args:
            path (str): A properly-formatted path
            params (dict): Dictionary of parameters to pass
                to request.
            kwargs: Extra keyword arguments forwarded to ``requests.get``.

        Returns:
            response (requests.Response): A `requests.Response` object.
                It is also stored on ``self.response``.

        Raises:
            EDGARQueryError: If the response is still invalid on the last
                attempt. The error raised by validation is propagated so its
                message is preserved.
        """
        prepared_url = self._prepare_query(path)
        response = None
        for attempt in range(self.retry_count + 1):
            response = requests.get(prepared_url, params=params, **kwargs)
            try:
                self._validate_response(response)
            except EDGARQueryError:
                # Out of retries: re-raise the validation error as-is rather
                # than replacing it with a message-less one.
                if attempt == self.retry_count:
                    raise
            else:
                # Valid response: stop here instead of re-sending the same
                # request for every remaining retry slot.
                break
            finally:
                # Pause after every request, successful or not.
                time.sleep(self.pause)
        self.response = response
        return self.response
コード例 #5
0
    def _fetch_report(self, company_code, cik, priorto, count, filing_type):
        """Fetch filings.

        Queries the EDGAR ``browse-edgar`` endpoint for the company's filing
        list, builds the document list from the returned text and saves the
        documents. A failure while saving is printed rather than raised.

        Args:
          company_code (str): Code used to help find company filings.
              Often the company's ticker is used.
          cik (Union[str, int]): Central Index Key assigned by SEC.
              See https://www.sec.gov/edgar/searchedgar/cik.htm to search for
              a company's CIK.
          priorto (Union[str, datetime.datetime]): Most recent report to consider.
              Must be in form 'YYYYMMDD' or
              valid ``datetime.datetime`` object.
          count (int): Value sent as the ``count`` query parameter.
              NOTE(review): presumably the number of filings to request;
              confirm against callers.
          filing_type (str): Choose from list of valid filing types.
              Includes '10-Q', '10-K', '8-K', '13-F', 'SD'.

        Returns:
          None

        Raises:
          EDGARQueryError: If the request does not return status code 200.
        """
        priorto = sanitize_date(priorto)
        cik = self._check_cik(cik)
        self._make_directory(company_code, cik, priorto, filing_type)

        # generate the url to crawl
        base_url = "http://www.sec.gov/cgi-bin/browse-edgar"
        params = {
            'action': 'getcompany',
            'owner': 'exclude',
            'output': 'xml',
            'CIK': cik,
            'type': filing_type,
            'dateb': priorto,
            'count': count
        }
        print("started {filing_type} {company_code}".format(
            filing_type=filing_type, company_code=company_code))
        r = requests.get(base_url, params=params)
        if r.status_code != 200:
            raise EDGARQueryError(r.status_code)

        # get doc list data
        docs = self._create_document_list(r.text)

        try:
            self._save_in_directory(company_code, cik, priorto,
                                    filing_type, docs)
        except Exception as e:
            # Saving stays best-effort: report the problem and carry on.
            print(str(e))  # Need to use str for Python 2.5
        else:
            # Only claim success when saving did not fail.
            print("Successfully downloaded all the files")
コード例 #6
0
ファイル: network_client.py プロジェクト: alexanu/sec-edgar
    def _validate_response(response):
        """Ensure response from EDGAR is valid.

        Checks run in this order: 4xx status codes (with dedicated messages
        for 400 and 429), 5xx status codes, known error text in the body,
        and finally any remaining status code other than 200.

        Args:
            response (requests.response): A requests.response object.

        Raises:
            EDGARQueryError: If the status code is not 200 or the response
                contains an EDGAR error message.
        """
        error_messages = ("The value you submitted is not valid",
                          "No matching Ticker Symbol.", "No matching CIK.",
                          "No matching companies.")

        status_code = response.status_code

        if 400 <= status_code < 500:
            if status_code == 400:
                raise EDGARQueryError("The query could not be completed. "
                                      "The page does not exist.")
            elif status_code == 429:
                # The trailing space after "request." keeps the sentence
                # from running straight into the URL.
                raise EDGARQueryError(
                    "Error: You have hit the rate limit. "
                    "SEC has banned your IP for 10 minutes. "
                    "Please wait 10 minutes "
                    "before making another request. "
                    "https://www.sec.gov/privacy.htm#security")
            else:
                raise EDGARQueryError(
                    "The query could not be completed. "
                    "There was a client-side error with your "
                    "request.")
        elif 500 <= status_code < 600:
            raise EDGARQueryError("The query could not be completed. "
                                  "There was a server-side error with "
                                  "your request.")
        elif any(error_message in response.text
                 for error_message in error_messages):
            raise EDGARQueryError()
        # Need to check for error messages before checking for 200 status code
        elif status_code != 200:
            raise EDGARQueryError()