Python NetworkClient.get_soup 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: secedgar.client.network_client

클래스/타입: NetworkClient

메소드/함수: get_soup

hotexamples.com에서의 예제들: 2

Python NetworkClient.get_soup - 2개의 예제가 발견되었습니다. 이것들은 오픈소스 프로젝트에서 추출된 Python의 secedgar.client.network_client.NetworkClient.get_soup에 대한 실세계 최고 등급의 예제들입니다. 예제들을 평가하여 예제의 품질 향상에 도움을 줄 수 있습니다.

자주 사용되는 메소드들

보기 숨기기

NetworkClient(4)

get_soup(2)

자주 사용되는 메소드들

NetworkClient (4)

get_soup (2)

예제 #1

파일 보기

파일: filing.py 프로젝트: tomfunk/sec-edgar

class Filing(AbstractFiling):
    """Base class for receiving EDGAR filings.

    Attributes:
        cik (str): Central Index Key (CIK) for company of interest.
        filing_type (secedgar.filings.filing_types.FilingType): Valid filing type enum.
        start_date (Union[str, datetime.datetime], optional): Date before which not to
            fetch reports. Stands for "date after."
            Defaults to None (will fetch all filings before end_date).
        end_date (Union[str, datetime.datetime], optional): Date after which not to fetch reports.
            Stands for "date before." Defaults to today (resolved when the
            object is created).
        client (optional): Client used to make requests. When None, a
            ``NetworkClient`` is created from ``kwargs``.
        kwargs: Passed to ``NetworkClient`` when no client is given. A ``count``
            entry, if present and not None, is also added to the request parameters.

    Raises:
        FilingTypeError: If ``filing_type`` is not a ``FilingType``.

    .. versionadded:: 0.1.5
    """

    # TODO: Maybe allow NetworkClient to take in kwargs
    #  (set to None and if None, create NetworkClient with kwargs)
    def __init__(self,
                 cik,
                 filing_type,
                 start_date=None,
                 end_date=None,
                 client=None,
                 **kwargs):
        # A default of ``datetime.datetime.today()`` in the signature would be
        # evaluated only once, when the class is defined; resolve it per call.
        if end_date is None:
            end_date = datetime.datetime.today()
        self._start_date = start_date
        self._end_date = end_date
        if not isinstance(filing_type, FilingType):
            raise FilingTypeError
        self._filing_type = filing_type
        if not isinstance(cik, CIK):  # make CIK for users if not given
            cik = CIK(cik)
        self._cik = cik
        self._accession_numbers = []
        self._params = {
            'action': 'getcompany',
            'dateb': sanitize_date(self.end_date),
            'output': 'xml',
            'owner': 'include',
            'start': 0,
            'type': self.filing_type.value
        }
        if kwargs.get('count') is not None:
            self._params['count'] = kwargs.get('count')
        if start_date is not None:
            self._params['datea'] = sanitize_date(start_date)
        # Keep the caller's client when one is supplied; otherwise build the
        # default NetworkClient from kwargs. (Previously a supplied client was
        # silently dropped and ``self._client`` was never set.)
        self._client = NetworkClient(**kwargs) if client is None else client

    @property
    def path(self):
        """str: Path added to client base."""
        return "cgi-bin/browse-edgar"

    @property
    def params(self):
        """:obj:`dict`: Parameters to include in requests."""
        return self._params

    @property
    def client(self):
        """``secedgar.client.base``: Client to use to make requests."""
        return self._client

    @property
    def start_date(self):
        """Union([datetime.datetime, str]): Date before which no filings are fetched."""
        return self._start_date

    @start_date.setter
    def start_date(self, val):
        self._start_date = val
        self._params['datea'] = sanitize_date(val)

    @property
    def end_date(self):
        """Union([datetime.datetime, str]): Date after which no filings are fetched."""
        return self._end_date

    @end_date.setter
    def end_date(self, val):
        self._end_date = val
        self._params['dateb'] = sanitize_date(val)

    @property
    def filing_type(self):
        """``secedgar.filings.FilingType``: FilingType enum of filing."""
        return self._filing_type

    @filing_type.setter
    def filing_type(self, filing_type):
        if not isinstance(filing_type, FilingType):
            raise FilingTypeError
        self._filing_type = filing_type
        self._params['type'] = filing_type.value

    @property
    def accession_numbers(self):
        """:obj:`list`: Accession numbers stored on this object (initially empty)."""
        return self._accession_numbers

    @property
    def ciks(self):
        """:obj:`list` of :obj:`str`: List of CIK strings."""
        return self._cik.ciks

    def get_urls(self, **kwargs):
        """Get urls for all CIKs given to Filing object.

        Args:
            kwargs: Anything to be passed to requests when making get request.

        Returns:
            urls (list): List of urls for txt files to download.
        """
        urls = []
        for cik in self.ciks:
            urls.extend(self._get_urls_for_cik(cik, **kwargs))
        return urls

    # TODO: Change this to return accession numbers that are turned into URLs later
    def _get_urls_for_cik(self, cik, **kwargs):
        """
        Get all urls for specific company according to CIK that match
        start date, end date, filing_type, and count parameters.

        Args:
            cik (str): CIK for company.
            kwargs: Anything to be passed to requests when making get request.

        Returns:
            txt_urls (list of str): Up to the desired number of URLs for that specific company
            if available.
        """
        self.params['CIK'] = cik
        links = []
        self.params["start"] = 0  # set start back to 0 before paginating

        # TODO: Make paginate utility outside of this class
        while len(links) < self._client.count:
            data = self._client.get_soup(self.path, self.params, **kwargs)
            # Search the page once and reuse the result for the emptiness check.
            page_links = [link.string for link in data.find_all("filinghref")]
            links.extend(page_links)
            # TODO: Consider making client adopt most efficient count
            self.params["start"] += self._client.count
            if not page_links:
                break  # break if no more filings left

        # Replace everything from the last "-" onward with ".txt"
        txt_urls = [link[:link.rfind("-")] + ".txt" for link in links]
        return txt_urls[:self.client.count]

    @staticmethod
    def _get_accession_numbers(links):
        """Gets accession numbers given list of links of the form
        https://www.sec.gov/Archives/edgar/data/<cik>/<first part of accession number before '-'>
        /<accession number>-index.htm

        Args:
            links (list): List of links to extract accession numbers from.

        Returns:
            List of accession numbers for given links.
        """
        return [link.split('/')[-1].replace('-index.htm', '') for link in links]

    # TODO: break this method down further
    def save(self, directory):
        """Save files in specified directory.
        Each txt url looks something like:
        https://www.sec.gov/Archives/edgar/data/1018724/000101872419000043/0001018724-19-000043.txt

        Each file is written to
        ``<directory>/<file name part before first '-'>/<filing type value>/<file name>``.

        Args:
            directory (str): Path to directory where files should be saved.

        Returns:
            None

        Raises:
            ValueError: If no text urls are available for given filing object.
        """
        urls = self.get_urls()
        if not urls:
            raise ValueError("No filings available.")
        for url in urls:
            doc_name = url.split("/")[-1]
            cik = doc_name.split('-')[0]
            data = requests.get(url).text
            path = os.path.join(directory, cik, self.filing_type.value)
            make_path(path)
            path = os.path.join(path, doc_name)
            with open(path, "w") as f:
                f.write(data)

예제 #2

파일 보기

파일: cik_validator.py 프로젝트: tomfunk/sec-edgar

class CIKValidator(object):
    """Validates company tickers and/or company names based on CIK availability.

    Used internally by the CIK class. Not intended for outside use.

    Args:
        lookups (Union[str, list, tuple]): List of tickers and/or company names for
            which to find CIKs.
        client (optional): Client used to make requests. When None, a
            ``NetworkClient`` is created from ``kwargs``.
        **kwargs: Any keyword arguments needed to be passed to
            _EDGARBase (see class for more details).

    Raises:
        TypeError: If ``lookups`` is empty, not iterable, or contains
            non-string items.

    .. versionadded:: 0.1.5
    """
    def __init__(self, lookups, client=None, **kwargs):
        if isinstance(lookups, str):
            self._lookups = [lookups]  # make single string into list
        else:
            try:
                # Check that iterable only contains strings and is not empty
                if not lookups or not all(isinstance(o, str) for o in lookups):
                    raise TypeError
                self._lookups = lookups
            except TypeError:
                # Also reached when ``lookups`` is not iterable at all.
                raise TypeError("CIKs must be given as string or iterable.")
        self._params = {'action': 'getcompany'}
        # Keep the caller's client when one is supplied; otherwise build the
        # default NetworkClient from kwargs. (Previously a supplied client was
        # silently dropped and ``self._client`` was never set.)
        self._client = NetworkClient(**kwargs) if client is None else client

    @property
    def path(self):
        """str: Path to add to client base."""
        return "cgi-bin/browse-edgar"

    @property
    def client(self):
        """``secedgar.client.base``: Client to use to fetch requests."""
        return self._client

    @property
    def params(self):
        """:obj:`dict` Search parameters to add to client."""
        return self._params

    def get_ciks(self):
        """
        Validate lookup values and return corresponding CIKs.

        Lookups whose result fails validation (``CIKError``) are left out of
        the returned dictionary.

        Returns:
            ciks (dict): Dictionary with lookup terms as keys and CIKs as values.

        """
        ciks = dict()
        for lookup in self._lookups:
            try:
                result = self._get_cik(lookup)
                self._validate_cik(result)  # raises error if not valid CIK
                ciks[lookup] = result
            except CIKError:
                pass  # If multiple companies, found, just print out warnings
        return ciks

    def _get_cik(self, lookup):
        """
        Get cik for lookup value.

        The lookup is first sent as the ``CIK`` request parameter; if the
        client raises ``EDGARQueryError`` it is retried as the ``company``
        parameter. Both parameters are removed again before returning.

        Args:
            lookup (str): Value to look up.

        Returns:
            The first whitespace-separated token of the link text inside the
            ``companyName`` span, or None (after emitting warnings) when that
            span or link is not found.

        Raises:
            TypeError: If lookup is not a string.
            EDGARQueryError: If the fallback request fails, or no list of
                candidate companies can be found in the response.
        """
        self._validate_lookup(lookup)
        try:  # try to lookup by CIK
            self._params['CIK'] = lookup
            soup = self._client.get_soup(self.path, self.params)
        except EDGARQueryError:  # fallback to lookup by company name
            del self._params[
                'CIK']  # delete this parameter so no conflicts arise
            self._params['company'] = lookup
            soup = self._client.get_soup(self.path, self.params)
        try:  # try to get single CIK for lookup
            span = soup.find('span', {'class': 'companyName'})
            return span.find('a').getText().split()[0]  # returns single CIK
        except AttributeError:  # warn and skip if multiple possibilities for CIK found
            warnings.warn(
                "Lookup '{0}' will be skipped. "
                "Found multiple companies matching '{0}':".format(lookup))
            warnings.warn('\n'.join(self._get_cik_possibilities(soup)))
        finally:
            # Delete parameters after lookup so they do not leak into the
            # next request made with the shared params dict.
            self._params.pop('company', None)
            self._params.pop('CIK', None)

    @staticmethod
    def _get_cik_possibilities(soup):
        """Get all CIK possibilities if multiple are listed.

        Args:
            soup (BeautifulSoup): BeautifulSoup object to search through.

        Returns:
            All possible companies that match lookup.

        Raises:
            EDGARQueryError: If the results table cannot be found.
        """
        try:
            # Exclude table header
            table_rows = soup.find('table', {
                'summary': 'Results'
            }).find_all('tr')[1:]
            # Company names are in second column of table
            return [
                ''.join(row.find_all('td')[1].find_all(text=True))
                for row in table_rows
            ]
        except AttributeError:
            # If there are no CIK possibilities, then no results were returned
            raise EDGARQueryError

    @staticmethod
    def _validate_cik(cik):
        """Check if CIK is 10 digit string.

        Raises:
            CIKError: If ``cik`` is not a string of exactly 10 digits.
        """
        if not (isinstance(cik, str) and len(cik) == 10 and cik.isdigit()):
            raise CIKError(cik)

    @staticmethod
    def _validate_lookup(lookup):
        """Ensure that lookup is string.

        Args:
            lookup: Value to lookup.

        Raises:
            TypeError: If lookup is not string.
        """
        if not isinstance(lookup, str):
            raise TypeError(
                "Lookup value must be string. Given type {0}.".format(
                    type(lookup)))