コード例 #1
def get_filings_by_company(company, type, n):
    """Fetch the filings of one type for a company in two representations.

    The filing index is requested once from ``company`` and then handed to
    ``Company.get_documents`` twice: once with default options and once with
    ``as_documents=True``.

    Args:
        company: Object exposing ``get_all_filings(filing_type=...)``.
        type: Filing type, forwarded as ``filing_type``. The name shadows the
            builtin but is part of the signature callers use.
        n: Forwarded as ``no_of_documents`` to both ``get_documents`` calls.

    Returns:
        A ``(docs_lxml, docs_data)`` tuple. If the first result is not
        already a list, both results are wrapped in one-element lists so
        that callers can always iterate.
    """
    filings_tree = company.get_all_filings(filing_type=type)
    raw_docs = Company.get_documents(filings_tree, no_of_documents=n)
    parsed_docs = Company.get_documents(
        filings_tree, no_of_documents=n, as_documents=True
    )

    # Only the first result is inspected; the second is assumed to have the
    # same list / non-list shape.
    if isinstance(raw_docs, list):
        return (raw_docs, parsed_docs)
    return ([raw_docs], [parsed_docs])
コード例 #2
def get_filing_metadata(context, name: str, cik: str, filing: str,
                        no_filings: int) -> list:
    """Collect cleaned metadata for a company's filings.

    Args:
        context: Object exposing ``log.info``; used to log the result of the
            type check on the collected filings.
        name: Company name passed to ``Company``.
        cik: Company CIK identifier passed to ``Company`` and to
            ``clean_filings``.
        filing: Filing type passed to ``get_all_filings`` and to
            ``clean_filings``.
        no_filings: Number of filings requested from ``get_documents``.

    Returns:
        A list with one ``clean_filings(document, cik, filing)`` result per
        fetched document.
    """
    comp = Company(name, cik)
    tree = comp.get_all_filings(filing)
    # NOTE(review): the third positional argument is passed as ``True``;
    # confirm which keyword of ``get_documents`` it binds to in the installed
    # edgar version.
    docs = comp.get_documents(tree, no_filings, True)

    # TODO #38 change return method to yield AssetMaterialization()
    # The original wrote ``filings.append[...]`` (subscript instead of call),
    # which raises TypeError on the first document.
    filings = [clean_filings(document, cik, filing) for document in docs]

    context.log.info(log_assert_type(filings, dict))
    return filings
コード例 #3
def pull_10K(company_name, company_id):
    """Fetch up to three 10-K filings for a company and parse them to text.

    Args:
        company_name: Company name passed to ``Company``.
        company_id: Company identifier passed to ``Company``.

    Returns:
        A list with the ``TXTML.parse_full_10K`` result of every document
        that parsed without raising ``IndexError``.
    """
    company = Company(company_name, company_id)
    tree = company.get_all_filings(filing_type="10-K")

    pre_time = time.time()
    offset = random.randint(1, 25)  # seconds
    # NOTE(review): this looks like an unfinished throttle. ``offset`` is at
    # least one second, so the condition holds whenever it is evaluated right
    # after ``pre_time`` is taken. ``docs`` is pre-initialised so the function
    # can no longer hit an unbound name if the condition is ever false.
    docs = []
    if pre_time + offset > time.time():
        docs = Company.get_documents(tree, no_of_documents=3)

    text_l = []
    for doc in docs:
        # NOTE(review): the ``try:`` line and the handler body were missing
        # from the source. Appending the parsed text and skipping documents
        # that raise IndexError is the assumed intent -- confirm.
        try:
            text_l.append(TXTML.parse_full_10K(doc))
        except IndexError:
            continue
    return text_l
コード例 #4
def file_date(com, cik, no_docs):
    """Pull only the filing dates of a company's 10-K filings.

    The filing date serves as the date of measurement for analyzing returns.

    Args:
        com: Company name passed to ``Company``.
        cik: Company CIK identifier passed to ``Company``.
        no_docs: Maximum number of filing dates to return.

    Returns:
        A list with the ``'Filing Date'`` entry of each fetched document's
        ``content`` mapping, at most ``no_docs`` long.
    """
    # NOTE(review): ``no_docs`` is also passed as the third positional
    # argument of ``Company``; confirm that this is what the constructor
    # expects there.
    company = Company(com, cik, no_docs)
    tree = company.get_all_filings(filing_type="10-K")

    # NOTE(review): this call was truncated after ``tree,`` in the source.
    # ``as_documents=True`` is assumed because the loop below reads
    # ``doc.content`` -- confirm against the original.
    docs = Company.get_documents(
        tree, no_of_documents=no_docs, as_documents=True
    )
    # get_documents may hand back a bare object instead of a list
    # (presumably when a single document is requested); normalise it.
    if not isinstance(docs, list):
        docs = [docs]

    # Iterate over what was actually returned instead of indexing
    # ``range(no_docs)``, which raised IndexError when fewer filings exist.
    return [doc.content['Filing Date'] for doc in docs[:max(no_docs, 0)]]
コード例 #5
def pull_10K(name, company_id):
    """Fetch up to six 10-K filings for a company and parse them to text.

    This function performs the "get filings" step. It is meant to be run
    while iterating over a list of tickers; each ticker gets parsed and the
    results are collected into a dataframe by the caller.

    Args:
        name: Company name passed to ``Company``.
        company_id: Company identifier passed to ``Company``.

    Returns:
        A list with the ``TXTML.parse_full_10K`` result of every document
        that parsed without raising ``IndexError``.
    """
    company = Company(name, company_id)
    tree = company.get_all_filings(filing_type="10-K")

    docs = Company.get_documents(tree, no_of_documents=6)

    text_l = []
    for doc in docs:
        # NOTE(review): the ``try:`` line and the handler body were missing
        # from the source. Appending the parsed text and skipping documents
        # that raise IndexError is the assumed intent -- confirm.
        try:
            text_l.append(TXTML.parse_full_10K(doc))
        except IndexError:
            continue
    return text_l
コード例 #6
def get_edgar_filing_text(comp_tuples, f_type, n_docs, file_dir, dates_dir):
    """Scrape the text of company filings from EDGAR and save it to disk.

    Args:
        comp_tuples: A list with pairwise company tuples. The first element
            must be a string with the company name as listed on the EDGAR
            database. The second element must be a string with the CIK
            identifier. See get_sp500_tickers_cik_industry(argin) to easily
            get the tuples from the S&P500 listed firms.
        f_type: A string with the filing type.
        n_docs: Number of filings to be fetched, in descending order, i.e.
            n_docs = 3 will fetch the three newest filings of type f_type.
            An integer.
        file_dir: The master directory where all filings are to be saved,
            as a string.
        dates_dir: The master directory where all filing dates are saved. If
            a company's directory is missing, the dates are scraped with
            get_edgar_filing_date(argin), which is expected to create it.

    Side effects:
        Prints progress to stdout, creates ``file_dir/f_type/<company>``
        directories, writes one ``.txt`` file per filing, and leaves the
        working directory set to the last company's output directory.

    Example::

        comp_tuples = [['APPLE INC'     , '0000320193'],
                       ['MCDONALDS CORP', '0000063908'],
                       ['MICROSOFT CORP', '0000789019']]

        f_type      = '10-K'     [Or '10-Q']
        n_docs      = 3
        file_dir    = 'Users/Tobias/Dropbox/textfolder/Text Reports U.S.'
        dates_dir   = 'Users/Tobias/Dropbox/textfolder/Dates Reports U.S'
    """
    # Resolve both roots once: the loop changes the working directory, which
    # would silently re-anchor relative paths from the second company on.
    file_root = os.path.abspath(file_dir)
    dates_root = os.path.abspath(dates_dir)

    print('Fetching data...')
    print('-' * 80 + '\n')
    for idx, comp_tuple in enumerate(comp_tuples):
        comp = edgar.Company(comp_tuple[0], comp_tuple[1])
        tree = comp.get_all_filings(filing_type=f_type)
        docs = Company.get_documents(tree, no_of_documents=n_docs)
        # get_documents may hand back a bare object instead of a list
        # (presumably when a single document is requested); normalise it.
        if not isinstance(docs, list):
            docs = [docs]

        # Now that we have the filings, get the filing dates for each
        # document. If we have them already, then great, let's load them. If
        # not, call get_edgar_filing_date to get them for this company.
        dates_folder = os.path.join(dates_root, f_type, comp_tuple[0])
        if not os.path.exists(dates_folder):
            print(('\nCannot find the dates for ' + comp_tuple[0] +
                   '. Attempting to download them...'))
            get_edgar_filing_date([comp_tuple], f_type, dates_root)

        # File and folder names drop one trailing period. A local copy is
        # used so the caller's entry is not mutated (the original assigned
        # to comp_tuple[0], which also fails for real tuples).
        comp_name = comp_tuple[0]
        if comp_name.endswith('.'):
            comp_name = comp_name[:-1]

        # NOTE(review): the source only loaded the dates when the folder had
        # just been created, leaving ``dates`` unbound otherwise, and opened
        # the pickle relative to the working directory. Loading in both cases
        # and looking inside the company's dates folder first is an assumed
        # reconstruction -- confirm where get_edgar_filing_date writes.
        pickle_name = comp_name + '.pickle'
        pickle_path = os.path.join(dates_folder, pickle_name)
        if not os.path.exists(pickle_path):
            pickle_path = pickle_name  # original behaviour: relative to cwd
        # SECURITY: pickle.load can execute arbitrary code; only point
        # dates_dir at files this project produced itself.
        with open(pickle_path, 'rb') as file:
            dates = pickle.load(file)[:n_docs]

        # os.path.join replaces the hard-coded '\\' separators so the layout
        # is the same on every platform.
        out_dir = os.path.join(file_root, f_type, comp_name)
        os.makedirs(out_dir, exist_ok=True)
        os.chdir(out_dir)
        for date, doc in zip(dates, docs):
            out_path = os.path.join(
                out_dir, date.replace('.pickle', '') + '.txt')
            # NOTE(review): the ``open(`` call was truncated in the source
            # and the write step is missing. Writing the parsed filing text
            # as UTF-8 is an assumption -- confirm mode, encoding and parser.
            with open(out_path, 'w', encoding='utf-8') as f:
                f.write(edgar.TXTML.parse_full_10K(doc))

        mes = ('Status: ' + str(int(
            (idx + 1) / len(comp_tuples) * 100)) + '% done')
        sys.stdout.write('\r' + mes)