예제 #1
0
def findWord(comp,cik):
    """Report whether a company's 10-K text mentions "blockchain".

    Args:
        comp: Company name, passed through to ``Company``.
        cik: CIK identifier, passed through to ``Company``.

    Returns:
        ``"exists"`` if the parsed 10-K text contains "blockchain"
        (case-insensitive), ``"dosenot"`` if it does not, and ``"No 10-k"``
        if any step of fetching, parsing or searching raises. The spelling
        of ``"dosenot"`` is kept as-is because callers may compare against it.
    """
    try:
        company = Company(comp, cik)
        doc = company.get_10K()
        text = TXTML.parse_full_10K(doc)
        # The search stays inside the try so that an unusable parse result
        # (e.g. a non-string) is still reported as "No 10-k".
        if re.search('blockchain', text, re.IGNORECASE):
            return "exists"
        return "dosenot"
    except Exception:
        # Best-effort lookup: every ordinary failure maps to "No 10-k".
        # Catching Exception rather than using a bare except lets
        # KeyboardInterrupt and SystemExit propagate.
        return "No 10-k"
예제 #2
0
from sumy.utils import get_stop_words
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
import sys

# company = Company("INTERNATIONAL BUSINESS MACHINES CORP", "0000051143")
# company2 = Company("twitter", "0001418091")
# company3 = Company("Oracle Corp", "0001341439")
company4 = Company("GOOGLE INC", "0001288776")

# edgar = Edgar()
# possible_companies = edgar.find_company_name("Cisco System")
#
# print(possible_companies)

# Fetch the company's 10-K document and parse it into text.
doc = company4.get_10K()
text = TXTML.parse_full_10K(doc)

# Progress marker printed once the filing has been fetched and parsed.
print('1')

# Dump the parsed text to disk. The context manager closes the file even if
# the write raises; the explicit encoding keeps non-ASCII characters in the
# filing from failing under a narrow locale default. Mode is "w" because the
# handle is only written to, never read.
with open("text2.txt", "w", encoding="utf-8") as f:
    f.write(text)

# f = open('text.txt', 'r')
# for line in f:
#     print(line)
#     print()
#     print()
#     print()
예제 #3
0
from edgar import Company, TXTML
import re
import pandas as pd

# Load the company list spreadsheet; the loop below reads its 'CIK' and
# 'Company Name' columns row by row.
df = pd.read_excel(r'companylist.xls')

# Results gathered by the loop below, which appends NaN when fetching the
# 10-K raises IndexError or when the parsed text contains no 'hipping' match.
expense_estimates = []
for i in df.index:
    print(expense_estimates)
    CIK_string = df['CIK'][i].split("; ")
    print(df['Company Name'][i])
    company = Company("df['Company Name'][i]", CIK_string[0])
    try:
        doc = company.get_10K()
        text = TXTML.parse_full_10K(doc)
    except IndexError:
        expense_estimates.append(float("NaN"))
        continue
    if not ('hipping' in text):
        expense_estimates.append(float("NaN"))
        continue
    matches = [m.start() for m in re.finditer('hipping', text)]
    #print(matches)
    string = ""
    est_available = False
    for i in matches:
        if '$' in text[i:i + 50]:
            string = text[i:i + 200]
            est_available = True
            break
    if not est_available: