def findWord(comp, cik):
    """Report whether a company's 10-K filing mentions "blockchain".

    Args:
        comp: Company name, passed through to ``Company``.
        cik: CIK identifier, passed through to ``Company``.

    Returns:
        ``"exists"`` if the parsed 10-K text contains "blockchain"
        (case-insensitive), ``"dosenot"`` if it does not, and ``"No 10-k"``
        if building the company, fetching the filing, parsing it, or
        searching it raised an exception.
    """
    try:
        company = Company(comp, cik)
        doc = company.get_10K()
        text = TXTML.parse_full_10K(doc)
        if re.search('blockchain', text, re.IGNORECASE):
            return "exists"
        # Spelling kept as-is: callers may compare against this exact string.
        return "dosenot"
    except Exception:
        # Best-effort lookup: any ordinary failure is reported as a missing
        # filing. Unlike a bare ``except:``, this lets KeyboardInterrupt and
        # SystemExit propagate so a long batch run can still be interrupted.
        return "No 10-k"
from sumy.utils import get_stop_words
from sumy.parsers.plaintext import PlaintextParser
from sumy.nlp.tokenizers import Tokenizer
import sys

# Company and TXTML are used below; import them here so this section does
# not depend on an import that only appears elsewhere in the file.
from edgar import Company, TXTML

# Fetch the full text of one company's 10-K filing and dump it to text2.txt.
#
# Other filers that were tried (swap into the Company(...) call below):
#   Company("INTERNATIONAL BUSINESS MACHINES CORP", "0000051143")
#   Company("twitter", "0001418091")
#   Company("Oracle Corp", "0001341439")
company4 = Company("GOOGLE INC", "0001288776")

# To look a company up by name instead:
#   edgar = Edgar()
#   possible_companies = edgar.find_company_name("Cisco System")
#   print(possible_companies)

doc = company4.get_10K()
text = TXTML.parse_full_10K(doc)
print('1')

# The context manager closes the file even if the write raises.
with open("text2.txt", "w+") as f:
    f.write(text)

# To echo a previously saved dump line by line:
#   f = open('text.txt', 'r')
#   for line in f:
#       print(line)
from edgar import Company, TXTML import re import pandas as pd df = pd.read_excel(r'companylist.xls') expense_estimates = [] for i in df.index: print(expense_estimates) CIK_string = df['CIK'][i].split("; ") print(df['Company Name'][i]) company = Company("df['Company Name'][i]", CIK_string[0]) try: doc = company.get_10K() text = TXTML.parse_full_10K(doc) except IndexError: expense_estimates.append(float("NaN")) continue if not ('hipping' in text): expense_estimates.append(float("NaN")) continue matches = [m.start() for m in re.finditer('hipping', text)] #print(matches) string = "" est_available = False for i in matches: if '$' in text[i:i + 50]: string = text[i:i + 200] est_available = True break if not est_available: