def get_comp_tuples(companies, data_folder):
    '''Fuzzy-match company names against the pickled ``sp500tickcik`` table.

    For each entry of ``companies`` the record whose field 3 scores highest
    under ``fuzz.ratio`` is selected.  Apostrophes and periods are stripped
    from that field, the cleaned text is passed to
    ``edgar.Edgar().findCompanyName``, and the first lookup result together
    with field 1 of the matched record is appended to ``tick_cik``.

    Args:
        companies: Iterable of company names to resolve.
        data_folder: Not referenced by the code below; the table is opened
            under the hard-coded relative name ``'sp500tickcik'``.

    Raises:
        ValueError: If the table is empty (no maximum score exists).

    NOTE(review): ``np.append`` flattens its arguments, so ``tick_cik`` is a
    flat 1-D array ``[result0, field0, result1, field1, ...]`` rather than a
    sequence of tuples -- confirm this is what callers expect.
    NOTE(review): the statements end without a ``return``; confirm whether
    ``tick_cik`` is meant to be returned.
    '''
    # NOTE: pickle can execute arbitrary code while loading; only point this
    # at a trusted 'sp500tickcik' file.
    with open('sp500tickcik', 'rb') as file:
        data = pickle.load(file)

    # Collect field 3 of every record in one pass instead of growing an
    # array with np.append (which copies the whole array on every call).
    # Index-based access is kept because the pickled container type is not
    # known here.
    names = [data[idx][3] for idx in range(len(data))]

    tick_cik = []
    edgar_t = edgar.Edgar()
    for name in companies:
        score = [fuzz.ratio(textTemp, name) for textTemp in names]
        # argmax yields the first index holding the maximum, i.e. the same
        # element the former np.where(score == np.amax(score))[0][0] chose.
        maxInd = int(np.argmax(score))

        bmc = str(data[maxInd][3]).replace("'", "").replace(".", "")
        comp = edgar_t.findCompanyName(bmc)
        # Kept as np.append so the flat array layout of tick_cik is unchanged.
        tick_cik = np.append(tick_cik, (comp[0], data[maxInd][1]))
import os
import requests
import xlrd
import edgar
from company import Company
import xlsxwriter
from bs4 import BeautifulSoup

# Module-wide EDGAR client.
edg = edgar.Edgar()
# Large float constant; presumably used as an "infinity" sentinel -- confirm
# against its uses.
oo = 1e9


def print_log(mess, log_file, verbose=True):
    """Report a message on stdout and/or in a log file.

    Args:
        mess: Text to report; it is concatenated with a newline before being
            written to the log, so it must be a string when a log is given.
        log_file: Object with a ``write`` method, or ``None`` to skip logging.
        verbose: When true, ``mess`` is also printed.
    """
    if verbose:
        print(mess)
    if log_file is None:
        return
    log_file.write(mess + "\n")


def format_cik_file(input_file="cik.xlsx", output_file="cik.xlsx", verbose=True, log_file=None):
    """Open the first sheet of ``input_file`` and start a fresh output workbook.

    The input workbook is opened before any existing ``output_file`` is
    removed; both paths default to the same file name.

    NOTE(review): ``verbose`` and ``log_file`` are not used by the statements
    below, and the new workbook is neither filled nor closed here -- confirm
    against the remainder of the function.
    """
    source_wb = xlrd.open_workbook(input_file)
    cik_sheet = source_wb.sheet_by_index(0)

    # Drop any previous output so the workbook is created from scratch.
    if os.path.exists(output_file):
        os.remove(output_file)

    output_wb = xlsxwriter.Workbook(output_file)
    output_sheet = output_wb.add_worksheet()
import edgar

# Search for companies by (partial) name.
# The client gets its own name: binding it to ``edgar`` would hide the module,
# which is still needed below for ``edgar.Company`` and ``edgar.getDocuments``.
edgar_client = edgar.Edgar()
possible_companies = edgar_client.findCompanyName("Cisco System")
print(possible_companies)

# Get Oracle Corp's last 5 form 10-K's
company = edgar.Company("Oracle Corp", "0001341439")
tree = company.getAllFilings(filingType="10-K")
docs = edgar.getDocuments(tree, noOfDocuments=5)
# docs is an array of strings, each one is the full text doc

# SIC codes
url = "https://www.sec.gov/info/edgar/siccodes.htm"
# Developer page
# https://www.sec.gov/developer
# Get the page number of the "financial report" section y = re.sub("[^0-9]", "", line) # Remove non-numeric characters fyear = int(y[-4:]) return fyear return fyear def countStatesApperance(doc): table = {} for word in doc.split(): if word in states: table[word] = 1 return len(table) ed = edgar.Edgar() c = "785814" c = c.zfill(10) #n = ed.getCompanyNameByCik(c) company = edgar.Company("INTEGRATED HEALTH SVCS INC", c) tree = company.getAllFilings(filingType="10-K") docs = edgar.getDocuments(tree, noOfDocuments=30) with io.open("C:/Users/William/Desktop/Output.txt", "w", encoding="utf-8") as f: f.write(docs[0]) if len(docs) > 0: print(countStatesApperance(docs[0])) print(extractYear(docs[0]))
URL = 'http://www.sec.gov/cgi-bin/browse-edgar?CIK={}&Find=Search&owner=exclude&action=getcompany'
CIK_RE = re.compile(r'.*CIK=(\d{10}).*')

# Change ticker into CIK: request the browse page for each ticker and take the
# first 10-digit CIK found in the response text.
cik_dict = {}
cik = None  # last CIK that was resolved; stays None when no ticker matched
for ticker in DEFAULT_TICKERS:
    f = requests.get(URL.format(ticker), stream=True)
    results = CIK_RE.findall(f.text)
    if results:
        cik = str(results[0])
        cik_dict[str(ticker).upper()] = cik
print(cik_dict)

# Fail with a clear message instead of an IndexError/NameError on the
# leftover loop variables when nothing could be resolved.
if cik is None:
    raise LookupError('no CIK could be resolved for any of the requested tickers')

# Use edgar to get text compilation of the lxml
# Get Company name from CIK
edgar1 = edgar.Edgar()
# Use the tracked ``cik`` rather than ``results[0]``: ``results`` only reflects
# the last ticker of the loop and is empty when that ticker had no match.
cmp_name = edgar1.getCompanyNameByCik(cik)
print(cmp_name)
company = edgar.Company(cmp_name, cik)

# Creating filename and url structure
# NOTE(review): os.listdir returns entries in arbitrary order, so
# ``file_name[0]`` is not a stable choice when ``out_path`` holds several
# files, and it raises IndexError when the directory has none.
file_name = [
    entry for entry in os.listdir(out_path)
    if os.path.isfile(os.path.join(out_path, entry))
]
switched_filename = file_name[0]
switched_filename = switched_filename.replace('-', '').replace(
    '.txt', '/index.json')
print(switched_filename)
print(file_name)