def get_filings(cik, ticker):
    """Download up to 10 10-K filings for *ticker*/*cik* via SecCrawler.

    Args:
        cik: SEC Central Index Key of the company.
        ticker: company ticker symbol, used as the crawler's company code.
    """
    t1 = time.time()
    # create object
    seccrawler = SecCrawler()
    date = '20170101'  # date from which filings should be downloaded
    count = '10'       # no of filings
    # Pass the arguments straight through; the original aliased them into
    # throwaway locals (including the no-op `cik = cik`).
    seccrawler.filing_10K(str(ticker), str(cik), str(date), str(count))
    t2 = time.time()
    # Two plain print() calls; the original's `print(...),` trailing comma was
    # a Python 2 leftover that evaluates a stray tuple in Python 3.
    print("Total Time taken: ")
    print(t2 - t1)
def get_filings(cik, ticker):
    """Fetch up to 10 of a company's 10-K filings and report elapsed time.

    Args:
        cik: SEC Central Index Key of the company.
        ticker: company ticker symbol, used as the crawler's company code.
    """
    t1 = time.time()
    seccrawler = SecCrawler()
    date = '20170101'  # crawl filings from this date
    count = '10'       # maximum number of filings
    # Removed the redundant local aliases (`companyCode = ticker`, `cik = cik`)
    # from the original; arguments are forwarded directly.
    seccrawler.filing_10K(str(ticker), str(cik), str(date), str(count))
    t2 = time.time()
    # Replaced the Python 2 style `print(...),` (stray tuple in Python 3).
    print("Total Time taken: ")
    print(t2 - t1)
def get_filings(companyCode, date='20200101', cik=None, count=100):
    """Download 10-K and 10-Q filings for *companyCode* and rename them by year.

    If *cik* is None it is looked up in ``company_list.txt`` (whitespace
    separated lines whose last field is the CIK; the last matching line wins,
    as in the original). Filings land in ``<companyCode>/<cik>/<form>/``, are
    renamed to ``<companyCode>_<form>_<year>.txt`` under ``<companyCode>/``,
    and the crawler's per-CIK directory is removed.

    Returns:
        List of 4-digit year strings for all renamed filings (10-K and 10-Q),
        or None when the CIK cannot be resolved.  (The original discarded the
        10-K years by reassigning the accumulator before the 10-Q pass; that
        loss is fixed here.)
    """
    if cik is None:
        with open('company_list.txt', 'r') as f:
            for line in f:
                if companyCode in line:
                    cik = line.rstrip().split(' ')[-1]
        if cik is None:
            print("cik not provided and not found in list. please try again.")
            return
    years_downloaded = []
    # NOTE(review): the original used different two-digit-year pivots for the
    # two form types (>50 => 19xx for 10-K, >20 => 19xx for 10-Q); both are
    # preserved here — confirm which is intended.
    years_downloaded += _download_and_rename(companyCode, cik, date, count, '10-K', '10K', 50)
    years_downloaded += _download_and_rename(companyCode, cik, date, count, '10-Q', '10Q', 20)
    return years_downloaded


def _download_and_rename(companyCode, cik, date, count, form_dir, form_tag, pivot):
    """Download one form type and rename files to <code>_<tag>_<year>.txt.

    *pivot* is the two-digit-year cutoff: years above it become 19xx,
    otherwise 20xx.  Returns the list of 4-digit years seen.
    """
    seccrawler = SecCrawler()
    # filing_10K / filing_10Q selected by form tag.
    getattr(seccrawler, 'filing_' + form_tag)(str(companyCode), str(cik), str(date), str(count))
    dest_dir = companyCode + "/"
    src_dir = dest_dir + cik + "/" + form_dir + "/"
    years = []
    for old_filename in os.listdir(src_dir):
        two_digit_year = old_filename.split('-')[1]
        century = '19' if int(two_digit_year) > pivot else '20'
        year = century + two_digit_year
        years.append(year)
        # NOTE(review): two filings from the same year map to the same target
        # name and overwrite each other, as in the original.
        os.rename(src_dir + old_filename,
                  dest_dir + companyCode + '_' + form_tag + '_' + year + '.txt')
    shutil.rmtree(dest_dir + cik + '/')
    return years
def getfilings():
    """Download 10 each of Apple's 10-Q, 10-K, 8-K and 13-F filings and print the elapsed time."""
    start = time.time()
    crawler = SecCrawler()
    ticker = 'AAPL'        # company ticker symbol for Apple
    apple_cik = '0000320193'  # Apple's SEC Central Index Key
    from_date = '20010101'    # date from which filings should be downloaded
    per_form = '10'           # number of filings per form type
    for fetch in (crawler.filing_10Q, crawler.filing_10K,
                  crawler.filing_8K, crawler.filing_13F):
        fetch(str(ticker), str(apple_cik), str(from_date), str(per_form))
    elapsed = time.time() - start
    print("Total time taken: ")
    print(elapsed)
def get_filings():
    """Download 10 each of Apple's 10-Q, 10-K, 8-K and 13-F filings."""
    t1 = time.time()
    # Removed DEFAULT_DATA_PATH: it was computed but never used (dead local).
    # create object
    seccrawler = SecCrawler()
    companyCode = 'AAPL'  # company code for apple
    cik = '0000320193'    # cik code for apple
    date = '20010101'     # date from which filings should be downloaded
    count = '10'          # no of filings
    seccrawler.filing_10Q(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_10K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_8K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_13F(str(companyCode), str(cik), str(date), str(count))
    t2 = time.time()
    # Dropped the Python 2 style trailing comma after print(...).
    print("Total Time taken: ")
    print(t2 - t1)
def get_all_filings(self, deadline):
    """Download Apple's recent 10-Q, 10-K, 8-K and 13-F SEC filings.

    NOTE(review): the original docstring said "all companies in US stock
    universe" but the body only ever fetches AAPL; *deadline* is accepted for
    interface compatibility and is unused.
    """
    t1 = time.time()
    # create object
    seccrawler = SecCrawler()
    companyCode = 'AAPL'  # company code for apple
    cik = '0000320193'    # cik code for apple
    date = '20160101'     # date from which filings should be downloaded
    count = '100'         # no of filings
    seccrawler.filing_10Q(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_10K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_8K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_13F(str(companyCode), str(cik), str(date), str(count))
    t2 = time.time()
    # Python 3 print function; the original Python 2 print statement is a
    # syntax error in Python 3.
    print("Total Time taken: " + str(t2 - t1))
def get_filings():
    """Download 10 each of Apple's 10-Q, 10-K, 8-K and 13-F filings."""
    t1 = time.time()
    # create object
    seccrawler = SecCrawler()
    companyCode = "AAPL"  # company code for apple
    cik = "0000320193"    # cik code for apple
    date = "20010101"     # date from which filings should be downloaded
    count = "10"          # no of filings
    seccrawler.filing_10Q(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_10K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_8K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_13F(str(companyCode), str(cik), str(date), str(count))
    t2 = time.time()
    # The original mixed a Python 2 print statement with a Python 3 call;
    # normalized to the print() function.
    print("Total Time taken: ")
    print(t2 - t1)
def test():
    """Read (ticker, cik, date, count) rows from data.txt, then fetch one 10-Q.

    The bulk-crawl loop is retained (commented out) from the original; only a
    single hard-coded AAPL 10-Q request is actually issued.
    """
    t1 = time.time()
    # file containing company name and corresponding cik codes
    seccrawler = SecCrawler()
    company_code_list = []  # company code list
    cik_list = []           # cik code list
    date_list = []          # prior-to date list
    count_list = []         # filing count list
    try:
        crs = open("data.txt", "r")
    except OSError:
        # The original bare `except` printed and fell through, then crashed
        # with a NameError on the unopened handle; fail fast instead.
        print("No input file Found")
        return
    # get the company quotes and cik number from the file; `with` guarantees
    # the file is closed even if parsing raises.
    with crs:
        for columns in (raw.strip().split() for raw in crs):
            company_code_list.append(columns[0])
            cik_list.append(columns[1])
            date_list.append(columns[2])
            count_list.append(columns[3])
    # call different API from the crawler
    # for i in range(1, len(cik_list)):
    #     seccrawler.filing_SD(str(company_code_list[i]), str(cik_list[i]),
    #                          str(date_list[i]), str(count_list[i]))
    #     seccrawler.filing_10K(str(company_code_list[i]), str(cik_list[i]),
    #                           str(date_list[i]), str(count_list[i]))
    #     seccrawler.filing_8K(str(company_code_list[i]), str(cik_list[i]),
    #                          str(date_list[i]), str(count_list[i]))
    seccrawler.filing_10Q('AAPL', '0000320193', '20170101', '1')
    t2 = time.time()
    # Dropped the Python 2 style trailing comma after print(...).
    print("Total Time taken: ")
    print(t2 - t1)
def get_filings():
    """Download 10 each of Apple's 10-Q, 10-K, 8-K and 13-F filings."""
    t1 = time.time()
    # create object
    seccrawler = SecCrawler()
    companyCode = 'AAPL'  # company code for apple
    cik = '0000320193'    # cik code for apple
    date = '20010101'     # date from which filings should be downloaded
    count = '10'          # no of filings
    seccrawler.filing_10Q(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_10K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_8K(str(companyCode), str(cik), str(date), str(count))
    seccrawler.filing_13F(str(companyCode), str(cik), str(date), str(count))
    t2 = time.time()
    # Converted the Python 2 print statements (syntax errors in Python 3).
    print("Total Time taken: ")
    print(t2 - t1)


if __name__ == '__main__':
    get_filings()
def scrape_edgar(self):
    """Scrape the SEC EDGAR website and download 10-K filings in parallel.

    Uses the third-party SECEdgar package; this would be improved by a custom
    crawler that pre-filters amended 10-Ks and filings outside the date range
    before downloading (as noted in the original).
    """
    # Remove any duplicates where CUSIP, PERMNO, and CIK all match.
    ciks = self.data.drop_duplicates(subset=['CUSIP', 'PERMNO', 'cik'])
    # Only the cik and ticker columns are needed for crawling.
    ciks = ciks[['cik', 'tic']]
    crawler = SecCrawler()
    end_date = str(self.end) + '1231'  # crawl up to Dec 31 of the end year
    # Round the year span up to the next multiple of 10 filings.
    count = str(math.ceil((self.end - self.start) / 10) * 10)
    rows = ciks.to_dict(orient='records')
    # Context manager terminates worker processes reliably; the original
    # created the Pool and never closed or joined it (resource leak).
    with Pool() as p:
        results = p.starmap(
            crawl, zip(rows, repeat(end_date), repeat(count), repeat(crawler)))
def crawler():
    """Return a freshly constructed SecCrawler instance."""
    return SecCrawler()
""" #%% import re from SECEdgar.crawler import SecCrawler #%% GEt names of companies sector_code = '10' list_companies = [] with open('tic_company_gics.txt') as file: for line in file: sector = line[-3:-1] if sector == sector_code: name_company = re.search('[\w\.]+', line).group()#[:-1] list_companies.append(name_company) list_companies = list(set(list(list_companies))) #%% Download the 10K number_of_companies = 50 date_retrieve = '20150101' number_10K = 5 secCrawler = SecCrawler() for company in list_companies[0:number_of_companies]: secCrawler.filing_10K(company, date_retrieve, number_10K)
def crawler():
    """Yield a SecCrawler for a test, then remove its data directory.

    Cleanup runs only when the consumer resumes the generator normally,
    matching the original fixture's behavior.
    """
    instance = SecCrawler()
    yield instance
    shutil.rmtree(instance.data_path)
CIK_RE = re.compile(r'.*CIK=(\d{10}).*') cik = '' results = CIK_RE.findall(get(URL.format(name)).content) if len(results): cik = str(results[0]) return cik list_companies = [] with open('tic_company_gics.txt') as file: for line in file: sector = line[-3:-1] if sector == sector_code: name_company = re.search('[\w\.]+', line).group()#[:-1] list_companies.append(name_company) list_companies = list(set(list(list_companies))) #%% GEt SIC code for each company list_companies_cik = [] for company in list_companies: cik = get_cik(company) if len(cik) > 0: list_companies_cik.append((company, cik)) #%% Get 10K secCrawler = SecCrawler() for name, cik in list_companies_cik: secCrawler.filing_10K(name, cik, '20150101', '1')
from SECEdgar.crawler import SecCrawler

crawler = SecCrawler()


def get(symbol):
    """Fetch up to 10 10-Q filings for *symbol* dated before 2001-01-01.

    NOTE(review): the CIK is hard-coded to Apple's; for any *symbol* other
    than AAPL the ticker and CIK will not match — confirm intent.
    """
    apple_cik = '0000320193'
    crawler.filing_10Q(symbol, apple_cik, '20010101', '10')


# http://rankandfiled.com/#/data/tickers
from SECEdgar.crawler import SecCrawler

# Download up to 100 of Microsoft's 10-Q filings dated before 2019-01-01.
msft_ticker = "MSFT"
msft_cik = "0000789019"
prior_to = "20190101"

filings = SecCrawler()
filings.filing_10Q(msft_ticker, msft_cik, prior_to, 100)
from SECEdgar.crawler import SecCrawler

# Ad-hoc exploration script: exercises several SecCrawler filing methods with
# assorted (sometimes empty) arguments, then downloads one filing directly.
secCrawler = SecCrawler()
secCrawler.filing_8K("", "0001527166", "", "2000")
secCrawler.filing_8K("AAPL", "0000320193", "", "2000")
secCrawler.filing_10Q("AAPL", "0000320193", "20010101", "10")
secCrawler.filing_DEF14A("AAPL", "0000320193", "20010101", "10")
secCrawler.filing_DEF14A("", "0000320193", "", "")
secCrawler.filing_DEF14A("", "0001629210", "", "")
# Reference URLs explored while figuring out EDGAR's archive layout:
# http://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&CIK=0000320193&type=8-K&dateb=20010101&owner=exclude&output=xml&count=10
# https://www.sec.gov/Archives/edgar/data/320193/000091205700010000/0000912057-00-010000.txt
# https://www.sec.gov/Archives/edgar/data/320193/000091205700010000/0000912057-00-010000.txt
# https://www.sec.gov/Archives/edgar/data/320193/0000912057-00-010000-.txt
# www.sec.gov/Archives/edgar/data/320193/000104746999003858/0001047469-99-003858-.txt
# https://www.sec.gov/Archives/edgar/data/320193/0001047469-99-003858.txt
# http://www.sec.gov/Archives/edgar/data/320193/0001047469-99-003858-.txt
from tqdm import tqdm
import requests

# Stream one 10-K filing straight from EDGAR into a local file, with a
# tqdm progress bar over the response bytes.
url = "http://www.sec.gov/Archives/edgar/data/320193/000091205700053623/0000912057-00-053623.txt"
response = requests.get(url, stream=True)
with open("d:/SEC-Edgar-data/AAPL/0000320193/10-K/0000912057-00-053623.txt", "w") as handle:
    # NOTE(review): the body of this loop is not visible in this chunk
    # (presumably handle.write(data)); confirm against the full file.
    for data in tqdm(response.iter_content()):
def crawler():
    """Yield a fresh SecCrawler instance (no teardown)."""
    instance = SecCrawler()
    yield instance