Beispiel #1
0
def nonthreaded_get_filings():
    """Fetch 10-K filings one company at a time for the rows of ``sp_500.txt``.

    ``sp_500.txt`` is read as a tab-separated file. Its first 100 lines are
    skipped, and columns 1 and 2 of every remaining line are taken as the
    ``(companyCode, cik)`` pair. ``SecCrawler.getFiling`` is called once per
    company with the fixed date ``'20170313'`` and count ``'100'``. The time
    spent per company and the overall elapsed time are printed.
    """
    print('getting filings using a single lonely execution thread :(')
    seccrawler = SecCrawler()
    date = '20170313'  # date from which filings should be downloaded
    count = '100'  # no of filings

    # The context manager closes the file even when reading raises.
    with open('sp_500.txt') as sp_500:
        lines = sp_500.readlines()

    # NOTE(review): the first 100 lines are skipped unconditionally;
    # presumably they were processed by an earlier run - confirm.
    companies = [line.split('\t')[1:3] for line in lines[100:]]

    start = time.time()
    # The CIK column is unpacked but only the company code is passed on.
    for companyCode, _cik in companies:
        t1 = time.time()
        seccrawler.getFiling(str(companyCode), str(date), str(count), "10-K")
        t2 = time.time()
        # A single pre-formatted argument prints identically on Python 2
        # and Python 3 (spacing matches the former print statement).
        print("Total Time taken for  {0} :  {1}".format(companyCode, t2 - t1))
    print('\n\n\n FINAL TIME:')
    print(time.time() - start)
Beispiel #2
0
def test():
    """Call the SD, 10-K, 8-K and 10-Q crawler methods for each row of ``data.txt``.

    ``data.txt`` is read as whitespace-separated rows whose first four
    columns are company code, CIK, date and count. The first usable row is
    treated as a header and skipped. If the file cannot be opened, a message
    is printed and no crawler call is made. The elapsed time is printed at
    the end in both cases.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    rows = []  # one [company_code, cik, date, count] entry per input row
    try:
        with open("data.txt", "r") as f:
            for raw in f:
                columns = raw.strip().split()
                # Blank or short lines are skipped instead of aborting the
                # whole read with an IndexError.
                if len(columns) >= 4:
                    rows.append(columns[:4])
    except IOError:
        # Only a genuine open/read failure is reported as a missing file.
        print("No input file Found")

    # rows[0] is the header line, so processing starts at the second row.
    for company_code, cik, date, count in rows[1:]:
        seccrawler.filing_SD(company_code, cik, date, count)
        seccrawler.filing_10K(company_code, cik, date, count)
        seccrawler.filing_8K(company_code, cik, date, count)
        seccrawler.filing_10Q(company_code, cik, date, count)

    t2 = time.time()
    print("Total Time taken: {0}".format(t2 - t1))
Beispiel #3
0
def find_and_save_10K_to_folder(ticker,
                                from_date=None,
                                number_of_documents=40,
                                doc_format='xbrl'):
    """Look up a ticker's CIK and company name and hand them to ``filing_10K``.

    Args:
        ticker: Ticker symbol passed to ``get_cik_and_name_from_ticker`` and
            to the crawler.
        from_date: Date string forwarded to the crawler. When ``None``,
            today's date formatted as ``YYYYMMDD`` is used.
        number_of_documents: Forwarded unchanged to ``filing_10K``.
        doc_format: Forwarded unchanged to ``filing_10K``.
    """
    start_date = (datetime.today().strftime('%Y%m%d')
                  if from_date is None else from_date)
    sec_crawler = SecCrawler()
    ticker_info = get_cik_and_name_from_ticker(ticker)
    cik, company_name = ticker_info
    sec_crawler.filing_10K(ticker, cik, company_name, start_date,
                           number_of_documents, doc_format)
Beispiel #4
0
def test():
    """Call the 10-Q, 10-K and 8-K crawler methods for each row of ``data.txt``.

    ``data.txt`` is read as whitespace-separated rows whose first four
    columns are company code, CIK, date and count. The first usable row is
    treated as a header and skipped. If the file cannot be opened, a message
    is printed and the function returns without calling the crawler.
    """
    start_time = time.time()
    seccrawler = SecCrawler()

    rows = []  # one [company_code, cik, date, count] entry per input row
    try:
        # The context manager closes the file even if a later step raises.
        with open("data.txt", "r") as crs:
            for raw in crs:
                columns = raw.strip().split()
                # Blank or short lines cannot supply all four values.
                if len(columns) >= 4:
                    rows.append(columns[:4])
    except IOError:
        print("No input file Found")
        # Stop here: there is nothing to process without the input file.
        return

    # rows[0] is the header line, so processing starts at the second row.
    for company_code, cik, date, count in rows[1:]:
        seccrawler.filing_10Q(company_code, cik, date, count)
        seccrawler.filing_10K(company_code, cik, date, count)
        seccrawler.filing_8K(company_code, cik, date, count)

    end_time = time.time()
    # One print call keeps label and value on the same line on Python 3.
    print("Total Time taken: {0}".format(end_time - start_time))
Beispiel #5
0
def test():
    """Call the 10-Q, 10-K and 8-K crawler methods for each row of ``file.txt``.

    ``file.txt`` is read as whitespace-separated rows whose first four
    columns are company code, CIK, date and count. The first usable row is
    treated as a header and dropped as a whole, so the count stays aligned
    with the other three columns of the same row. If the file cannot be
    opened, a message is printed and the function returns.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    rows = []  # one [companyCode, cik, date, count] entry per input row
    try:
        # The context manager closes the file even if a later step raises.
        with open("file.txt", "r") as crs:
            for raw in crs:
                columns = raw.strip().split()
                # Blank or short lines cannot supply all four values.
                if len(columns) >= 4:
                    rows.append(columns[:4])
    except IOError:
        print("No input file Found")
        return

    # Dropping the header as one row (instead of deleting index 0 from only
    # three of four parallel lists) keeps every count with its own company.
    for companyCode, cik, date, count in rows[1:]:
        seccrawler.filing_10Q(companyCode, cik, date, count)
        seccrawler.filing_10K(companyCode, cik, date, count)
        seccrawler.filing_8K(companyCode, cik, date, count)

    t2 = time.time()
    # A single pre-formatted argument prints the same on Python 2 and 3.
    print("Total Time taken: {0}".format(t2 - t1))
Beispiel #6
0
def test():
    """Call the 10-Q, 10-K and 8-K crawler methods for each row of ``data.txt``.

    ``data.txt`` is read as whitespace-separated rows whose first four
    columns are company code, CIK, date and count. The first usable row is
    treated as a header and dropped as a whole, so the count stays aligned
    with the other three columns of the same row. If the file cannot be
    opened, a message is printed and the function returns.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    rows = []  # one [companyCode, cik, date, count] entry per input row
    try:
        # The context manager closes the file even if a later step raises.
        with open("data.txt", "r") as crs:
            for raw in crs:
                columns = raw.strip().split()
                # Blank or short lines cannot supply all four values.
                if len(columns) >= 4:
                    rows.append(columns[:4])
    except IOError:
        print("No input file Found")
        return

    # Dropping the header as one row (instead of deleting index 0 from only
    # three of four parallel lists) keeps every count with its own company.
    for companyCode, cik, date, count in rows[1:]:
        seccrawler.filing_10Q(companyCode, cik, date, count)
        seccrawler.filing_10K(companyCode, cik, date, count)
        seccrawler.filing_8K(companyCode, cik, date, count)

    t2 = time.time()
    # A single pre-formatted argument prints the same on Python 2 and 3.
    print("Total Time taken: {0}".format(t2 - t1))
Beispiel #7
0
def getSingleCompanyFiling(companyCode, logPath='downloaded_companies.txt'):
    """Call ``SecCrawler.getFiling`` for one company and log the company code.

    Args:
        companyCode: Company identifier passed to ``getFiling`` as a string.
        logPath: File the company code is appended to, one code per line,
            after ``getFiling`` returns.

    Returns:
        A string reporting the time taken by the ``getFiling`` call.

    Raises:
        Exception: If ``getFiling`` fails. The message is the formatted
            traceback of the original error.
    """
    date = '20170315'  # date from which filings should be downloaded
    count = '100'  # no of filings

    seccrawler = SecCrawler()
    t1 = time.time()
    try:
        seccrawler.getFiling(str(companyCode), str(date), str(count), "10-K")
    except Exception:
        # Only ordinary errors are wrapped; KeyboardInterrupt and SystemExit
        # propagate untouched. The traceback is flattened into the message,
        # presumably so it survives being passed back from a worker - confirm.
        raise Exception("".join(traceback.format_exception(*sys.exc_info())))
    t2 = time.time()

    # The context manager closes the log even if the write fails.
    with open(logPath, 'a+') as f:
        f.write(str(companyCode) + '\n')
    return "Total Time taken for " + str(companyCode) + ": " + str(t2 - t1)
Beispiel #8
0
def test():
    """Prompt for a company and a date, then call the 10-Q and 10-K methods.

    The company name read from standard input is passed to ``get_cik`` to
    obtain the CIK. The date is read from standard input as typed. Both
    crawler calls use the fixed count ``"2"``. The elapsed time, including
    the time spent waiting for input, is printed at the end.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    print("Enter the company name?")
    company = str(input())
    cik = str(get_cik(company))
    print("Enter the date in format YYYYMMDD?")
    date = str(input())
    count = "2"

    # Only one company is ever entered, so the calls are made directly
    # instead of going through single-element lists and an index loop.
    seccrawler.filing_10Q(company, cik, date, count)
    seccrawler.filing_10K(company, cik, date, count)

    t2 = time.time()
    # One print call keeps label and value on the same line on Python 3.
    print("Total Time taken: {0}".format(t2 - t1))
0
def extractSectionFromExistingFilings(numThreads):
    """Process the filings listed in ``../check.txt`` that are not yet logged.

    Each tab-separated line of ``../check.txt`` becomes an entry
    ``[companyCode, [field 6], [field 5], [field 6], '10-K']``. Entries whose
    document name (field 5) already appears in ``extraction_log.txt`` are
    reported and skipped. The remaining entries are handled in reverse file
    order.

    Args:
        numThreads: When greater than 1, the entries are handed to
            ``calculateParallel`` together with ``extractSingleSection``.
            Otherwise each entry whose first URL does not contain ``'.txt'``
            is passed to ``SecCrawler.save_in_directory`` and its document
            name is appended to ``extraction_log.txt``.
    """
    filename = '../check.txt'
    completedFilename = 'extraction_log.txt'

    # A set gives constant-time membership tests for the filter below.
    with open(completedFilename, 'r') as f:
        completed = set(line.strip() for line in f)

    readLines = []
    with open(filename) as check:
        for line in check:
            fields = line.split('\t')  # split once, reuse for every column
            url = fields[6].strip()
            # Field 6 is used for both the filing URL list and the index URL
            # list; each gets its own list object.
            readLines.append([fields[0].strip(), [url], [fields[5].strip()],
                              [url], '10-K'])

    sectionsToGet = []
    for entry in reversed(readLines):
        docName = entry[2][0]
        if docName not in completed:
            sectionsToGet.append(entry)
        else:
            # A single pre-formatted argument prints the same on Python 2
            # and 3 (spacing matches the former print statement).
            print('Already downloaded  ' + docName)

    if numThreads > 1:
        print('THREADED')
        calculateParallel(sectionsToGet, extractSingleSection, numThreads)
    else:
        print('NONTHREADED')
        seccrawler = SecCrawler()

        for entry in sectionsToGet:
            companyCode, filingURLList, docNameList, indexURLList, filingType = entry
            if '.txt' in filingURLList[0]:
                continue
            seccrawler.save_in_directory(companyCode, filingURLList,
                                         docNameList, indexURLList,
                                         filingType)
            # Record the document right away so an interrupted run can
            # skip it next time.
            with open(completedFilename, 'a+') as f:
                f.write(docNameList[0] + '\n')
Beispiel #10
0
def extractSingleSection(inputs):
    """Process one filing entry and record its document name in the log.

    Args:
        inputs: A five-item sequence ``(companyCode, filingURLList,
            docNameList, indexURLList, filingType)``.

    Returns:
        ``'Skipping text file.'`` when the first filing URL contains
        ``'.txt'``. Otherwise a string reporting the time taken by
        ``SecCrawler.save_in_directory``.

    Raises:
        Exception: If processing or logging fails. The message is the
            formatted traceback of the original error.
    """
    companyCode, filingURLList, docNameList, indexURLList, filingType = inputs
    if '.txt' in filingURLList[0]:
        return 'Skipping text file.'

    t1 = time.time()
    seccrawler = SecCrawler()
    try:
        seccrawler.save_in_directory(companyCode, filingURLList,
                                     docNameList, indexURLList, filingType)
        t2 = time.time()

        # The context manager closes the log even if the write fails.
        with open('extraction_log.txt', 'a+') as f:
            f.write(docNameList[0] + '\n')
        # The space before "sections" keeps the company code readable.
        logString = ("Total Time taken for " + companyCode + " sections: " +
                     str(t2 - t1))
    except Exception:
        # Only ordinary errors are wrapped; KeyboardInterrupt and SystemExit
        # propagate untouched. The traceback text becomes the message.
        raise Exception("".join(
            traceback.format_exception(*sys.exc_info())))
    return logString
Beispiel #11
0
def main():
    """Call ``filing_10q`` for Apple and ``filing_10k`` for Google."""
    crawler = SecCrawler()

    # Both entries are looked up before the first crawler call, so a missing
    # key fails before any filing method runs.
    apple, google = COMPANIES["Apple"], COMPANIES["Google"]

    date_str = '20170101'
    doc_count = 2

    # 10-Q call for Apple
    crawler.filing_10q(apple.code, apple.cik, date_str, doc_count)

    # 10-K call for Google
    crawler.filing_10k(google.code, google.cik, date_str, doc_count)
def test():
    """Call the NCSR, NCSRS and NQ crawler methods for each row of the input file.

    The input file path is taken from ``sys.argv[1]``. The file is read as
    whitespace-separated rows whose first four columns are company code,
    CIK, date and count. Every usable row is processed, including the first.
    If the argument is missing or the file cannot be opened, a message is
    printed and the function returns.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    rows = []  # one [company_code, cik, date, count] entry per input row
    try:
        # The context manager closes the file even if a later step raises.
        with open(sys.argv[1], "r") as crs:
            for raw in crs:
                columns = raw.strip().split()
                # Blank or short lines cannot supply all four values.
                if len(columns) >= 4:
                    rows.append(columns[:4])
    except (IndexError, IOError):
        # IndexError: no path on the command line; IOError: unreadable file.
        print("No input file Found")
        return

    for company_code, cik, date, count in rows:
        seccrawler.filing_NCSR(company_code, cik, date, count)
        seccrawler.filing_NCSRS(company_code, cik, date, count)
        seccrawler.filing_NQ(company_code, cik, date, count)

    t2 = time.time()
    # One print call keeps label and value on the same line on Python 3.
    print("Total Time taken: {0}".format(t2 - t1))
Beispiel #13
0
def test():
    """Build ``data.txt`` from ``listofITfirms.xls`` and call ``filing_10K`` per row.

    The first sheet of the workbook is read. For spreadsheet rows 1 and 2
    (row 0 is skipped), a line ``"<col 1>   <col 4>   20190101   100"`` is
    written to ``data.txt`` when both cells are non-empty, below a header
    line. ``data.txt`` is then read back as whitespace-separated rows, and
    ``SecCrawler.filing_10K`` is called for every row after the header.
    If the workbook cannot be opened, a message is printed and the function
    returns without touching ``data.txt``.
    """
    t1 = time.time()
    seccrawler = SecCrawler()

    # Open the workbook first so a missing spreadsheet does not leave a
    # freshly truncated data.txt behind.
    try:
        workbook = xlrd.open_workbook('listofITfirms.xls')
    except IOError:
        print("No input file found")
        return
    sheet = workbook.sheet_by_index(0)

    # Number of rows in the sheet
    print(sheet.nrows)

    with open("data.txt", "w") as f:
        f.write("Ticker   CIK        priorto(YYYYMMDD) Count\n")
        # Only rows 1 and 2 are exported; the upper bound is clamped so a
        # shorter sheet does not raise IndexError.
        for i in range(1, min(3, sheet.nrows)):
            if sheet.cell_value(i, 1) and sheet.cell_value(i, 4):
                f.write("%s   %d   20190101   100\n" %
                        (sheet.cell_value(i, 1), sheet.cell_value(i, 4)))

    rows = []  # one [company_code, cik, date, count] entry per line
    with open("data.txt", "r") as crs:
        for raw in crs:
            columns = raw.strip().split()
            # Blank or short lines cannot supply all four values.
            if len(columns) >= 4:
                rows.append(columns[:4])

    # rows[0] is the header line written above.
    for company_code, cik, date, count in rows[1:]:
        seccrawler.filing_10K(company_code, cik, date, count)

    t2 = time.time()
    # One print call keeps label and value on the same line on Python 3.
    print("Total Time taken: {0}".format(t2 - t1))