def test_edgar_download_html():
    """Check that an AAPL ingest through ``Edgar("html", ...)`` saves the filing.

    The test builds an ``Edgar("html", "2013-01-01")`` source (presumably the
    document format and a date bound -- confirm against ``Edgar``), hands the
    result of ``ingest_stock("AAPL")`` to ``Ingestor.file_downloader`` and then
    expects ``a2124888z10-k.htm`` to exist inside ``docs_directory``.
    """
    ingestor = Ingestor()
    edgar = Edgar("html", "2013-01-01")
    ingestor.file_downloader(edgar.ingest_stock("AAPL"), docs_directory)
    # os.path.exists already returns a bool, so it is asserted directly;
    # os.path.join builds the path instead of manual "/" concatenation.
    assert os.path.exists(os.path.join(docs_directory, "a2124888z10-k.htm"))
def test_edgar_download_xbrl():
    """Check that an AAPL ingest through ``Edgar("xbrl", ...)`` saves the filing.

    The test builds an ``Edgar("xbrl", "2014-01-01")`` source (presumably the
    document format and a date bound -- confirm against ``Edgar``), hands the
    result of ``ingest_stock("AAPL")`` to ``Ingestor.file_downloader`` and then
    expects ``aapl-20130928.xml`` to exist inside ``docs_directory``.
    """
    ingestor = Ingestor()
    edgar = Edgar("xbrl", "2014-01-01")
    ingestor.file_downloader(edgar.ingest_stock("AAPL"), docs_directory)
    # os.path.exists already returns a bool, so it is asserted directly;
    # os.path.join builds the path instead of manual "/" concatenation.
    assert os.path.exists(os.path.join(docs_directory, "aapl-20130928.xml"))
# NOTE(review): the `exit(0)` below looks like the final statement of the
# `quit_gracefully` handler, whose `def` starts before this chunk -- keep it
# inside that handler's body when merging.
exit(0)

# always declare the signal handler first
signal.signal(signal.SIGINT, quit_gracefully)

env = lucene.initVM()
queryer = Queryer("index", "hits")
print('Using Directory: ', queryer.store_dir)

# directory for storing downloaded docs
directoryToWalk = "docs"

# and start the indexer
# note the indexer thread is set to daemon causing it to terminate on a SIGINT
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)
ingestor = Ingestor()
edgar = Edgar()

# one ticker per line: download its documents, then index what was downloaded
with open('data.txt', 'r') as reader:
    for line in reader:
        ticker = line.strip()
        if not ticker:
            # a blank line would otherwise be submitted as an empty ticker
            continue
        ingestor.file_downloader(edgar.ingest_stock(ticker), directoryToWalk)
        indexer.indexDocs()

# start up the terminal query interface
queryer.run(queryer.writer, queryer.analyzer)

# if return from Querying then call the signal handler
# to clean up the writer cleanly
quit_gracefully()
#! /usr/bin/env python
# encoding: utf-8
"""Download documents for every ticker listed in ``sedar_tickers.txt``.

Each non-blank line of the input file is treated as one ticker; whatever
``Sedar.ingest_stock`` returns for it is handed to
``Ingestor.file_downloader`` together with the target directory ``test``.
"""

import os

from ingestor import Ingestor, IngestorException, Sedar

ingestor = Ingestor()

# xbrl or html?
sedar = Sedar("xbrl")

docs_directory = "test"

# if the directory we will download files does not exist, create it
if not os.path.exists(docs_directory):
    os.mkdir(docs_directory)

# for every ticker in our input file, download all the relevant documents
with open('sedar_tickers.txt', 'r') as reader:
    for line in reader:
        ticker = line.strip()
        if not ticker:
            # a blank line would otherwise be submitted as an empty ticker
            continue
        ingestor.file_downloader(sedar.ingest_stock(ticker), docs_directory)
# NOTE(review): the `exit(0)` below looks like the final statement of the
# `quit_gracefully` handler, whose `def` starts before this chunk -- keep it
# inside that handler's body when merging.
exit(0)

# always declare the signal handler first
signal.signal(signal.SIGINT, quit_gracefully)

env = lucene.initVM()
queryer = Queryer("index", "hits")
# Single pre-formatted argument: prints the same text as the old
# `print 'Using Directory: ', value` statement (label, separator space, value)
# and is valid syntax on both Python 2 and Python 3.
print('Using Directory:  %s' % (queryer.store_dir,))

# directory for storing downloaded docs
directoryToWalk = "docs"

# and start the indexer
# note the indexer thread is set to daemon causing it to terminate on a SIGINT
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)
ingestor = Ingestor()
edgar = Edgar()

# one ticker per line: download its documents, then index what was downloaded
with open('edgar_tickers.txt', 'r') as reader:
    for line in reader:
        ticker = line.strip()
        if not ticker:
            # a blank line would otherwise be submitted as an empty ticker
            continue
        ingestor.file_downloader(edgar.ingest_stock(ticker), directoryToWalk)
        indexer.indexDocs()

# start up the terminal query interface
queryer.run(queryer.writer, queryer.analyzer)

# if return from Querying then call the signal handler
# to clean up the writer cleanly
quit_gracefully()
#! /usr/bin/env python
# encoding: utf-8
"""Download documents for every ticker listed in ``edgar_tickers.txt``.

Each non-blank line of the input file is treated as one ticker; whatever
``Edgar.ingest_stock`` returns for it is handed to
``Ingestor.file_downloader`` together with the target directory ``test``.
"""

import os

from ingestor import Ingestor, IngestorException, Edgar

ingestor = Ingestor()

# xbrl or html?
edgar = Edgar("xbrl")

docs_directory = "test"

# if the directory we will download files does not exist, create it
if not os.path.exists(docs_directory):
    os.mkdir(docs_directory)

# for every ticker in our input file, download all the relevant documents
with open('edgar_tickers.txt', 'r') as reader:
    for line in reader:
        ticker = line.strip()
        if not ticker:
            # a blank line would otherwise be submitted as an empty ticker
            continue
        ingestor.file_downloader(edgar.ingest_stock(ticker), docs_directory)