def test_edgar_download_html():
    """Download AAPL documents through Edgar in "html" mode and check the result.

    The test passes when ``a2124888z10-k.htm`` exists inside
    ``docs_directory`` after the download has run.
    """
    downloader = Ingestor()
    source = Edgar("html", "2013-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/a2124888z10-k.htm"
    # os.path.exists already yields a bool, so it is asserted directly.
    assert os.path.exists(expected_file)
def test_edgar_download_xbrl():
    """Download AAPL documents through Edgar in "xbrl" mode and check the result.

    The test passes when ``aapl-20130928.xml`` exists inside
    ``docs_directory`` after the download has run.
    """
    downloader = Ingestor()
    source = Edgar("xbrl", "2014-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/aapl-20130928.xml"
    # os.path.exists already yields a bool, so it is asserted directly.
    assert os.path.exists(expected_file)
def test_edgar_download_xbrl():
    """Fetch AAPL's documents with an "xbrl" Edgar source and verify the file.

    After the ingestor has downloaded everything ``ingest_stock`` returned,
    ``aapl-20130928.xml`` must be present under ``docs_directory``.
    """
    downloader = Ingestor()
    source = Edgar("xbrl", "2014-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/aapl-20130928.xml"
    # os.path.exists already yields a bool, so it is asserted directly.
    assert os.path.exists(expected_file)
def test_edgar_download_html():
    """Fetch AAPL's documents with an "html" Edgar source and verify the file.

    After the ingestor has downloaded everything ``ingest_stock`` returned,
    ``a2124888z10-k.htm`` must be present under ``docs_directory``.
    """
    downloader = Ingestor()
    source = Edgar("html", "2013-01-01")
    filings = source.ingest_stock("AAPL")
    downloader.file_downloader(filings, docs_directory)

    expected_file = docs_directory + "/a2124888z10-k.htm"
    # os.path.exists already yields a bool, so it is asserted directly.
    assert os.path.exists(expected_file)
    exit(0)

# Register the SIGINT handler before any other setup takes place.
signal.signal(signal.SIGINT, quit_gracefully)

env = lucene.initVM()
queryer = Queryer("index", "hits")
print('Using Directory: ', queryer.store_dir)

# Downloaded documents are written to, and indexed from, this directory.
directoryToWalk = "docs"

# Create the indexer on top of the queryer's store directory and writer.
# Original note: the indexer thread is set to daemon, causing it to
# terminate on a SIGINT.
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)
ingestor = Ingestor()
edgar = Edgar()

# One ticker per line: download its documents, then index right away.
with open('data.txt', 'r') as reader:
    for line in reader:
        ticker = line.rstrip()
        ingestor.file_downloader(edgar.ingest_stock(ticker), directoryToWalk)
        indexer.indexDocs()

# Hand control over to the terminal query interface.
queryer.run(queryer.writer, queryer.analyzer)

# Once querying returns, invoke the signal handler directly so the
# writer is cleaned up.
quit_gracefully()
#! /usr/bin/env python
# encoding: utf-8

import os
from ingestor import Ingestor, IngestorException, Sedar

ingestor = Ingestor()

# The source is created in "xbrl" mode; the original note names "html" as
# the other choice.
sedar = Sedar("xbrl")

docs_directory = "test"

# Create the download directory up front when it is not there yet.
if not os.path.exists(docs_directory):
    os.mkdir(docs_directory)

# Each line of the input file is one ticker: look it up through Sedar and
# hand the result to the downloader.
with open('sedar_tickers.txt', 'r') as reader:
    for line in reader:
        ticker = line.rstrip()
        ingestor.file_downloader(sedar.ingest_stock(ticker), docs_directory)
# Example #7
# 0
    exit(0)

# Always declare the signal handler first, before any other setup.
signal.signal(signal.SIGINT, quit_gracefully)

env = lucene.initVM()
queryer = Queryer("index", "hits")
# print() is called as a function: the Python 2 print statement used here
# before is a SyntaxError on Python 3. The Python 3 output is unchanged.
print('Using Directory: ', queryer.store_dir)

# Directory for storing downloaded docs; the indexer is pointed at it too.
directoryToWalk = "docs"

# Create the indexer on top of the queryer's store directory and writer.
# Original note: the indexer thread is set to daemon, causing it to
# terminate on a SIGINT.
indexer = Indexer(queryer.store_dir, queryer.writer, directoryToWalk)
ingestor = Ingestor()
edgar = Edgar()

# One ticker per line: download its documents, then index right away.
with open('edgar_tickers.txt', 'r') as reader:
    for line in reader:
        ingestor.file_downloader(edgar.ingest_stock(line.rstrip()),
                                 directoryToWalk)
        indexer.indexDocs()

# Start up the terminal query interface.
queryer.run(queryer.writer, queryer.analyzer)

# If querying returns, call the signal handler directly so the writer
# is cleaned up.
quit_gracefully()
#! /usr/bin/env python
# encoding: utf-8

import os
from ingestor import Ingestor, IngestorException, Edgar

ingestor = Ingestor()

# The source is created in "xbrl" mode; the original note names "html" as
# the other choice.
edgar = Edgar("xbrl")

docs_directory = "test"

# Create the download directory up front when it is not there yet.
if not os.path.exists(docs_directory):
    os.mkdir(docs_directory)

# Each line of the input file is one ticker: look it up through Edgar and
# hand the result to the downloader.
with open('edgar_tickers.txt', 'r') as reader:
    for line in reader:
        ticker = line.rstrip()
        ingestor.file_downloader(edgar.ingest_stock(ticker), docs_directory)