Code example #1
0
def count_chunk_elements1(fname,
                          chunksize=1000000,
                          max_chunk=None,
                          process_chunk=False):
    """Tally how often each 'Symbol_root' value occurs across a TAQ file.

    Streams the file in chunks via raw_taq.TAQ2Chunks, counting the
    'Symbol_root' column of each chunk, and returns the combined tallies.

    Parameters
    ----------
    fname : path of the TAQ zip file to read
    chunksize : rows per chunk passed through to TAQ2Chunks
    max_chunk : stop after this many chunks (None reads the whole file)
    process_chunk : forwarded to TAQ2Chunks unchanged

    Returns
    -------
    Counter mapping each symbol root to its total occurrence count.
    """
    tallies = Counter()

    reader = raw_taq.TAQ2Chunks(fname,
                                chunksize=chunksize,
                                process_chunk=process_chunk)

    # islice(..., None) iterates the whole reader, so max_chunk=None is safe.
    for chunk in islice(reader, max_chunk):
        roots, freqs = np.unique(chunk[:]['Symbol_root'], return_counts=True)
        tallies.update(dict(zip_longest(roots, freqs)))

    return tallies
Code example #2
0
File: test_taq.py — Project: Giraffewhale/python-taq
def test_row_values(fname):
    """Check row 0 of the first chunk against expected per-field values.

    Expected values come from the 'file1-row-values' config section
    (configparser lower-cases option names, matching field.lower() below).
    Each field of the first row, stringified, must equal its configured value.
    """
    sample = taq.TAQ2Chunks(sample_data_dir + fname)
    chunk = next(sample.iter_)
    assert len(chunk) == sample.chunksize

    # dict() over config.items replaces the original manual copy loop.
    first_row_vals = dict(config.items('file1-row-values'))
    print(first_row_vals)

    field_mapping = {}
    # enumerate replaces the hand-maintained index counter (i = 0; i += 1).
    for i, field in enumerate(chunk.dtype.names):
        field_lower = field.lower()
        field_mapping[field_lower] = str(chunk[0][i])
        assert field_mapping[field_lower] == first_row_vals[field_lower]
    print(field_mapping)
Code example #3
0
#!/usr/bin/env python3

import raw_taq
import pandas as pd
import numpy as np
from statistics import mode, StatisticsError


def process_chunks(taq):
    """Concatenate every chunk yielded by taq.convert_taq(20) into one frame.

    Bug fix: the original called accum.append(...) and discarded the result —
    DataFrame.append returns a NEW frame (and was removed in pandas 2.x), so
    only the first chunk was ever kept. Collect all chunks and pd.concat them.

    Prints the combined DataFrame and returns it (the original returned None,
    so returning the frame is backward compatible for existing callers).
    """
    chunk_gen = taq.convert_taq(20)  # generator yielding one chunk at a time

    # Build all per-chunk frames first; a single concat avoids the
    # quadratic copying of repeated appends.
    frames = [pd.DataFrame(chunk) for chunk in chunk_gen]
    accum = pd.concat(frames)

    print(accum)
    return accum


if __name__ == '__main__':
    # The day suffix of the data file comes from the first CLI argument.
    # Previously-used local test files, kept for reference:
    # fname = '../local_data/EQY_US_ALL_BBO_20150102.zip'
    # fname = '../local_data/EQY_US_ALL_BBO_20140206.zip'
    from sys import argv

    fname = ''.join(('../local_data/EQY_US_ALL_BBO_201501', argv[1], '.zip'))
    print("processing", fname)

    process_chunks(raw_taq.TAQ2Chunks(fname))
Code example #4
0
if __name__ == '__main__':
    options = read_command(sys.argv)

    # Prompt user to overwrite previous output files
    clear_log_dir()

    # Iterate the files directly instead of `for i in range(len(DATA_FILES))`.
    for test_file in DATA_FILES:
        # Generate name for output file. Assumes filename of form
        # "EQY_US_ALL_BBO_YYYYMMDD.zip"; slice [15:23] picks out YYYYMMDD.
        out_name = test_file[15:23]

        # type(sample) is raw_taq.TAQ2Chunks
        sample = taq.TAQ2Chunks(test_file)

        print("+++ Creating log file for [" + test_file + "] as ./test-logs/" + out_name + "_log.txt")
        with open("test-logs/" + out_name + "_log.txt", 'w') as log:
            for chunk in sample.iter_:
                # chunk is a numpy structured array; sort its dtype fields by
                # name so the log's column order is deterministic.
                sorted_dtype = [(name, str(info[0]))
                                for name, info in sorted(chunk.dtype.fields.items(),
                                                         key=lambda kv: kv[0])]

                # 'dtype_str' replaces the original loop variable 'type',
                # which shadowed the builtin of the same name.
                for attr, dtype_str in sorted_dtype:
                    log.write(attr + "     ")
Code example #5
0
                                   process_chunk=process_chunk), max_chunk)):

        counts = np.unique(chunk[:]['Symbol_root'], return_counts=True)
        symbol_roots.update(dict(zip_longest(counts[0], counts[1])))

        #print("\r {0}".format(i),end="")

    return symbol_roots


if __name__ == '__main__':

    t0 = time.time()

    faqname = "/global/scratch/aculich/mirror/EQY_US_ALL_BBO/EQY_US_ALL_BBO_2015/EQY_US_ALL_BBO_201501/EQY_US_ALL_BBO_20150102.zip"
    chunks = raw_taq.TAQ2Chunks(faqname, chunksize=1, process_chunk=False)

    try:
        max_chunk = int(argv[1])
    except:
        max_chunk = None

    c = count_chunk_elements1(faqname, max_chunk=max_chunk)

    t1 = time.time()

    print("total number of records", sum(c.values()))

    print("timing:", t0, t1, t1 - t0)

    for (i, (k, v)) in enumerate(islice(c.most_common(), 100)):