Example #1
# Assumed imports for this snippet; `tp` refers to the chunk-processing
# utilities (Downsample, SplitChunks, Sanitizer) used here, whose import is
# not shown in the original example.
from os import path
from zipfile import ZipFile, ZIP_DEFLATED

import marketflow


def main(fname_in, fname_out, size, frac):
    taq_in = marketflow.TAQ2Chunks(fname_in, do_process_chunk=False)
    downsampled = tp.Downsample(taq_in, frac)
    sanitized = tp.Sanitizer(tp.SplitChunks(downsampled, 'Symbol_root'))

    writ_len = 0
    with open(fname_out, 'wb') as ofile:
        # Write the original header line as a placeholder; it is rewritten in
        # place below once the true record count is known.
        ofile.write(taq_in.first_line)

        # Copy whole sanitized chunks until the requested record budget is hit
        for chunk in sanitized:
            if len(chunk) + writ_len > size:
                break
            ofile.write(chunk)
            writ_len += len(chunk)

        # Rebuild the header: keep the date, substitute the number of records
        # actually written, and pad back out to the original line length
        # (the final two bytes are the b'\r\n' terminator).
        line_len = len(taq_in.first_line)
        datestr, numlines = taq_in.first_line.split(b':')
        first_line = datestr + b':' + b' ' * 4 + str(writ_len).encode()
        first_line += b' ' * (line_len - len(first_line) - 2) + b'\r\n'
        ofile.seek(0)
        ofile.write(first_line)

    basename = path.basename(fname_out)
    with ZipFile(fname_out + '.zip', 'w') as zf:
        zf.write(fname_out, basename, ZIP_DEFLATED)
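
A hypothetical invocation of the function above, with illustrative file names and parameters that are not taken from the original example; `size` caps the number of records copied into the derived file, and `frac` is passed to `tp.Downsample` as the downsampling fraction:

# Illustrative arguments only: keep roughly 1% of the input records and write
# at most 100000 of them to the derived sample file, which is then zipped.
main('EQY_US_ALL_BBO_RAW', 'taq_sample_bbo', size=100000, frac=0.01)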
Example #2
# Same assumed imports and `tp` processing utilities as in Example #1 above.
def main(fname_in, fname_out, size, frac):
    taq_in = marketflow.TAQ2Chunks(fname_in, do_process_chunk=False)
    downsampled = tp.Downsample(taq_in, frac)
    # We should downsample enough that things will fit in memory!
    recombined = tp.JoinedChunks(tp.SplitChunks(downsampled, 'Symbol_root'),
                                 'Symbol_root')
    sanitized = tp.Sanitizer(recombined)

    # Assemble our chunks - all of this should fit into memory for quick n'
    # easy testing
    write_len = 0
    chunks = []
    for chunk in sanitized:
        if len(chunk) + write_len > size:
            break
        chunks.append(chunk)
        write_len += len(chunk)

    # Compute a correct first line for this derived file
    line_len = len(taq_in.first_line)
    datestr, numlines = taq_in.first_line.split(b':')
    first_line = datestr + b':' + b' ' * 4 + str(write_len).encode()
    # padding for the rest of the line
    first_line += b' ' * (line_len - len(first_line) - 2) + b'\r\n'

    with open(fname_out, 'wb') as ofile:
        ofile.write(first_line)

        for chunk in sorted(chunks, key=lambda x: x[0]['Symbol_root']):
            ofile.write(chunk)

    basename = path.basename(fname_out)
    with ZipFile(fname_out + '.zip', 'w') as zf:
        zf.write(fname_out, basename, ZIP_DEFLATED)
Example #3
# Assumed module-level context for this test (not shown in the snippet):
# `import arrow`, `import marketflow`, `from dateutil.tz import gettz`, plus
# `sample_data_dir` and `chunksize` defined as test-module configuration.
def test_row_values(fname, numlines=5):

    sample = marketflow.TAQ2Chunks(sample_data_dir + fname,
                                   chunksize=chunksize)
    chunk = next(sample)

    # Check len(chunk) == min(sample.chunksize, length of file)
    print(sample.numlines)
    assert len(chunk) == sample.chunksize

    # Use raw_taq to read in raw bytes
    chunk_unprocessed_gen = marketflow.TAQ2Chunks(
        sample_data_dir + fname, chunksize=numlines, do_process_chunk=False)
    chunk_processed_gen = marketflow.TAQ2Chunks(
        sample_data_dir + fname, chunksize=numlines, do_process_chunk=True)
    chunk = next(chunk_unprocessed_gen)
    chunk_proc = next(chunk_processed_gen)

    month, day, year = (chunk_unprocessed_gen.month, chunk_unprocessed_gen.day,
                        chunk_unprocessed_gen.year)

    for i in range(chunk.shape[0]):
        entry = chunk[i]
        # The raw 'msec' field holds seconds (chars 0-2) and milliseconds
        # (chars 2-5) within the current minute
        msec = int(entry['msec'][2:5])

        date_object = arrow.Arrow(year, month, day,
                                  hour=int(entry['hour']),
                                  minute=int(entry['minute']),
                                  second=int(entry['msec'][0:2]),
                                  tzinfo=gettz('America/New_York'))

        unix_time = date_object.timestamp + msec / 1000

        assert unix_time == chunk_proc[i]['Time']

        # Decode the fixed-width byte fields by hand
        symbol_root = entry['Symbol_root']
        symbol_suffix = entry['Symbol_suffix']
        bid_price = (int(entry['Bid_Price'][0:7]) +
                     int(entry['Bid_Price'][7:11]) / 10000)
        bid_size = int(entry['Bid_Size'])
        ask_price = (int(entry['Ask_Price'][0:7]) +
                     int(entry['Ask_Price'][7:11]) / 10000)
        ask_size = int(entry['Ask_Size'])

        # Processed columns 7-10 hold Bid_Price, Bid_Size, Ask_Price, Ask_Size
        assert bid_price == chunk_proc[i][7]
        assert bid_size == chunk_proc[i][8]
        assert ask_price == chunk_proc[i][9]
        assert ask_size == chunk_proc[i][10]
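
For concreteness, a small sketch of the time conversion the loop above performs, using made-up field values and a made-up trade date (the real values come from the raw chunk and the file header); it mirrors the example's use of arrow's `.timestamp` attribute, a property in the arrow versions this code targets:

# Hypothetical raw fields for one quote: 09:30 and an 'msec' field of b'02500',
# i.e. 2 seconds and 500 milliseconds past the minute.
hour, minute, raw_msec = 9, 30, b'02500'
second = int(raw_msec[0:2])   # 2
millis = int(raw_msec[2:5])   # 500
t = arrow.Arrow(2014, 2, 6, hour, minute, second,   # made-up date
                tzinfo=gettz('America/New_York'))
unix_time = t.timestamp + millis / 1000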
Example #4
def test_ini_row_value():
    '''Test values read explicitly from test_taq.ini'''
    sample = marketflow.TAQ2Chunks(sample_data_dir +
                                   config['taq-data']['std-test-file'],
                                   chunksize=chunksize)
    chunk = next(sample)
    row0 = chunk[0]
    test_values = config['std-test-row-values']

    assert float(test_values['time']) == row0['Time']
    assert int(test_values['hour']) == row0['hour']
    assert int(test_values['minute']) == row0['minute']
    assert int(test_values['msec']) == row0['msec']
    assert test_values['exchange'].encode('ascii') == row0['Exchange']
    assert test_values['symbol_root'].encode('ascii') == row0['Symbol_root']
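
A rough sketch of the structure this test implies for test_taq.ini, reconstructed from the keys read above; the section and key names come from the code, while every value below is a placeholder rather than the real contents of the file:

import configparser

# Placeholder values only - the real test_taq.ini is not reproduced here.
config = configparser.ConfigParser()
config.read_string("""
[taq-data]
std-test-file = <name of the standard test file>

[std-test-row-values]
time = <expected Time value for row 0>
hour = <expected hour>
minute = <expected minute>
msec = <expected msec>
exchange = <expected Exchange value>
symbol_root = <expected Symbol_root value>
""")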
Example #5
def test_h5_files(fname, tmpdir):
    # XXX Update to be appropriate conversion to HDF5
    sample = marketflow.TAQ2Chunks(sample_data_dir + fname)