def test_missing_cols(chunkstore_lib):
    """Append a frame that lacks column 'B' and check what is read back.

    Three daily rows are written with columns 'A' and 'B'; three more are
    appended with only 'A'. The expected frame holds NaN in 'B' for the
    appended rows, both for a full read and for a read restricted to
    ``columns=['B']``.
    """
    first_dates = DatetimeIndex(pd.date_range('2019-01-01', periods=3, freq='D'), name='date')
    second_dates = DatetimeIndex(pd.date_range('2019-01-04', periods=3, freq='D'), name='date')
    all_dates = DatetimeIndex(pd.date_range('2019-01-01', periods=6, freq='D'), name='date')

    expected_df = DataFrame(
        {
            'A': [1, 2, 3, 40, 50, 60],
            'B': [5.0, 6.0, 7.0, np.nan, np.nan, np.nan],
        },
        index=all_dates)

    both_columns = pd.DataFrame({'A': [1, 2, 3], 'B': [5, 6, 7]}, index=first_dates)
    chunkstore_lib.write('test', both_columns, chunk_size='D')

    only_a = pd.DataFrame({'A': [40, 50, 60]}, index=second_dates)
    chunkstore_lib.append('test', only_a, chunk_size='D')

    # Full read: the appended rows have no value for 'B'.
    full_read = chunkstore_lib.read('test')
    assert_frame_equal_(full_read, expected_df, check_freq=False)

    # Column-restricted read of the partially missing column.
    b_only = chunkstore_lib.read('test', columns=['B'])
    expected_b = expected_df['B'].to_frame()
    assert_frame_equal_(b_only, expected_b, check_freq=False)
def test_compression(chunkstore_lib):
    """
    Issue 407 - Chunkstore was not removing the 1st segment, with segment
    id -1, so on an append it would append new chunks with id 0 and 1, and a
    subsequent read would still pick up -1 (which should have been removed
    or overwritten). Since the -1 segment (which previously indicated a
    standalone segment) is no longer needed, the special -1 segment id is
    now removed.
    """
    factor_columns = [
        'beta', 'btop', 'earnyild', 'growth', 'industry', 'leverage',
        'liquidty', 'momentum', 'resvol', 'sid', 'size', 'sizenl',
    ]

    def generate_data(date):
        """
        Generates a dataframe that is almost exactly the size of a segment
        in chunkstore
        """
        frame = pd.DataFrame(np.random.randn(10000 * 16, 12), columns=factor_columns)
        frame['date'] = date
        return frame

    first_day = pd.Timestamp('2000-01-01')
    df = generate_data(first_day)
    chunkstore_lib.write('test', df, chunk_size='A')

    # The second frame is dated one day later and appended to the same symbol.
    second_day = first_day + pd.Timedelta(1, unit='D')
    df2 = generate_data(second_day)
    chunkstore_lib.append('test', df2)

    stored = chunkstore_lib.read('test')
    expected = pd.concat([df, df2], ignore_index=True)
    assert_frame_equal_(stored, expected)
def test_with_nans():
    """Round-trip a frame containing an all-NaN column through the serializer.

    Builds a 100x4 frame of random integers, overwrites column 'A' with NaN,
    and asserts that ``FrametoArraySerializer.deserialize(serialize(df))``
    equals the original frame.
    """
    df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)), columns=list('ABCD'))
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias was removed
    # in NumPy 2.0 and raises AttributeError there.
    df['A'] = np.nan
    n = FrametoArraySerializer()
    a = n.serialize(df)
    assert_frame_equal_(df, n.deserialize(a))
def test_rewrite(chunkstore_lib):
    """
    Issue 427 - metadata was being stored and updated incorrectly.

    Dataframes without an index have no "index" field in their metadata, so
    updating existing metadata does not remove the index field. Also,
    metadata was incorrectly being stored: symbol, start, and end are the
    index for the collection, but metadata was being stored without an index
    (so it was defaulting to null,null,null).
    """
    values = [100, 200, 300, 400, 500, 600, 700, 800]
    date_range = pd.date_range(start=dt(2017, 5, 1, 1), periods=8, freq='6H')

    # First write: the dates live in the frame's index.
    indexed = DataFrame(
        data={'something': list(values)},
        index=DatetimeIndex(date_range, name='date'))
    chunkstore_lib.write('test', indexed, chunk_size='D')

    # Second write to the same symbol: the dates are an ordinary column.
    df2 = DataFrame(data={'something': list(values), 'date': date_range})
    chunkstore_lib.write('test', df2, chunk_size='D')

    stored = chunkstore_lib.read('test')
    assert_frame_equal_(stored, df2)
def test_read_apply(chunkstore_lib):
    """Check that ``read_apply`` yields the stored data with the function applied.

    Note that the transformation mutates its argument in place, so every
    comparison below also re-applies it to the source frame.
    """
    source = create_test_data(index=False, size=20)
    chunkstore_lib.write('test', source, chunk_size='M')

    def add_one(frame):
        # In-place increment of the 'data0' column; the same frame is returned.
        frame['data0'] += 1.0
        return frame

    for transformed in read_apply(chunkstore_lib, 'test', add_one):
        expected = add_one(source)
        assert_frame_equal_(transformed, expected)
def test_ts_write_pandas(tickstore_lib):
    """Write tick data, read it back, then round-trip the frame that was read.

    ``DUMMY_DATA`` is written under 'SYM' and read back; the first index
    entry and the first value of column ``a`` are checked. The symbol is
    then deleted, the frame that was read is written again, and a second
    read must equal it (index/column names are not compared).
    """
    data = DUMMY_DATA
    tickstore_lib.write('SYM', data)

    data = tickstore_lib.read('SYM', columns=None)
    assert data.index[0] == dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
    # Use explicit positional access: an integer key through ``Series[...]``
    # on a non-integer index relies on a positional fallback that pandas has
    # deprecated.
    assert data.a.iloc[0] == 1

    tickstore_lib.delete('SYM')
    tickstore_lib.write('SYM', data)

    read = tickstore_lib.read('SYM', columns=None)
    assert_frame_equal_(read, data, check_names=False)
def test_date_interval(chunkstore_lib):
    """Read with each ``DateRange`` interval type and check the rows returned.

    Eight daily rows starting 2017-05-01 are stored twice: once with the
    dates as the index (symbol 'test') and once with the dates as a column
    (symbol 'test2'). Each symbol is read with closed/open combinations of
    the range 2017-05-02 .. 2017-05-05 and with an open-ended range starting
    at 2017-05-02.
    """
    date_range = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')
    range_start = dt(2017, 5, 2)
    range_end = dt(2017, 5, 5)

    # (range end, interval type, expected positional slice, expected row count)
    cases = [
        (range_end, CLOSED_OPEN, slice(1, 4), 3),
        (range_end, OPEN_OPEN, slice(2, 4), 2),
        (range_end, OPEN_CLOSED, slice(2, 5), 3),
        (range_end, CLOSED_CLOSED, slice(1, 5), 4),
        (None, CLOSED_OPEN, slice(1, 8), 7),
    ]

    # test with index
    indexed = DataFrame(data={'data': range(8)},
                        index=DatetimeIndex(date_range, name='date'))
    chunkstore_lib.write('test', indexed, chunk_size='D')
    for end, interval, rows, _ in cases:
        ret = chunkstore_lib.read('test', chunk_range=DateRange(range_start, end, interval))
        assert_frame_equal_(ret, indexed[rows], check_freq=False)

    # test without index
    unindexed = DataFrame(data={'data': range(8), 'date': date_range})
    chunkstore_lib.write('test2', unindexed, chunk_size='D')
    for end, interval, _, row_count in cases:
        ret = chunkstore_lib.read('test2', chunk_range=DateRange(range_start, end, interval))
        assert len(ret) == row_count
def test_frame_converter():
    """Round-trip a random integer frame through ``FrameConverter``.

    ``objify(docify(df))`` must equal the original 100x4 frame.
    """
    converter = FrameConverter()
    original = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                            columns=list('ABCD'))
    document = converter.docify(original)
    restored = converter.objify(document)
    assert_frame_equal_(restored, original)
def test_empty_columns():
    """Round-trip a frame whose three columns are all empty.

    ``FrametoArraySerializer.deserialize(serialize(df))`` must equal the
    original frame.
    """
    empty_frame = pd.DataFrame(data={'A': [], 'B': [], 'C': []})
    serializer = FrametoArraySerializer()
    payload = serializer.serialize(empty_frame)
    restored = serializer.deserialize(payload)
    assert_frame_equal_(empty_frame, restored)
def test_with_strings():
    """Round-trip a single string column through ``FrameConverter``.

    ``objify(docify(df))`` must equal the original frame.
    """
    converter = FrameConverter()
    original = pd.DataFrame(data={'one': ['a', 'b', 'c']})
    document = converter.docify(original)
    restored = converter.objify(document)
    assert_frame_equal_(restored, original)