Exemple #1
0
def test_missing_cols(chunkstore_lib):
    """Append a frame lacking column 'B' and check the combined read.

    The first write stores columns 'A' and 'B'; the append only carries 'A'.
    The expected result holds NaN in 'B' for the appended rows, both for a
    full read and for a read restricted to column 'B'.
    """
    def daily_index(start, periods):
        # Daily DatetimeIndex named 'date', as used by every frame below.
        return DatetimeIndex(pd.date_range(start, periods=periods, freq='D'),
                             name='date')

    first_idx = daily_index('2019-01-01', 3)
    second_idx = daily_index('2019-01-04', 3)
    full_idx = daily_index('2019-01-01', 6)

    initial = pd.DataFrame({'A': [1, 2, 3], 'B': [5, 6, 7]}, index=first_idx)
    chunkstore_lib.write('test', initial, chunk_size='D')

    appended = pd.DataFrame({'A': [40, 50, 60]}, index=second_idx)
    chunkstore_lib.append('test', appended, chunk_size='D')

    expected = DataFrame(
        {
            'A': [1, 2, 3, 40, 50, 60],
            'B': [5.0, 6.0, 7.0, np.nan, np.nan, np.nan],
        },
        index=full_idx)

    everything = chunkstore_lib.read('test')
    assert_frame_equal_(everything, expected, check_freq=False)

    only_b = chunkstore_lib.read('test', columns=['B'])
    assert_frame_equal_(only_b, expected['B'].to_frame(), check_freq=False)
Exemple #2
0
def test_compression(chunkstore_lib):
    """
    Regression test for issue 407.

    Chunkstore used to leave the first segment (segment id -1) in place, so
    an append added new chunks with ids 0 and 1 while a later read still
    picked up the stale -1 segment. The special -1 segment id (which used to
    mark a standalone segment) has since been removed.
    """
    factor_columns = [
        'beta', 'btop', 'earnyild', 'growth', 'industry',
        'leverage', 'liquidty', 'momentum', 'resvol',
        'sid', 'size', 'sizenl'
    ]

    def generate_data(date):
        """
        Build a random frame that is almost exactly the size of one
        chunkstore segment, with every row stamped with `date`.
        """
        frame = pd.DataFrame(np.random.randn(10000 * 16, 12),
                             columns=factor_columns)
        frame['date'] = date
        return frame

    first_day = pd.Timestamp('2000-01-01')
    written = generate_data(first_day)
    chunkstore_lib.write('test', written, chunk_size='A')

    second_day = first_day + pd.Timedelta(1, unit='D')
    appended = generate_data(second_day)
    chunkstore_lib.append('test', appended)

    expected = pd.concat([written, appended], ignore_index=True)
    assert_frame_equal_(chunkstore_lib.read('test'), expected)
def test_with_nans():
    """Round-trip a frame with an all-NaN column through FrametoArraySerializer.

    Column 'A' is overwritten with NaN before serializing; the deserialized
    frame must compare equal to the original.
    """
    df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                      columns=list('ABCD'))
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0 and raises AttributeError there.
    df['A'] = np.nan
    n = FrametoArraySerializer()
    a = n.serialize(df)
    assert_frame_equal_(df, n.deserialize(a))
Exemple #4
0
def test_rewrite(chunkstore_lib):
    """
    Regression test for issue 427 (metadata stored and updated incorrectly).

    Frames without an index carry no "index" field in their metadata, so
    updating existing metadata did not remove a previously stored index
    field. In addition, symbol, start and end form the collection index, but
    metadata was being stored without them (defaulting to null,null,null).

    The test writes an indexed frame, overwrites it with an un-indexed frame
    holding the same data, and expects the read to return the latter.
    """
    values = [100, 200, 300, 400, 500, 600, 700, 800]
    stamps = pd.date_range(start=dt(2017, 5, 1, 1), periods=8, freq='6H')

    indexed = DataFrame(data={'something': list(values)},
                        index=DatetimeIndex(stamps, name='date'))
    chunkstore_lib.write('test', indexed, chunk_size='D')

    # Same data, but with the dates as an ordinary column instead of the index.
    unindexed = DataFrame(data={'something': list(values), 'date': stamps})
    chunkstore_lib.write('test', unindexed, chunk_size='D')

    assert_frame_equal_(chunkstore_lib.read('test'), unindexed)
Exemple #5
0
def test_read_apply(chunkstore_lib):
    """Check that read_apply yields the stored data with `func` applied.

    `func` adds 1.0 to column 'data0' in place and returns the same frame.
    """
    df = create_test_data(index=False, size=20)
    chunkstore_lib.write('test', df, chunk_size='M')

    def func(df):
        df['data0'] += 1.0
        return df

    # func mutates its argument, so build the expected frame once from a
    # copy. Calling func(df) inside the loop would shift the expected values
    # by a further 1.0 on every iteration.
    expected = func(df.copy())

    for data in read_apply(chunkstore_lib, 'test', func):
        assert_frame_equal_(data, expected)
def test_ts_write_pandas(tickstore_lib):
    """Write DUMMY_DATA, read it back as a frame, then round-trip that frame.

    After the first read, the first index entry and the first value of
    column 'a' are checked. The symbol is then deleted, the frame that was
    read is written back, and a second read must equal it (names ignored).
    """
    data = DUMMY_DATA
    tickstore_lib.write('SYM', data)

    data = tickstore_lib.read('SYM', columns=None)
    assert data.index[0] == dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
    # Use .iloc for positional access: integer keys passed to
    # Series.__getitem__ on a non-integer index are deprecated as positions
    # in pandas and will be treated as labels.
    assert data.a.iloc[0] == 1
    tickstore_lib.delete('SYM')
    tickstore_lib.write('SYM', data)

    read = tickstore_lib.read('SYM', columns=None)
    assert_frame_equal_(read, data, check_names=False)
Exemple #7
0
def test_date_interval(chunkstore_lib):
    """Read with each DateRange interval type and check the rows returned.

    Eight daily rows starting 2017-05-01 are stored twice: once with the
    dates as the index ('test') and once with the dates as a plain column
    ('test2'). Each read uses a range starting 2017-05-02 and ending either
    2017-05-05 or open-ended (None).
    """
    date_range = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')
    range_start = dt(2017, 5, 2)
    range_end = dt(2017, 5, 5)

    # test with index
    df = DataFrame(data={'data': range(8)},
                   index=DatetimeIndex(date_range, name='date'))
    chunkstore_lib.write('test', df, chunk_size='D')

    # (range end, interval type, expected positional slice bounds of df)
    indexed_cases = [
        (range_end, CLOSED_OPEN, 1, 4),
        (range_end, OPEN_OPEN, 2, 4),
        (range_end, OPEN_CLOSED, 2, 5),
        (range_end, CLOSED_CLOSED, 1, 5),
        (None, CLOSED_OPEN, 1, 8),
    ]
    for end, interval, lo, hi in indexed_cases:
        ret = chunkstore_lib.read('test',
                                  chunk_range=DateRange(range_start, end,
                                                        interval))
        assert_frame_equal_(ret, df[lo:hi], check_freq=False)

    # test without index
    df = DataFrame(data={'data': range(8), 'date': date_range})
    chunkstore_lib.write('test2', df, chunk_size='D')

    # (range end, interval type, expected number of rows)
    unindexed_cases = [
        (range_end, CLOSED_OPEN, 3),
        (range_end, OPEN_OPEN, 2),
        (range_end, OPEN_CLOSED, 3),
        (range_end, CLOSED_CLOSED, 4),
        (None, CLOSED_OPEN, 7),
    ]
    for end, interval, expected_rows in unindexed_cases:
        ret = chunkstore_lib.read('test2',
                                  chunk_range=DateRange(range_start, end,
                                                        interval))
        assert len(ret) == expected_rows
def test_frame_converter():
    """Round-trip a random integer frame through FrameConverter docify/objify."""
    converter = FrameConverter()
    frame = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                         columns=list('ABCD'))

    document = converter.docify(frame)
    restored = converter.objify(document)
    assert_frame_equal_(restored, frame)
def test_empty_columns():
    """Round-trip a frame whose columns are all empty through FrametoArraySerializer."""
    empty_frame = pd.DataFrame(data={'A': [], 'B': [], 'C': []})
    serializer = FrametoArraySerializer()
    restored = serializer.deserialize(serializer.serialize(empty_frame))
    assert_frame_equal_(empty_frame, restored)
def test_with_strings():
    """Round-trip a single string column through FrameConverter docify/objify."""
    converter = FrameConverter()
    frame = pd.DataFrame(data={'one': ['a', 'b', 'c']})

    document = converter.docify(frame)
    restored = converter.objify(document)
    assert_frame_equal_(restored, frame)