コード例 #1
0
ファイル: test_fixes.py プロジェクト: vishalbelsare/arctic
def test_missing_cols(chunkstore_lib):
    """Append a frame that lacks a column and read the symbol back.

    Three daily rows with columns 'A' and 'B' are written, then three further
    daily rows carrying only 'A' are appended. The full read, and a read
    restricted to column 'B', are expected to hold NaN in 'B' for the
    appended dates.
    """
    first_dates = DatetimeIndex(
        pd.date_range('2019-01-01', periods=3, freq='D'), name='date')
    later_dates = DatetimeIndex(
        pd.date_range('2019-01-04', periods=3, freq='D'), name='date')
    all_dates = DatetimeIndex(
        pd.date_range('2019-01-01', periods=6, freq='D'), name='date')

    # Initial write carries both columns.
    both_cols = pd.DataFrame({'A': [1, 2, 3], 'B': [5, 6, 7]},
                             index=first_dates)
    chunkstore_lib.write('test', both_cols, chunk_size='D')

    # The appended frame has no 'B' column at all.
    only_a = pd.DataFrame({'A': [40, 50, 60]}, index=later_dates)
    chunkstore_lib.append('test', only_a, chunk_size='D')

    expected_df = DataFrame(
        {
            'A': [1, 2, 3, 40, 50, 60],
            'B': [5.0, 6.0, 7.0, np.nan, np.nan, np.nan]
        },
        index=all_dates)

    whole = chunkstore_lib.read('test')
    assert_frame_equal_(whole, expected_df, check_freq=False)

    b_only = chunkstore_lib.read('test', columns=['B'])
    expected_b = expected_df['B'].to_frame()
    assert_frame_equal_(b_only, expected_b, check_freq=False)
コード例 #2
0
ファイル: test_fixes.py プロジェクト: vishalbelsare/arctic
def test_compression(chunkstore_lib):
    """Regression test for issue 407.

    Chunkstore used to leave the first segment (segment id -1) in place, so
    an append added new chunks with ids 0 and 1 while a later read still
    picked up the stale -1 segment. The -1 id previously marked a standalone
    segment; it is no longer needed and has been removed.

    The test writes one large frame, appends a second one dated a day later,
    and expects the read to equal the two frames concatenated.
    """
    factor_columns = [
        'beta', 'btop', 'earnyild', 'growth', 'industry',
        'leverage', 'liquidty', 'momentum', 'resvol',
        'sid', 'size', 'sizenl'
    ]

    def make_frame(day):
        """Build a random frame that is almost exactly one chunkstore segment in size."""
        frame = pd.DataFrame(np.random.randn(10000 * 16, 12),
                             columns=factor_columns)
        frame['date'] = day
        return frame

    first_day = pd.Timestamp('2000-01-01')
    written = make_frame(first_day)
    chunkstore_lib.write('test', written, chunk_size='A')

    second_day = first_day + pd.Timedelta(1, unit='D')
    appended = make_frame(second_day)
    chunkstore_lib.append('test', appended)

    expected = pd.concat([written, appended], ignore_index=True)
    assert_frame_equal_(chunkstore_lib.read('test'), expected)
コード例 #3
0
def test_with_nans():
    """Round-trip a frame whose 'A' column is entirely NaN through the serializer.

    A 100x4 frame of random integers is built, column 'A' is overwritten with
    NaN, and the frame is expected to compare equal after
    serialize -> deserialize.
    """
    df = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                      columns=list('ABCD'))
    # Use the canonical lowercase spelling: the `np.NaN` alias was removed in
    # NumPy 2.0, and `np.nan` is what the rest of this file uses.
    df['A'] = np.nan
    n = FrametoArraySerializer()
    a = n.serialize(df)
    assert_frame_equal_(df, n.deserialize(a))
コード例 #4
0
ファイル: test_fixes.py プロジェクト: vishalbelsare/arctic
def test_rewrite(chunkstore_lib):
    """Regression test for issue 427 (metadata stored and updated incorrectly).

    Frames without an index have no "index" field in their metadata, so
    updating existing metadata did not remove the index field. Metadata was
    also stored incorrectly: symbol, start and end are the index for the
    collection, but metadata was being stored without an index (defaulting to
    null, null, null).

    The test writes an indexed frame, overwrites the same symbol with an
    un-indexed frame holding the same values, and expects the read to match
    the un-indexed frame.
    """
    stamps = pd.date_range(start=dt(2017, 5, 1, 1), periods=8, freq='6H')
    values = [100, 200, 300, 400, 500, 600, 700, 800]

    # First write: dates carried as a named index.
    with_index = DataFrame(data={'something': list(values)},
                           index=DatetimeIndex(stamps, name='date'))
    chunkstore_lib.write('test', with_index, chunk_size='D')

    # Second write to the same symbol: dates carried as an ordinary column.
    without_index = DataFrame(data={'something': list(values),
                                    'date': stamps})
    chunkstore_lib.write('test', without_index, chunk_size='D')

    assert_frame_equal_(chunkstore_lib.read('test'), without_index)
コード例 #5
0
def test_read_apply(chunkstore_lib):
    """Check that read_apply yields data with the supplied function applied.

    A test frame is written with monthly chunking, then every item produced
    by ``read_apply`` is compared against the written frame with ``func``
    applied to it.
    """
    df = create_test_data(index=False, size=20)
    chunkstore_lib.write('test', df, chunk_size='M')

    def func(df):
        # Mutates its argument in place and returns it.
        df['data0'] += 1.0
        return df

    # Build the expected frame once, on a copy. Because func mutates in place,
    # calling func(df) inside the loop would add another 1.0 to the expected
    # values on every iteration and silently alter the source frame.
    # NOTE(review): each yielded item is compared against the whole frame, so
    # this presumably relies on the data landing in a single chunk - confirm
    # against create_test_data.
    expected = func(df.copy())

    for data in read_apply(chunkstore_lib, 'test', func):
        assert_frame_equal_(data, expected)
コード例 #6
0
ファイル: test_ts_write.py プロジェクト: vishalbelsare/arctic
def test_ts_write_pandas(tickstore_lib):
    """Write data to the tick store, read it back, and round-trip the result.

    DUMMY_DATA is written under 'SYM' and read back; the first index entry and
    the first value of column 'a' are checked. The symbol is then deleted, the
    frame that was just read is written again, and a second read is expected
    to equal it (index/column names are not compared).
    """
    data = DUMMY_DATA
    tickstore_lib.write('SYM', data)

    data = tickstore_lib.read('SYM', columns=None)
    assert data.index[0] == dt(2013, 1, 1, tzinfo=mktz('Europe/London'))
    # Select the first row by position explicitly: an integer key passed to
    # Series.__getitem__ on a non-integer index only works through a
    # positional fallback that is deprecated in recent pandas releases.
    assert data['a'].iloc[0] == 1
    tickstore_lib.delete('SYM')
    tickstore_lib.write('SYM', data)

    read = tickstore_lib.read('SYM', columns=None)
    assert_frame_equal_(read, data, check_names=False)
コード例 #7
0
ファイル: test_fixes.py プロジェクト: vishalbelsare/arctic
def test_date_interval(chunkstore_lib):
    """Read daily chunks back under each DateRange interval type.

    Eight daily rows starting 2017-05-01 are stored twice: once with the dates
    as a named index (symbol 'test') and once with the dates as an ordinary
    column (symbol 'test2'). Every read starts at 2017-05-02 and either ends
    at 2017-05-05 or is left open-ended. Indexed reads are compared with the
    matching slice of the source frame; column-based reads are checked by row
    count only.
    """
    days = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')
    lower = dt(2017, 5, 2)
    upper = dt(2017, 5, 5)

    # (end bound, interval type) for each read, in execution order.
    bounds = [
        (upper, CLOSED_OPEN),
        (upper, OPEN_OPEN),
        (upper, OPEN_CLOSED),
        (upper, CLOSED_CLOSED),
        (None, CLOSED_OPEN),
    ]

    # --- dates stored as the index -------------------------------------
    indexed = DataFrame(data={'data': range(8)},
                        index=DatetimeIndex(days, name='date'))
    chunkstore_lib.write('test', indexed, chunk_size='D')

    expected_rows = [
        slice(1, 4),
        slice(2, 4),
        slice(2, 5),
        slice(1, 5),
        slice(1, 8),
    ]
    for (end, interval), rows in zip(bounds, expected_rows):
        result = chunkstore_lib.read(
            'test', chunk_range=DateRange(lower, end, interval))
        assert_frame_equal_(result, indexed[rows], check_freq=False)

    # --- dates stored as a column --------------------------------------
    flat = DataFrame(data={'data': range(8), 'date': days})
    chunkstore_lib.write('test2', flat, chunk_size='D')

    expected_counts = [3, 2, 3, 4, 7]
    for (end, interval), count in zip(bounds, expected_counts):
        result = chunkstore_lib.read(
            'test2', chunk_range=DateRange(lower, end, interval))
        assert len(result) == count
コード例 #8
0
def test_frame_converter():
    """Round-trip a 100x4 frame of random integers through FrameConverter.

    The frame is passed through ``docify`` and then ``objify`` and is
    expected to compare equal to the original.
    """
    converter = FrameConverter()
    original = pd.DataFrame(np.random.randint(0, 100, size=(100, 4)),
                            columns=list('ABCD'))

    document = converter.docify(original)
    restored = converter.objify(document)
    assert_frame_equal_(restored, original)
コード例 #9
0
def test_empty_columns():
    """Round-trip a frame with three empty columns through the serializer.

    The frame has columns 'A', 'B' and 'C' and no rows; it is expected to
    compare equal after serialize -> deserialize.
    """
    empty = pd.DataFrame(data={'A': [], 'B': [], 'C': []})
    serializer = FrametoArraySerializer()
    payload = serializer.serialize(empty)
    restored = serializer.deserialize(payload)
    assert_frame_equal_(empty, restored)
コード例 #10
0
def test_with_strings():
    """Round-trip a single string column through FrameConverter.

    The frame is passed through ``docify`` and then ``objify`` and is
    expected to compare equal to the original.
    """
    converter = FrameConverter()
    original = pd.DataFrame(data={'one': ['a', 'b', 'c']})

    document = converter.docify(original)
    restored = converter.objify(document)
    assert_frame_equal_(restored, original)