예제 #1
0
def should_return_data_when_date_range_falls_in_a_single_underlying_library(
        toplevel_tickstore, arctic):
    """Reading a range covered by one underlying library returns its data.

    BUG FIX: the original bound the underlying library to a local named
    ``tickstore``, shadowing the imported ``tickstore`` module.  Because the
    name was assigned in the function body, the earlier read of
    ``tickstore.TICK_STORE_TYPE`` raised UnboundLocalError before the
    assignment ever ran.  The local is renamed ``lib_2010``.
    """
    arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
    lib_2010 = arctic['FEED_2010.LEVEL1']
    arctic.initialize_library('test_current.toplevel_tickstore',
                              tickstore.TICK_STORE_TYPE)
    tickstore_current = arctic['test_current.toplevel_tickstore']
    # Register the underlying library's covering range with the top-level store.
    toplevel_tickstore._collection.insert_one({
        'start': dt(2010, 1, 1),
        'end': dt(2010, 12, 31, 23, 59, 59),
        'library_name': 'FEED_2010.LEVEL1'
    })
    dates = pd.date_range('20100101', periods=6, tz=mktz('Europe/London'))
    df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
    lib_2010.write('blah', df)
    tickstore_current.write('blah', df)
    res = toplevel_tickstore.read(
        'blah', DateRange(start=dt(2010, 1, 1), end=dt(2010, 1, 6)),
        list('ABCD'))

    assert_frame_equal(df, res.tz_localize(mktz('Europe/London')))
예제 #2
0
def test_delete_daterange(tickstore_lib):
    """Deleting with a CLOSED_OPEN range removes only ticks before its end."""
    london = mktz('Europe/London')
    ticks = [
        {'a': 1., 'b': 2., 'index': dt(2013, 1, 1, tzinfo=london)},
        {'a': 3., 'b': 4., 'index': dt(2013, 2, 1, tzinfo=london)},
    ]
    tickstore_lib._chunk_size = 1
    tickstore_lib.write('SYM', ticks)

    # CLOSED_OPEN excludes the end date, so only the first tick is removed.
    delete_range = DateRange(dt(2013, 1, 1, tzinfo=london),
                             dt(2013, 2, 1, tzinfo=london), CLOSED_OPEN)
    deleted = tickstore_lib.delete('SYM', delete_range)
    assert deleted.deleted_count == 1
    remaining = tickstore_lib.read('SYM', columns=None)
    assert np.allclose(remaining['b'].values, np.array([4.]))
예제 #3
0
def test_date_filter_no_index():
    """DateChunker.filter on a frame whose 'date' is a plain column."""
    chunker = DateChunker()
    frame = DataFrame(data={'data': [1, 2, 3],
                            'date': [dt(2016, 1, 1),
                                     dt(2016, 1, 2),
                                     dt(2016, 1, 3)]})

    # Ranges that cover the data (open on one or both ends) are no-ops.
    assert_frame_equal(chunker.filter(frame, DateRange(None, dt(2016, 1, 3))), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(dt(2016, 1, 1), None)), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(None, None)), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(dt(2000, 1, 1), None)), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(None, dt(2020, 1, 1))), frame)

    # Ranges lying entirely after the data select nothing.
    assert chunker.filter(frame, DateRange(dt(2020, 1, 2), None)).empty
    assert chunker.filter(frame, DateRange(dt(2017, 1, 1), dt(2018, 1, 1))).empty
예제 #4
0
def test_date_filter():
    """DateChunker.filter on a frame indexed by a ('date', 'id') MultiIndex."""
    chunker = DateChunker()
    frame = DataFrame(data={'data': [1, 2, 3]},
                      index=MultiIndex.from_tuples([(dt(2016, 1, 1), 1),
                                                    (dt(2016, 1, 2), 1),
                                                    (dt(2016, 1, 3), 1)],
                                                   names=['date', 'id']))

    # Ranges that cover the data (open on one or both ends) are no-ops.
    assert_frame_equal(chunker.filter(frame, DateRange(None, dt(2016, 1, 3))), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(dt(2016, 1, 1), None)), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(None, None)), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(dt(2000, 1, 1), None)), frame)
    assert_frame_equal(chunker.filter(frame, DateRange(None, dt(2020, 1, 1))), frame)

    # Ranges lying entirely after the data select nothing.
    assert chunker.filter(frame, DateRange(dt(2020, 1, 2), None)).empty
    assert chunker.filter(frame, DateRange(dt(2017, 1, 1), dt(2018, 1, 1))).empty
예제 #5
0
def test_read_with_image(tickstore_lib):
    """Reads with include_images=True prepend the stored image snapshot row."""
    london = mktz('Europe/London')
    ticks = [
        {'a': 1., 'index': dt(2013, 1, 1, 11, 00, tzinfo=london)},
        {'b': 4., 'index': dt(2013, 1, 1, 12, 00, tzinfo=london)},
    ]
    tickstore_lib.write('SYM', ticks)
    # Attach an image (snapshot of 'a' and 'c') timestamped before the ticks.
    tickstore_lib._collection.update_one({}, {
        '$set': {
            'im': {
                'i': {'a': 37., 'c': 2.},
                't': dt(2013, 1, 1, 10, tzinfo=london)
            }
        }
    })

    date_range = DateRange(dt(2013, 1, 1), dt(2013, 1, 2))

    # Without images, the image row is ignored.
    result = tickstore_lib.read('SYM', columns=None, date_range=date_range)
    assert result['a'][0] == 1

    # With images, all columns: the image row appears first.
    result = tickstore_lib.read('SYM',
                                columns=None,
                                date_range=date_range,
                                include_images=True)
    assert set(result.columns) == {'a', 'b', 'c'}
    assert_array_equal(result['a'].values, np.array([37, 1, np.nan]))
    assert_array_equal(result['b'].values, np.array([np.nan, np.nan, 4]))
    assert_array_equal(result['c'].values, np.array([2, np.nan, np.nan]))
    assert result.index[0] == dt(2013, 1, 1, 10, tzinfo=london)
    assert result.index[1] == dt(2013, 1, 1, 11, tzinfo=london)
    assert result.index[2] == dt(2013, 1, 1, 12, tzinfo=london)

    # Only columns present in the tick updates.
    result = tickstore_lib.read('SYM',
                                columns=('a', 'b'),
                                date_range=date_range,
                                include_images=True)
    assert set(result.columns) == {'a', 'b'}
    assert_array_equal(result['a'].values, np.array([37, 1, np.nan]))
    assert_array_equal(result['b'].values, np.array([np.nan, np.nan, 4]))
    assert result.index[0] == dt(2013, 1, 1, 10, tzinfo=london)
    assert result.index[1] == dt(2013, 1, 1, 11, tzinfo=london)
    assert result.index[2] == dt(2013, 1, 1, 12, tzinfo=london)

    # A single update column: rows with no value for it are dropped.
    result = tickstore_lib.read('SYM',
                                columns=('a', ),
                                date_range=date_range,
                                include_images=True)
    assert set(result.columns) == {'a'}
    assert_array_equal(result['a'].values, np.array([37, 1]))
    assert result.index[0] == dt(2013, 1, 1, 10, tzinfo=london)
    assert result.index[1] == dt(2013, 1, 1, 11, tzinfo=london)

    # A column that exists only in the image.
    result = tickstore_lib.read('SYM',
                                columns=['c'],
                                date_range=date_range,
                                include_images=True)
    assert set(result.columns) == {'c'}
    assert_array_equal(result['c'].values, np.array([2]))
    assert result.index[0] == dt(2013, 1, 1, 10, tzinfo=london)
예제 #6
0
def test_read_no_data(tickstore_lib):
    """Reading an unknown symbol raises NoDataFoundException."""
    date_range = DateRange(20131212, 20131212)
    with pytest.raises(NoDataFoundException):
        tickstore_lib.read('missing_sym', date_range)
예제 #7
0
def test_date_interval(chunkstore_lib):
    """CLOSED/OPEN interval semantics of chunk_range, with and without an index."""
    dates = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')

    # --- frame indexed by 'date' ---
    indexed = DataFrame(data={'data': range(8)},
                        index=DatetimeIndex(dates, name='date'))
    chunkstore_lib.write('test', indexed, chunk_size='D')

    # (interval, expected slice of the original frame) over [May 2, May 5]
    for interval, expected in [(CLOSED_OPEN, indexed[1:4]),
                               (OPEN_OPEN, indexed[2:4]),
                               (OPEN_CLOSED, indexed[2:5]),
                               (CLOSED_CLOSED, indexed[1:5])]:
        result = chunkstore_lib.read('test',
                                     chunk_range=DateRange(dt(2017, 5, 2),
                                                           dt(2017, 5, 5),
                                                           interval))
        assert_frame_equal_(result, expected, check_freq=False)
    # Unbounded end: everything from May 2 onwards.
    result = chunkstore_lib.read('test',
                                 chunk_range=DateRange(dt(2017, 5, 2), None,
                                                       CLOSED_OPEN))
    assert_frame_equal_(result, indexed[1:8], check_freq=False)

    # --- same data, 'date' as a plain column ---
    plain = DataFrame(data={'data': range(8), 'date': dates})
    chunkstore_lib.write('test2', plain, chunk_size='D')

    for interval, expected_len in [(CLOSED_OPEN, 3),
                                   (OPEN_OPEN, 2),
                                   (OPEN_CLOSED, 3),
                                   (CLOSED_CLOSED, 4)]:
        result = chunkstore_lib.read('test2',
                                     chunk_range=DateRange(dt(2017, 5, 2),
                                                           dt(2017, 5, 5),
                                                           interval))
        assert (len(result) == expected_len)
    result = chunkstore_lib.read('test2',
                                 chunk_range=DateRange(dt(2017, 5, 2), None,
                                                       CLOSED_OPEN))
    assert (len(result) == 7)
예제 #8
0
def test_get_libraries_no_data_raises_exception(toplevel_tickstore, arctic):
    """_get_libraries raises when no underlying library covers the range."""
    requested = DateRange(start=dt(2009, 1, 1),
                          end=dt(2010, 12, 31, 23, 59, 59, 999000))
    with pytest.raises(NoDataFoundException):
        toplevel_tickstore._get_libraries(requested)
예제 #9
0
def test_should_add_underlying_library_where_another_library_exists_in_a_non_overlapping_daterange(toplevel_tickstore, arctic):
    """add() succeeds when existing registrations do not overlap the new range."""
    toplevel_tickstore._collection.insert_one({'library_name': 'FEED_2011.LEVEL1',
                                               'start': dt(2011, 1, 1),
                                               'end': dt(2011, 12, 31)})
    arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
    toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1),
                                     end=dt(2010, 12, 31, 23, 59, 59, 999000)),
                           'FEED_2010.LEVEL1')
    registered = {doc['library_name'] for doc in toplevel_tickstore._collection.find()}
    assert registered == {'FEED_2010.LEVEL1', 'FEED_2011.LEVEL1'}
예제 #10
0
Copyright (C) 2020 Man Group
For a list of authors, see README.md; for the license, see file LICENSE in project root directory.
"""
from datetime import datetime as dt, timedelta as dtd

import numpy as np
import pandas as pd
import pytest
from pandas.util.testing import assert_frame_equal

from arctic.date import DateRange, mktz
from arctic.exceptions import NoDataFoundException, LibraryNotFoundException, OverlappingDataException
from arctic.tickstore import tickstore
from arctic.tickstore import toplevel

# One TickStoreLibrary per calendar year: each maps an underlying library
# name to the (closed) DateRange of ticks it covers.
FEED_2010_LEVEL1 = toplevel.TickStoreLibrary('FEED_2010.LEVEL1', DateRange(dt(2010, 1, 1), dt(2010, 12, 31, 23, 59, 59)))
FEED_2011_LEVEL1 = toplevel.TickStoreLibrary('FEED_2011.LEVEL1', DateRange(dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59)))
FEED_2012_LEVEL1 = toplevel.TickStoreLibrary('FEED_2012.LEVEL1', DateRange(dt(2012, 1, 1), dt(2012, 12, 31, 23, 59, 59)))

@pytest.mark.parametrize(('start', 'end', 'expected'),
                         [(dt(2010, 2, 1), dt(2010, 4, 1), [FEED_2010_LEVEL1]),
                          (dt(2011, 2, 1), dt(2011, 4, 1), [FEED_2011_LEVEL1]),
                          (dt(2010, 2, 1), dt(2011, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1]),
                          (dt(2011, 2, 1), dt(2012, 4, 1), [FEED_2011_LEVEL1, FEED_2012_LEVEL1]),
                          (dt(2010, 2, 1), dt(2012, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1, FEED_2012_LEVEL1]),
                          (dt(2009, 2, 1), dt(2010, 12, 31), [FEED_2010_LEVEL1]),
                          (dt(2012, 2, 1), dt(2013, 12, 31), [FEED_2012_LEVEL1]),
                          (dt(2009, 2, 1), dt(2009, 12, 31), []),
                          (dt(2013, 2, 1), dt(2013, 12, 31), []),
                          ])
def test_should_return_libraries_for_the_given_daterange(toplevel_tickstore, start, end, expected):
예제 #11
0
def test_raise_exception_if_date_range_does_not_contain_start_and_end_date():
    """_get_library_metadata rejects a fully unbounded date range."""
    store = TopLevelTickStore(Mock())
    unbounded = DateRange(start=None, end=None)
    with pytest.raises(Exception) as excinfo:
        store._get_library_metadata(unbounded)
    expected = "The date range {0} must contain a start and end date".format(unbounded)
    assert expected in str(excinfo.value)
예제 #12
0
def test_raise_error_add_library_is_called_with_a_date_range_not_on_day_boundaries(start, end):
    """add() asserts that the supplied range falls on UTC day boundaries."""
    with pytest.raises(AssertionError) as excinfo:
        mock_store = create_autospec(TopLevelTickStore,
                                     _arctic_lib=MagicMock(),
                                     _collection=MagicMock())
        mock_store._get_library_metadata.return_value = []
        TopLevelTickStore.add(mock_store, DateRange(start=start, end=end), "blah")
    assert "Date range should fall on UTC day boundaries" in str(excinfo.value)
예제 #13
0
def test_daterange_eq():
    """DateRange equality: never equal to None, always equal to itself."""
    date_range = DateRange(dt(2013, 1, 1))
    # Deliberate ``== None`` -- this exercises DateRange.__eq__ against None.
    assert ((date_range == None) == False)
    assert (date_range == date_range)
예제 #14
0
def test_daterange_raises():
    """A DateRange whose end precedes its start raises ValueError."""
    backwards = (dt(2013, 1, 1), dt(2000, 1, 1))
    with pytest.raises(ValueError):
        assert (DateRange(backwards[0], backwards[1]))
예제 #15
0
from datetime import datetime as dt, timedelta as dtd

import numpy as np
import pandas as pd
import pytest
from pandas.util.testing import assert_frame_equal

from arctic.date import DateRange, mktz
from arctic.exceptions import NoDataFoundException, LibraryNotFoundException, OverlappingDataException
from arctic.tickstore import tickstore
from arctic.tickstore import toplevel

# One TickStoreLibrary per calendar year: each maps an underlying library
# name to the (closed) DateRange of ticks it covers.
FEED_2010_LEVEL1 = toplevel.TickStoreLibrary(
    'FEED_2010.LEVEL1', DateRange(dt(2010, 1, 1), dt(2010, 12, 31, 23, 59,
                                                     59)))
FEED_2011_LEVEL1 = toplevel.TickStoreLibrary(
    'FEED_2011.LEVEL1', DateRange(dt(2011, 1, 1), dt(2011, 12, 31, 23, 59,
                                                     59)))
FEED_2012_LEVEL1 = toplevel.TickStoreLibrary(
    'FEED_2012.LEVEL1', DateRange(dt(2012, 1, 1), dt(2012, 12, 31, 23, 59,
                                                     59)))


@pytest.mark.parametrize(('start', 'end', 'expected'), [
    (dt(2010, 2, 1), dt(2010, 4, 1), [FEED_2010_LEVEL1]),
    (dt(2011, 2, 1), dt(2011, 4, 1), [FEED_2011_LEVEL1]),
    (dt(2010, 2, 1), dt(2011, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1]),
    (dt(2011, 2, 1), dt(2012, 4, 1), [FEED_2011_LEVEL1, FEED_2012_LEVEL1]),
    (dt(2010, 2, 1), dt(
        2012, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1, FEED_2012_LEVEL1]),
    (dt(2009, 2, 1), dt(2010, 12, 31), [FEED_2010_LEVEL1]),
예제 #16
0
def test_intersection_preserves_boundaries():
    """intersection() keeps the tighter bound (and its open/closed flag) per side."""
    # Non-matching boundary dates: the narrower interval wins on each end.
    result = DateRange('20110101', '20110103', OPEN_CLOSED).intersection(
        DateRange('20110101', '20110102', OPEN_OPEN))
    assert result == DateRange('20110101', '20110102', OPEN_OPEN)

    result = DateRange('20110101', '20110102', OPEN_OPEN).intersection(
        DateRange('20110101', '20110103', OPEN_CLOSED))
    assert result == DateRange('20110101', '20110102', OPEN_OPEN)

    result = DateRange('20110102', '20110103', OPEN_OPEN).intersection(
        DateRange('20110101', '20110103', CLOSED_OPEN))
    assert result == DateRange('20110102', '20110103', OPEN_OPEN)

    result = DateRange('20110102', '20110103', CLOSED_OPEN).intersection(
        DateRange('20110101', '20110103', CLOSED_OPEN))
    assert result == DateRange('20110102', '20110103', CLOSED_OPEN)

    result = DateRange('20110101', '20110103', CLOSED_OPEN).intersection(
        DateRange('20110102', '20110103', CLOSED_OPEN))
    assert result == DateRange('20110102', '20110103', CLOSED_OPEN)

    # Matching boundary dates: OPEN beats CLOSED on the shared endpoint.
    result = DateRange('20110101', '20110102', CLOSED_OPEN).intersection(
        DateRange('20110101', '20110102', OPEN_OPEN))
    assert result == DateRange('20110101', '20110102', OPEN_OPEN)

    result = DateRange('20110101', '20110102', OPEN_OPEN).intersection(
        DateRange('20110101', '20110102', OPEN_CLOSED))
    assert result == DateRange('20110101', '20110102', OPEN_OPEN)
예제 #17
0
def test_daterange_arg_parsing(date_range):
    """DateRange parses the fixture's (start, end) argument forms."""
    parsed = DateRange(date_range[0], date_range[1])
    assert parsed.start == dt(2011, 1, 2)
    assert parsed.end == dt(2011, 12, 31)
    assert parsed.unbounded is False
예제 #18
0
def test_raise_exception_if_date_range_overlaps():
    """add() refuses a range that overlaps an already-registered library."""
    mock_store = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock())
    mock_store._get_library_metadata.return_value = [TickStoreLibrary('lib1', None), ]
    with pytest.raises(OverlappingDataException) as excinfo:
        TopLevelTickStore.add(mock_store,
                              DateRange(start=dt(2010, 1, 1),
                                        end=dt(2011, 1, 1, 23, 59, 59, 999000)),
                              "blah")
    assert "There are libraries that overlap with the date range:" in str(excinfo.value)
예제 #19
0
def test_should_add_underlying_library_where_none_exists(toplevel_tickstore, arctic):
    """add() registers the underlying library when no registrations exist yet."""
    arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
    new_range = DateRange(start=dt(2010, 1, 1),
                          end=dt(2010, 12, 31, 23, 59, 59, 999000))
    toplevel_tickstore.add(new_range, 'FEED_2010.LEVEL1')
    assert toplevel_tickstore._collection.find_one({'library_name': 'FEED_2010.LEVEL1'})
예제 #20
0
def forEachTick(callback, mainKey, dataKeys, start=None, end=None, t=1):
    """Walk the rows of ``mainKey`` in date order, calling ``callback`` once
    per consecutive (lastEnd, row.date] interval with the matching slice of
    every requested data series.

    Parameters
    ----------
    callback : callable(tickData, currentEnd)
        Receives a dict of per-key data subsets covering the interval, plus
        the interval's end date.
    mainKey : str
        Chunkstore symbol whose rows drive the iteration.
    dataKeys : collection or falsy
        Keys from ``db.dbKeys`` to load; falsy loads all of them.
    start, end : optional
        Requested bounds; clamped by ``db.getMasterInterval`` to the span
        where all needed series have data.
    t : int
        Unused in this function.  NOTE(review): confirm callers do not rely
        on it before removing.
    """
    #get the time interval where we have all needed data
    start, end = db.getMasterInterval(chunkStore, db.dbKeys.values(), start,
                                      end)

    print("Starting generating properties from", start, "to", end)

    lastEnd = None

    # The driving series is read whole; the other series are streamed
    # chunk-by-chunk through the iterators built below.
    mainData = chunkStore.read(mainKey, chunk_range=DateRange(start, end))

    if debug: print("Loaded mainData:", mainData)

    iterators = {}

    for key in db.dbKeys:  #for each key (not value) that we store in the dbKeys
        if dataKeys and key not in dataKeys: continue
        iterators[key] = chunkStore.iterator(db.dbKeys[key],
                                             chunk_range=DateRange(start, end))
        print("Working with requested data", key)

    data = {}  #[next(iterators[i]) for i in range(len(iterators))]

    for key in iterators:  # load the first chunks for all data
        data[key] = decodeRawData(next(iterators[key]))

    startTime = time.time()

    for mainRow in mainData.iterrows():
        # iterrows() yields (index, row); only the row values are needed.
        rowData = mainRow[1]

        if rowData.date < start or rowData.date > end:
            continue  #we don't want to be doing anything outside of our main interval

        if lastEnd is None:
            lastEnd = rowData.date  #if this is the first row we read
        else:
            #our interval is > lastEnd and <= rowData.date
            currentStart = lastEnd
            currentEnd = rowData.date
            lastEnd = currentEnd

            #print("Loading data for dates", currentStart, currentEnd)

            #load the needed data

            tickData = {}
            for key in data:
                tickData[key] = subsetByDate(data[key], currentStart,
                                             currentEnd)

                # Keep pulling chunks until the loaded data fully covers the
                # interval end, appending each new chunk's in-range subset.
                while not containsFullInterval(data[key], tickData[key],
                                               currentEnd):
                    print("Loading new chunk for key", key,
                          tickData[key].head(2), tickData[key].tail(2),
                          data[key].head(2), data[key].tail(2), currentStart,
                          currentEnd)
                    print("Processing of the chunk took " +
                          str(time.time() - startTime) + "s.")
                    startTime = time.time()
                    data[key] = decodeRawData(
                        next(iterators[key]
                             ))  #load another data chunk and append it
                    newPart = subsetByDate(data[key], currentStart, currentEnd)
                    tickData[key] = pd.concat([tickData[key], newPart])
                if debug:
                    print(tickData[key].head(2))
                    print(tickData[key].tail(2))

            callback(tickData, currentEnd)
예제 #21
0
def test_should_raise_exception_if_library_does_not_exist(toplevel_tickstore):
    """add() fails -- and registers nothing -- for an uninitialized library.

    BUG FIX: the collection assertion previously sat *inside* the
    ``pytest.raises`` block, after the call that raises, so it never
    executed.  It is moved out and inverted: since add() raised, the
    library must NOT have been registered.
    """
    with pytest.raises(LibraryNotFoundException) as excinfo:
        toplevel_tickstore.add(
            DateRange(start=dt(2010, 1, 1),
                      end=dt(2010, 12, 31, 23, 59, 59, 999000)),
            'FEED_2010.LEVEL1')
    assert toplevel_tickstore._collection.find_one(
        {'library_name': 'FEED_2010.LEVEL1'}) is None
    assert "Library FEED_2010.LEVEL1 was not correctly initialized" in str(excinfo.value)
예제 #22
0
from datetime import datetime as dt
import operator
import pytest
import itertools
import six

from arctic.date import DateRange, string_to_daterange, CLOSED_CLOSED, CLOSED_OPEN, OPEN_CLOSED, OPEN_OPEN

# Scenario table for parametrised bounding tests.  Each value tuple is
# (date_range, start, end, unbounded-flag, start-bound flag, end-bound flag),
# with None marking an absent bound.  NOTE(review): the precise meaning of
# the last two flags is defined by the consuming test -- confirm there.
test_ranges_for_bounding = {
    "unbounded": (DateRange(), None, None, True, None, None),
    "unbounded_right": (DateRange('20110101'), dt(2011, 1,
                                                  1), None, True, True, None),
    "unbounded_left":
    (DateRange(None, '20111231'), None, dt(2011, 12, 31), True, None, True),
    "closed_by_default": (DateRange('20110101', '20111231'), dt(2011, 1, 1),
                          dt(2011, 12, 31), False, True, True),
    "closed_explicitly": (DateRange('20110101', '20111231', CLOSED_CLOSED),
                          dt(2011, 1, 1), dt(2011, 12, 31), False, True, True),
    "closed_open": (DateRange('20110101', '20111231', CLOSED_OPEN),
                    dt(2011, 1, 1), dt(2011, 12, 31), False, True, False),
    "open_closed": (DateRange('20110101', '20111231', OPEN_CLOSED),
                    dt(2011, 1, 1), dt(2011, 12, 31), False, False, True),
    "open_open": (DateRange('20110101', '20111231', OPEN_OPEN), dt(2011, 1, 1),
                  dt(2011, 12, 31), False, False, False),
}
# Fix the scenario iteration order.  NOTE(review): the sort key is the *value*
# tuple (itemgetter(1)), whose first element is a DateRange -- this presumably
# relies on DateRange ordering; confirm, since itemgetter(0) (the scenario
# name) may have been the intent.
test_ranges_for_bounding = sorted(six.iteritems(test_ranges_for_bounding),
                                  key=operator.itemgetter(1))


def eq_nan(*args):
    if all(arg is None for arg in args):
예제 #23
0
def test_get_libraries_no_data_raises_exception_tzinfo_given(toplevel_tickstore, arctic):
    """_get_libraries raises for an uncovered range even with tz-aware bounds."""
    tz = mktz('Asia/Chongqing')
    requested = DateRange(start=dt(2009, 1, 1, tzinfo=tz),
                          end=dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=tz))
    with pytest.raises(NoDataFoundException):
        toplevel_tickstore._get_libraries(requested)
예제 #24
0
def test_daterange_invalid_start():
    """A float start argument is rejected with a TypeError."""
    with pytest.raises(TypeError) as excinfo:
        DateRange(1.1, None)
    assert "unsupported type for start" in str(excinfo.value)
예제 #25
0
def test_date_range(tickstore_lib):
    """Date-range reads return the right ticks and touch the right chunks."""
    tickstore_lib.write('SYM', DUMMY_DATA)
    result = tickstore_lib.read('SYM',
                                date_range=DateRange(20130101, 20130103),
                                columns=None)
    assert_array_equal(result['a'].values, np.array([1, np.nan, np.nan]))
    assert_array_equal(result['b'].values, np.array([2., 3., 5.]))
    assert_array_equal(result['c'].values, np.array([np.nan, 4., 6.]))

    tickstore_lib.delete('SYM')

    # Re-write with three ticks per chunk so reads can span chunk boundaries.
    tickstore_lib._chunk_size = 3
    tickstore_lib.write('SYM', DUMMY_DATA)

    with patch('pymongo.collection.Collection.find',
               side_effect=tickstore_lib._collection.find) as find_spy:
        # (date range, expected 'b' values, number of chunks the query hits)
        cases = [
            (DateRange(20130101, 20130103), [2., 3., 5.], 1),
            (DateRange(20130102, 20130103), [3., 5.], 1),
            (DateRange(20130103, 20130103), [5.], 1),
            (DateRange(20130102, 20130104), [3., 5., 7.], 2),
            (DateRange(20130102, 20130105), [3., 5., 7., 9.], 2),
            (DateRange(20130103, 20130104), [5., 7.], 2),
            (DateRange(20130103, 20130105), [5., 7., 9.], 2),
            (DateRange(20130104, 20130105), [7., 9.], 1),
        ]
        for date_range, expected, chunk_count in cases:
            result = tickstore_lib.read('SYM',
                                        date_range=date_range,
                                        columns=None)
            assert_array_equal(result['b'].values, np.array(expected))
            # Re-run the last mongo query to count the chunks it matched.
            assert tickstore_lib._collection.find(
                find_spy.call_args_list[-1][0][0]).count() == chunk_count

        # Open/closed boundary behaviour over the same two-day window.
        for interval, expected in [(CLOSED_CLOSED, [7., 9.]),
                                   (CLOSED_OPEN, [7.]),
                                   (OPEN_CLOSED, [9.]),
                                   (OPEN_OPEN, [])]:
            result = tickstore_lib.read('SYM',
                                        date_range=DateRange(20130104, 20130105,
                                                             interval),
                                        columns=None)
            assert_array_equal(result['b'].values, np.array(expected))
예제 #26
0
def test_daterange_invalid_end():
    """An arbitrary object as end is rejected with a TypeError."""
    with pytest.raises(TypeError) as excinfo:
        DateRange(None, object())
    assert "unsupported type for end" in str(excinfo.value)
예제 #27
0
def test_read_chunk_boundaries(tickstore_lib):
    """Reads that straddle a chunk boundary return only the in-range ticks."""
    utc = mktz('UTC')
    sym1_ticks = [
        {'a': 1., 'b': 2., 'index': dt(2013, 6, 1, 12, 00, tzinfo=utc)},
        {'a': 3., 'b': 4., 'index': dt(2013, 6, 1, 13, 00, tzinfo=utc)},
        # chunk boundary falls here (_chunk_size == 2)
        {'a': 5., 'b': 6., 'index': dt(2013, 6, 1, 14, 00, tzinfo=utc)},
    ]
    sym2_ticks = [
        {'a': 7., 'b': 8., 'index': dt(2013, 6, 1, 12, 30, tzinfo=utc)},
        {'a': 9., 'b': 10., 'index': dt(2013, 6, 1, 13, 30, tzinfo=utc)},
        # chunk boundary falls here (_chunk_size == 2)
        {'a': 11., 'b': 12., 'index': dt(2013, 6, 1, 14, 30, tzinfo=utc)},
    ]
    tickstore_lib._chunk_size = 2
    tickstore_lib.write('SYM1', sym1_ticks)
    tickstore_lib.write('SYM2', sym2_ticks)

    # 12:45-15:00 covers the last tick of the first chunk's window onward:
    # two ticks per symbol, four when reading both symbols together.
    query_range = DateRange(dt(2013, 6, 1, 12, 45, tzinfo=utc),
                            dt(2013, 6, 1, 15, 00, tzinfo=utc))
    assert len(tickstore_lib.read('SYM1',
                                  columns=None,
                                  date_range=query_range)) == 2
    assert len(tickstore_lib.read('SYM2',
                                  columns=None,
                                  date_range=query_range)) == 2
    assert len(tickstore_lib.read(['SYM1', 'SYM2'],
                                  columns=None,
                                  date_range=query_range)) == 4
예제 #28
0
def test_daterange_index():
    """DateRange supports positional indexing: [0] is start, [1] is end."""
    lower, upper = dt(2000, 1, 1), dt(3000, 1, 1)
    rng = DateRange(lower, upper)
    assert rng[0] == lower
    assert rng[1] == upper
예제 #29
0
    def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None,
                                         db_server=DataConstants().db_server,
                                         db_port=DataConstants().db_port, username=None, password=None):
        """Reads a time series cache from disk or a database backend.

        Parameters
        ----------
        fname : str (or list)
            file(s) to be read from; a single string containing '*' is
            expanded via glob
        engine : str (optional)
            'hdf5' - reads HDF5 files (default; falls back to Parquet when
                     the HDF5 file does not exist)
            'arctic' - reads from Arctic/MongoDB database
            'bcolz' - reads from bcolz file (not fully implemented)
            'redis' - reads a msgpack-serialised DataFrame from Redis
        start_date : str/datetime (optional)
            Start date (honoured by the 'arctic' engine only)
        finish_date : str/datetime (optional)
            Finish date (honoured by the 'arctic' engine only)
        db_server : str
            IP address of MongoDB/Redis server
        db_port : str/int
            Port of MongoDB/Redis server
        username : str (optional)
            MongoDB username ('arctic' engine only)
        password : str (optional)
            MongoDB password ('arctic' engine only)

        Returns
        -------
        DataFrame or list of DataFrame
            A single DataFrame when exactly one file was requested, otherwise
            a list (entries are None where a cache could not be read)
        """

        logger = LoggerManager.getLogger(__name__)

        data_frame_list = []

        # Normalise fname to a list, expanding any '*' wildcard on disk
        if not (isinstance(fname, list)):
            if '*' in fname:
                fname = glob.glob(fname)
            else:
                fname = [fname]

        for fname_single in fname:
            logger.debug("Reading " + fname_single + "..")

            # Initialise per-iteration so an unmatched branch yields None
            # rather than a NameError (or a stale frame from the last file)
            data_frame = None

            if (engine == 'bcolz'):
                try:
                    name = self.get_bcolz_filename(fname_single)
                    zlens = bcolz.open(rootdir=name)
                    data_frame = zlens.todataframe()

                    data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                    data_frame.index.name = 'Date'
                    del data_frame['DTS_']

                    # convert invalid characters (which Bcolz can't deal with) to more readable characters for pandas
                    data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                    data_frame.columns = [x[2:] for x in data_frame.columns]
                except Exception:
                    # Best effort: treat an unreadable bcolz cache as missing
                    data_frame = None

            elif (engine == 'redis'):
                import redis

                # Redis keys use '_' instead of '.'
                fname_single = os.path.basename(fname_single).replace('.', '_')

                msg = None

                try:
                    r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                    msg = r.get(fname_single)

                except Exception:
                    logger.info("Cache not existent for " + fname_single + " in Redis")

                if msg is None:
                    data_frame = None
                else:

                    logger.info('Load Redis cache: ' + fname_single)

                    # NOTE(review): read_msgpack is deprecated/removed in newer
                    # pandas versions - confirm the pandas pin before upgrading
                    data_frame = pandas.read_msgpack(msg)

            elif (engine == 'arctic'):
                socketTimeoutMS = 2 * 1000

                import pymongo
                from arctic import Arctic

                # Arctic library names use '_' instead of '.'
                fname_single = os.path.basename(fname_single).replace('.', '_')

                logger.info('Load Arctic/MongoDB library: ' + fname_single)

                if username is not None and password is not None:
                    c = pymongo.MongoClient(
                        host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                        connect=False)  # , username=username, password=password)
                else:
                    c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port), connect=False)

                store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS)

                # Access the library; a missing library is logged, not raised
                try:
                    library = store[fname_single]

                    if start_date is None and finish_date is None:
                        item = library.read(fname_single)
                    else:
                        from arctic.date import DateRange
                        item = library.read(fname_single, date_range=DateRange(start_date, finish_date))

                    c.close()

                    logger.info('Read ' + fname_single)

                    data_frame = item.data

                except Exception as e:
                    logger.warning('Library does not exist: ' + fname_single + ' & message is ' + str(e))
                    data_frame = None

            elif os.path.isfile(self.get_h5_filename(fname_single)):
                store = pandas.HDFStore(self.get_h5_filename(fname_single))
                data_frame = store.select("data")

                # Intraday caches are downcast to save memory
                if ('intraday' in fname_single):
                    data_frame = data_frame.astype('float32')

                store.close()

            elif os.path.isfile(fname_single):
                # Fallback: treat any other existing file as Parquet
                data_frame = pandas.read_parquet(fname_single)

            data_frame_list.append(data_frame)

        # Unwrap the list when only one file was requested
        if len(data_frame_list) == 1:
            return data_frame_list[0]

        return data_frame_list
예제 #30
0
def test_intersection_returns_inner_boundaries():
    """Intersection keeps the later start and the earlier end (None = open)."""
    # start boundary: the later of the two starts wins; None acts as -inf
    inner_start = dt(2011, 1, 3)
    assert DateRange('20110103').intersection(DateRange('20110102')).start == inner_start
    assert DateRange('20110102').intersection(DateRange('20110103')).start == inner_start
    assert DateRange(None).intersection(DateRange('20110103')).start == inner_start
    assert DateRange('20110103').intersection(DateRange(None)).start == inner_start

    # end boundary: the earlier of the two ends wins; None acts as +inf
    assert DateRange(None, '20110103').intersection(DateRange(None, '20110102')).end == dt(2011, 1, 2)
    assert DateRange(None, '20110102').intersection(DateRange(None, '20110103')).end == dt(2011, 1, 2)
    assert DateRange(None, None).intersection(DateRange(None, '20110103')).end == dt(2011, 1, 3)
    assert DateRange(None, '20110103').intersection(DateRange(None, None)).end == dt(2011, 1, 3)