def should_return_data_when_date_range_falls_in_a_single_underlying_library(toplevel_tickstore, arctic):
    """Reading a date range fully contained in one underlying library
    returns that library's data unchanged.

    NOTE(review): the name lacks the ``test_`` prefix, so pytest will not
    collect it — confirm whether that is intentional.
    """
    arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
    # Bind to a distinct name: the original assignment shadowed the
    # ``tickstore`` module, so the second initialize_library() call below
    # looked up TICK_STORE_TYPE on the library object instead of the module.
    tickstore_2010 = arctic['FEED_2010.LEVEL1']
    arctic.initialize_library('test_current.toplevel_tickstore', tickstore.TICK_STORE_TYPE)
    tickstore_current = arctic['test_current.toplevel_tickstore']
    # Register the 2010 library as covering all of 2010 in the top-level store.
    toplevel_tickstore._collection.insert_one({'start': dt(2010, 1, 1),
                                               'end': dt(2010, 12, 31, 23, 59, 59),
                                               'library_name': 'FEED_2010.LEVEL1'})
    dates = pd.date_range('20100101', periods=6, tz=mktz('Europe/London'))
    df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
    tickstore_2010.write('blah', df)
    tickstore_current.write('blah', df)

    res = toplevel_tickstore.read('blah',
                                  DateRange(start=dt(2010, 1, 1), end=dt(2010, 1, 6)),
                                  list('ABCD'))
    # read() returns tz-naive data; localise before comparing with the input.
    assert_frame_equal(df, res.tz_localize(mktz('Europe/London')))
def test_delete_daterange(tickstore_lib):
    """Deleting a CLOSED_OPEN range removes only ticks strictly before the end."""
    london = mktz('Europe/London')
    ticks = [
        {'a': 1., 'b': 2., 'index': dt(2013, 1, 1, tzinfo=london)},
        {'a': 3., 'b': 4., 'index': dt(2013, 2, 1, tzinfo=london)},
    ]
    tickstore_lib._chunk_size = 1
    tickstore_lib.write('SYM', ticks)

    # CLOSED_OPEN: the January tick falls inside the range, the February
    # tick (the open end) does not, so exactly one document is deleted.
    result = tickstore_lib.delete('SYM',
                                  DateRange(dt(2013, 1, 1, tzinfo=london),
                                            dt(2013, 2, 1, tzinfo=london),
                                            CLOSED_OPEN))
    assert result.deleted_count == 1

    remaining = tickstore_lib.read('SYM', columns=None)
    assert np.allclose(remaining['b'].values, np.array([4.]))
def test_date_filter_no_index():
    """DateChunker.filter honours open/closed bounds using a 'date' column
    when the frame has no date index."""
    chunker = DateChunker()
    frame = DataFrame(data={'data': [1, 2, 3],
                            'date': [dt(2016, 1, 1), dt(2016, 1, 2), dt(2016, 1, 3)]})

    # Every range that covers the whole frame must return it untouched.
    covering_ranges = (DateRange(None, dt(2016, 1, 3)),   # OPEN - CLOSED
                       DateRange(dt(2016, 1, 1), None),   # CLOSED - OPEN
                       DateRange(None, None),             # OPEN - OPEN
                       DateRange(dt(2000, 1, 1), None),   # CLOSED - OPEN, start far before data
                       DateRange(None, dt(2020, 1, 1)))   # OPEN - CLOSED, end far after data
    for rng in covering_ranges:
        assert_frame_equal(chunker.filter(frame, rng), frame)

    # Ranges lying entirely after the data produce an empty frame.
    assert chunker.filter(frame, DateRange(dt(2020, 1, 2), None)).empty
    assert chunker.filter(frame, DateRange(dt(2017, 1, 1), dt(2018, 1, 1))).empty
def test_date_filter():
    """DateChunker.filter honours open/closed bounds when the date lives in
    the first level of a MultiIndex."""
    chunker = DateChunker()
    frame = DataFrame(data={'data': [1, 2, 3]},
                      index=MultiIndex.from_tuples([(dt(2016, 1, 1), 1),
                                                    (dt(2016, 1, 2), 1),
                                                    (dt(2016, 1, 3), 1)],
                                                   names=['date', 'id']))

    # Every range that covers the whole frame must return it untouched.
    covering_ranges = (DateRange(None, dt(2016, 1, 3)),   # OPEN - CLOSED
                       DateRange(dt(2016, 1, 1), None),   # CLOSED - OPEN
                       DateRange(None, None),             # OPEN - OPEN
                       DateRange(dt(2000, 1, 1), None),   # CLOSED - OPEN, start far before data
                       DateRange(None, dt(2020, 1, 1)))   # OPEN - CLOSED, end far after data
    for rng in covering_ranges:
        assert_frame_equal(chunker.filter(frame, rng), frame)

    # Ranges lying entirely after the data produce an empty frame.
    assert chunker.filter(frame, DateRange(dt(2020, 1, 2), None)).empty
    assert chunker.filter(frame, DateRange(dt(2017, 1, 1), dt(2018, 1, 1))).empty
def test_read_with_image(tickstore_lib):
    """Reading with ``include_images=True`` splices the stored image (keyed
    'im' on the chunk document, values under 'i', timestamp under 't') in
    front of the tick updates, restricted to the requested columns."""
    DUMMY_DATA = [
              {'a': 1.,
               'index': dt(2013, 1, 1, 11, 00, tzinfo=mktz('Europe/London'))
               },
              {'b': 4.,
               'index': dt(2013, 1, 1, 12, 00, tzinfo=mktz('Europe/London'))
               },
              ]
    # Add an image: patch the stored chunk directly with an image taken at
    # 10:00, i.e. one hour before the first tick.
    tickstore_lib.write('SYM', DUMMY_DATA)
    tickstore_lib._collection.update_one({}, {'$set':
                                              {'im': {'i': {'a': 37.,
                                                            'c': 2.,
                                                            },
                                                      't': dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
                                                      }
                                               }
                                              })

    dr = DateRange(dt(2013, 1, 1), dt(2013, 1, 2))
    # Without images the first row is the 11:00 tick.
    df = tickstore_lib.read('SYM', columns=None, date_range=dr)
    assert df['a'][0] == 1

    # Read with the image as well - all columns: the image contributes an
    # extra leading row at 10:00 and the image-only column 'c'.
    df = tickstore_lib.read('SYM', columns=None, date_range=dr, include_images=True)
    assert set(df.columns) == set(('a', 'b', 'c'))
    assert_array_equal(df['a'].values, np.array([37, 1, np.nan]))
    assert_array_equal(df['b'].values, np.array([np.nan, np.nan, 4]))
    assert_array_equal(df['c'].values, np.array([2, np.nan, np.nan]))
    assert df.index[0] == dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
    assert df.index[1] == dt(2013, 1, 1, 11, tzinfo=mktz('Europe/London'))
    assert df.index[2] == dt(2013, 1, 1, 12, tzinfo=mktz('Europe/London'))

    # Read just columns from the updates: 'c' is dropped but the image row
    # for 'a' is still included.
    df = tickstore_lib.read('SYM', columns=('a', 'b'), date_range=dr, include_images=True)
    assert set(df.columns) == set(('a', 'b'))
    assert_array_equal(df['a'].values, np.array([37, 1, np.nan]))
    assert_array_equal(df['b'].values, np.array([np.nan, np.nan, 4]))
    assert df.index[0] == dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
    assert df.index[1] == dt(2013, 1, 1, 11, tzinfo=mktz('Europe/London'))
    assert df.index[2] == dt(2013, 1, 1, 12, tzinfo=mktz('Europe/London'))

    # Read one column from the updates: rows with no data for 'a' (the
    # 12:00 b-only tick) are omitted entirely.
    df = tickstore_lib.read('SYM', columns=('a',), date_range=dr, include_images=True)
    assert set(df.columns) == set(('a',))
    assert_array_equal(df['a'].values, np.array([37, 1]))
    assert df.index[0] == dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
    assert df.index[1] == dt(2013, 1, 1, 11, tzinfo=mktz('Europe/London'))

    # Read just the image column: only the 10:00 image row remains.
    df = tickstore_lib.read('SYM', columns=['c'], date_range=dr, include_images=True)
    assert set(df.columns) == set(['c'])
    assert_array_equal(df['c'].values, np.array([2]))
    assert df.index[0] == dt(2013, 1, 1, 10, tzinfo=mktz('Europe/London'))
def test_read_no_data(tickstore_lib):
    """Reading a symbol that was never written raises NoDataFoundException."""
    empty_range = DateRange(20131212, 20131212)
    with pytest.raises(NoDataFoundException):
        tickstore_lib.read('missing_sym', empty_range)
def test_date_interval(chunkstore_lib):
    """chunk_range reads honour all four open/closed interval combinations,
    both for a DatetimeIndex-ed frame and for a frame with a 'date' column."""
    date_range = pd.date_range(start=dt(2017, 5, 1), periods=8, freq='D')
    df = DataFrame(data={'data': range(8)},
                   index=DatetimeIndex(date_range, name='date'))

    # test with index
    chunkstore_lib.write('test', df, chunk_size='D')
    # CLOSED_OPEN: [5/2, 5/5) -> rows 1..3
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_OPEN))
    assert_frame_equal_(ret, df[1:4], check_freq=False)
    # OPEN_OPEN: (5/2, 5/5) -> rows 2..3
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_OPEN))
    assert_frame_equal_(ret, df[2:4], check_freq=False)
    # OPEN_CLOSED: (5/2, 5/5] -> rows 2..4
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_CLOSED))
    assert_frame_equal_(ret, df[2:5], check_freq=False)
    # CLOSED_CLOSED: [5/2, 5/5] -> rows 1..4
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_CLOSED))
    assert_frame_equal_(ret, df[1:5], check_freq=False)
    # Unbounded right end -> everything from 5/2 onwards
    ret = chunkstore_lib.read('test', chunk_range=DateRange(dt(2017, 5, 2), None, CLOSED_OPEN))
    assert_frame_equal_(ret, df[1:8], check_freq=False)

    # test without index: same intervals, asserted by row count only
    df = DataFrame(data={'data': range(8),
                         'date': date_range})
    chunkstore_lib.write('test2', df, chunk_size='D')
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_OPEN))
    assert (len(ret) == 3)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_OPEN))
    assert (len(ret) == 2)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), OPEN_CLOSED))
    assert (len(ret) == 3)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), dt(2017, 5, 5), CLOSED_CLOSED))
    assert (len(ret) == 4)
    ret = chunkstore_lib.read('test2', chunk_range=DateRange(dt(2017, 5, 2), None, CLOSED_OPEN))
    assert (len(ret) == 7)
def test_get_libraries_no_data_raises_exception(toplevel_tickstore, arctic):
    """_get_libraries raises when no underlying library covers the range."""
    query_range = DateRange(start=dt(2009, 1, 1),
                            end=dt(2010, 12, 31, 23, 59, 59, 999000))
    with pytest.raises(NoDataFoundException):
        toplevel_tickstore._get_libraries(query_range)
def test_should_add_underlying_library_where_another_library_exists_in_a_non_overlapping_daterange(toplevel_tickstore, arctic):
    """add() accepts a range that does not overlap an existing entry,
    leaving both libraries registered."""
    # Pre-register a library that covers 2011 only.
    toplevel_tickstore._collection.insert_one({'library_name': 'FEED_2011.LEVEL1',
                                               'start': dt(2011, 1, 1),
                                               'end': dt(2011, 12, 31)})
    arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)

    # The new 2010 range ends before the 2011 entry begins.
    toplevel_tickstore.add(DateRange(start=dt(2010, 1, 1),
                                     end=dt(2010, 12, 31, 23, 59, 59, 999000)),
                           'FEED_2010.LEVEL1')

    registered = {doc['library_name'] for doc in toplevel_tickstore._collection.find()}
    assert registered == {'FEED_2010.LEVEL1', 'FEED_2011.LEVEL1'}
Copyright (C) 2020 Man Group For a list of authors, see README.md; for the license, see file LICENSE in project root directory. """ from datetime import datetime as dt, timedelta as dtd import numpy as np import pandas as pd import pytest from pandas.util.testing import assert_frame_equal from arctic.date import DateRange, mktz from arctic.exceptions import NoDataFoundException, LibraryNotFoundException, OverlappingDataException from arctic.tickstore import tickstore from arctic.tickstore import toplevel FEED_2010_LEVEL1 = toplevel.TickStoreLibrary('FEED_2010.LEVEL1', DateRange(dt(2010, 1, 1), dt(2010, 12, 31, 23, 59, 59))) FEED_2011_LEVEL1 = toplevel.TickStoreLibrary('FEED_2011.LEVEL1', DateRange(dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59))) FEED_2012_LEVEL1 = toplevel.TickStoreLibrary('FEED_2012.LEVEL1', DateRange(dt(2012, 1, 1), dt(2012, 12, 31, 23, 59, 59))) @pytest.mark.parametrize(('start', 'end', 'expected'), [(dt(2010, 2, 1), dt(2010, 4, 1), [FEED_2010_LEVEL1]), (dt(2011, 2, 1), dt(2011, 4, 1), [FEED_2011_LEVEL1]), (dt(2010, 2, 1), dt(2011, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1]), (dt(2011, 2, 1), dt(2012, 4, 1), [FEED_2011_LEVEL1, FEED_2012_LEVEL1]), (dt(2010, 2, 1), dt(2012, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1, FEED_2012_LEVEL1]), (dt(2009, 2, 1), dt(2010, 12, 31), [FEED_2010_LEVEL1]), (dt(2012, 2, 1), dt(2013, 12, 31), [FEED_2012_LEVEL1]), (dt(2009, 2, 1), dt(2009, 12, 31), []), (dt(2013, 2, 1), dt(2013, 12, 31), []), ]) def test_should_return_libraries_for_the_given_daterange(toplevel_tickstore, start, end, expected):
def test_raise_exception_if_date_range_does_not_contain_start_and_end_date():
    """Metadata lookup rejects a fully unbounded date range."""
    store = TopLevelTickStore(Mock())
    unbounded = DateRange(start=None, end=None)
    with pytest.raises(Exception) as excinfo:
        store._get_library_metadata(unbounded)
    expected = "The date range {0} must contain a start and end date".format(unbounded)
    assert expected in str(excinfo.value)
def test_raise_error_add_library_is_called_with_a_date_range_not_on_day_boundaries(start, end):
    """add() asserts that the supplied range falls on UTC day boundaries."""
    with pytest.raises(AssertionError) as excinfo:
        fake_store = create_autospec(TopLevelTickStore,
                                     _arctic_lib=MagicMock(),
                                     _collection=MagicMock())
        # No overlapping libraries, so only the boundary assertion can fire.
        fake_store._get_library_metadata.return_value = []
        TopLevelTickStore.add(fake_store, DateRange(start=start, end=end), "blah")
    assert "Date range should fall on UTC day boundaries" in str(excinfo.value)
def test_daterange_eq():
    """A DateRange compares unequal to None and equal to itself."""
    rng = DateRange(dt(2013, 1, 1))
    # Deliberate ``== None``: we are exercising __eq__, not identity.
    assert (rng == None) == False  # noqa: E711,E712
    assert rng == rng
def test_daterange_raises():
    """Constructing a range whose start is after its end raises ValueError."""
    with pytest.raises(ValueError):
        DateRange(dt(2013, 1, 1), dt(2000, 1, 1))
from datetime import datetime as dt, timedelta as dtd import numpy as np import pandas as pd import pytest from pandas.util.testing import assert_frame_equal from arctic.date import DateRange, mktz from arctic.exceptions import NoDataFoundException, LibraryNotFoundException, OverlappingDataException from arctic.tickstore import tickstore from arctic.tickstore import toplevel FEED_2010_LEVEL1 = toplevel.TickStoreLibrary( 'FEED_2010.LEVEL1', DateRange(dt(2010, 1, 1), dt(2010, 12, 31, 23, 59, 59))) FEED_2011_LEVEL1 = toplevel.TickStoreLibrary( 'FEED_2011.LEVEL1', DateRange(dt(2011, 1, 1), dt(2011, 12, 31, 23, 59, 59))) FEED_2012_LEVEL1 = toplevel.TickStoreLibrary( 'FEED_2012.LEVEL1', DateRange(dt(2012, 1, 1), dt(2012, 12, 31, 23, 59, 59))) @pytest.mark.parametrize(('start', 'end', 'expected'), [ (dt(2010, 2, 1), dt(2010, 4, 1), [FEED_2010_LEVEL1]), (dt(2011, 2, 1), dt(2011, 4, 1), [FEED_2011_LEVEL1]), (dt(2010, 2, 1), dt(2011, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1]), (dt(2011, 2, 1), dt(2012, 4, 1), [FEED_2011_LEVEL1, FEED_2012_LEVEL1]), (dt(2010, 2, 1), dt( 2012, 4, 1), [FEED_2010_LEVEL1, FEED_2011_LEVEL1, FEED_2012_LEVEL1]), (dt(2009, 2, 1), dt(2010, 12, 31), [FEED_2010_LEVEL1]),
def test_intersection_preserves_boundaries():
    """intersection() keeps the more restrictive open/closed flag on each side."""
    jan1_2_open = DateRange('20110101', '20110102', OPEN_OPEN)

    # Non-matching boundaries: the side belonging to the narrower range wins.
    assert jan1_2_open == DateRange('20110101', '20110103', OPEN_CLOSED).intersection(
        DateRange('20110101', '20110102', OPEN_OPEN))
    assert jan1_2_open == DateRange('20110101', '20110102', OPEN_OPEN).intersection(
        DateRange('20110101', '20110103', OPEN_CLOSED))
    assert DateRange('20110102', '20110103', OPEN_OPEN) == \
        DateRange('20110102', '20110103', OPEN_OPEN).intersection(
            DateRange('20110101', '20110103', CLOSED_OPEN))
    assert DateRange('20110102', '20110103', CLOSED_OPEN) == \
        DateRange('20110102', '20110103', CLOSED_OPEN).intersection(
            DateRange('20110101', '20110103', CLOSED_OPEN))
    assert DateRange('20110102', '20110103', CLOSED_OPEN) == \
        DateRange('20110101', '20110103', CLOSED_OPEN).intersection(
            DateRange('20110102', '20110103', CLOSED_OPEN))

    # Matching boundaries: open beats closed on the shared endpoint.
    assert jan1_2_open == DateRange('20110101', '20110102', CLOSED_OPEN).intersection(
        DateRange('20110101', '20110102', OPEN_OPEN))
    assert jan1_2_open == DateRange('20110101', '20110102', OPEN_OPEN).intersection(
        DateRange('20110101', '20110102', OPEN_CLOSED))
def test_daterange_arg_parsing(date_range):
    """Each parametrised argument pair parses to the same concrete bounds."""
    parsed = DateRange(date_range[0], date_range[1])
    assert parsed.start == dt(2011, 1, 2)
    assert parsed.end == dt(2011, 12, 31)
    assert parsed.unbounded is False
def test_raise_exception_if_date_range_overlaps():
    """add() refuses a range overlapping an already-registered library."""
    fake_store = create_autospec(TopLevelTickStore, _arctic_lib=MagicMock())
    # Reporting an existing library triggers the overlap check.
    fake_store._get_library_metadata.return_value = [TickStoreLibrary('lib1', None)]
    with pytest.raises(OverlappingDataException) as excinfo:
        TopLevelTickStore.add(fake_store,
                              DateRange(start=dt(2010, 1, 1),
                                        end=dt(2011, 1, 1, 23, 59, 59, 999000)),
                              "blah")
    assert "There are libraries that overlap with the date range:" in str(excinfo.value)
def test_should_add_underlying_library_where_none_exists(toplevel_tickstore, arctic):
    """add() records the library when the collection is empty."""
    arctic.initialize_library('FEED_2010.LEVEL1', tickstore.TICK_STORE_TYPE)
    full_2010 = DateRange(start=dt(2010, 1, 1),
                          end=dt(2010, 12, 31, 23, 59, 59, 999000))
    toplevel_tickstore.add(full_2010, 'FEED_2010.LEVEL1')
    assert toplevel_tickstore._collection.find_one({'library_name': 'FEED_2010.LEVEL1'})
def forEachTick(callback, mainKey, dataKeys, start=None, end=None, t=1):
    """Walk ``mainKey``'s rows and, for each consecutive interval between
    rows, hand ``callback`` the matching slice of every requested data series.

    Relies on module-level globals: ``db``, ``chunkStore``, ``debug`` and the
    helpers ``decodeRawData``/``subsetByDate``/``containsFullInterval``.
    ``t`` is unused in this body — presumably a tick granularity; confirm
    with callers before removing.

    callback : callable invoked as ``callback(tickData, currentEnd)`` once
        per interval, where tickData maps key -> frame slice.
    mainKey : symbol whose rows drive the iteration.
    dataKeys : if truthy, restrict the loaded series to these keys.
    start, end : optional bounds; narrowed to the master interval where all
        needed data exists.
    """
    #get the time interval where we have all needed data
    start, end = db.getMasterInterval(chunkStore, db.dbKeys.values(), start, end)

    print("Starting generating properties from", start, "to", end)

    lastEnd = None

    mainData = chunkStore.read(mainKey, chunk_range=DateRange(start, end))

    if debug: print("Loaded mainData:", mainData)

    iterators = {}

    for key in db.dbKeys: #for each key (not value) that we store in the dbKeys
        if dataKeys and key not in dataKeys: continue
        iterators[key] = chunkStore.iterator(db.dbKeys[key], chunk_range=DateRange(start, end))
        print("Working with requested data", key)

    data = {} #[next(iterators[i]) for i in range(len(iterators))]

    for key in iterators: # load the first chunks for all data
        data[key] = decodeRawData(next(iterators[key]))

    startTime = time.time()

    for mainRow in mainData.iterrows():
        # iterrows() yields (index, row); only the row payload is used.
        rowData = mainRow[1]

        if rowData.date < start or rowData.date > end: continue #we don't want to be doing anything outside of our main interval

        if lastEnd is None:
            lastEnd = rowData.date #if this is the first row we read
        else: #our interval is > lastEnd and <= rowData.date
            currentStart = lastEnd
            currentEnd = rowData.date
            lastEnd = currentEnd

            #print("Loading data for dates", currentStart, currentEnd)
            #load the needed data
            tickData = {}

            for key in data:
                tickData[key] = subsetByDate(data[key], currentStart, currentEnd)

                # Keep pulling chunks until the loaded data fully covers the
                # current interval's end.
                while not containsFullInterval(data[key], tickData[key], currentEnd):
                    print("Loading new chunk for key", key, tickData[key].head(2), tickData[key].tail(2), data[key].head(2), data[key].tail(2), currentStart, currentEnd)
                    print("Processing of the chunk took " + str(time.time() - startTime) + "s.")
                    startTime = time.time()
                    data[key] = decodeRawData(next(iterators[key])) #load another data chunk and append it
                    newPart = subsetByDate(data[key], currentStart, currentEnd)
                    tickData[key] = pd.concat([tickData[key], newPart])
                    if debug:
                        print(tickData[key].head(2))
                        print(tickData[key].tail(2))

            callback(tickData, currentEnd)
def test_should_raise_exception_if_library_does_not_exist(toplevel_tickstore):
    """add() fails fast when the named library was never initialised."""
    full_2010 = DateRange(start=dt(2010, 1, 1),
                          end=dt(2010, 12, 31, 23, 59, 59, 999000))
    with pytest.raises(LibraryNotFoundException) as excinfo:
        toplevel_tickstore.add(full_2010, 'FEED_2010.LEVEL1')
        # Unreachable when add() raises as expected; kept from the original
        # (moving it outside would assert on a record that was never written).
        assert toplevel_tickstore._collection.find_one({'library_name': 'FEED_2010.LEVEL1'})
    assert "Library FEED_2010.LEVEL1 was not correctly initialized" in str(excinfo.value)
from datetime import datetime as dt import operator import pytest import itertools import six from arctic.date import DateRange, string_to_daterange, CLOSED_CLOSED, CLOSED_OPEN, OPEN_CLOSED, OPEN_OPEN test_ranges_for_bounding = { "unbounded": (DateRange(), None, None, True, None, None), "unbounded_right": (DateRange('20110101'), dt(2011, 1, 1), None, True, True, None), "unbounded_left": (DateRange(None, '20111231'), None, dt(2011, 12, 31), True, None, True), "closed_by_default": (DateRange('20110101', '20111231'), dt(2011, 1, 1), dt(2011, 12, 31), False, True, True), "closed_explicitly": (DateRange('20110101', '20111231', CLOSED_CLOSED), dt(2011, 1, 1), dt(2011, 12, 31), False, True, True), "closed_open": (DateRange('20110101', '20111231', CLOSED_OPEN), dt(2011, 1, 1), dt(2011, 12, 31), False, True, False), "open_closed": (DateRange('20110101', '20111231', OPEN_CLOSED), dt(2011, 1, 1), dt(2011, 12, 31), False, False, True), "open_open": (DateRange('20110101', '20111231', OPEN_OPEN), dt(2011, 1, 1), dt(2011, 12, 31), False, False, False), } test_ranges_for_bounding = sorted(six.iteritems(test_ranges_for_bounding), key=operator.itemgetter(1)) def eq_nan(*args): if all(arg is None for arg in args):
def test_get_libraries_no_data_raises_exception_tzinfo_given(toplevel_tickstore, arctic):
    """Timezone-aware bounds still raise when nothing covers the range."""
    tz = mktz('Asia/Chongqing')
    query_range = DateRange(start=dt(2009, 1, 1, tzinfo=tz),
                            end=dt(2010, 12, 31, 23, 59, 59, 999000, tzinfo=tz))
    with pytest.raises(NoDataFoundException):
        toplevel_tickstore._get_libraries(query_range)
def test_daterange_invalid_start():
    """A float start is rejected with TypeError."""
    with pytest.raises(TypeError) as excinfo:
        DateRange(1.1, None)
    assert "unsupported type for start" in str(excinfo.value)
def test_date_range(tickstore_lib):
    """read() with a DateRange returns only in-range ticks and, once data is
    chunked, queries only the chunks overlapping the range (verified by
    re-running the patched ``find``'s last query and counting matches)."""
    tickstore_lib.write('SYM', DUMMY_DATA)
    df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
    assert_array_equal(df['a'].values, np.array([1, np.nan, np.nan]))
    assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
    assert_array_equal(df['c'].values, np.array([np.nan, 4., 6.]))

    tickstore_lib.delete('SYM')

    # Chunk every 3 symbols and lets have some fun
    tickstore_lib._chunk_size = 3
    tickstore_lib.write('SYM', DUMMY_DATA)

    with patch('pymongo.collection.Collection.find', side_effect=tickstore_lib._collection.find) as f:
        # Ranges inside the first chunk touch exactly 1 chunk document.
        df = tickstore_lib.read('SYM', date_range=DateRange(20130101, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([2., 3., 5.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130103), columns=None)
        assert_array_equal(df['b'].values, np.array([5.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1
        # Ranges spanning the chunk boundary touch 2 chunk documents.
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130104), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5., 7.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130102, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([3., 5., 7., 9.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130104), columns=None)
        assert_array_equal(df['b'].values, np.array([5., 7.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
        df = tickstore_lib.read('SYM', date_range=DateRange(20130103, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([5., 7., 9.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 2
        # A range entirely inside the second chunk touches 1 chunk again.
        df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105), columns=None)
        assert_array_equal(df['b'].values, np.array([7., 9.]))
        assert tickstore_lib._collection.find(f.call_args_list[-1][0][0]).count() == 1

    # Test the different open-closed behaviours
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_CLOSED), columns=None)
    assert_array_equal(df['b'].values, np.array([7., 9.]))
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, CLOSED_OPEN), columns=None)
    assert_array_equal(df['b'].values, np.array([7.]))
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_CLOSED), columns=None)
    assert_array_equal(df['b'].values, np.array([9.]))
    df = tickstore_lib.read('SYM', date_range=DateRange(20130104, 20130105, OPEN_OPEN), columns=None)
    assert_array_equal(df['b'].values, np.array([]))
def test_daterange_invalid_end():
    """An arbitrary object as end is rejected with TypeError."""
    with pytest.raises(TypeError) as excinfo:
        DateRange(None, object())
    assert "unsupported type for end" in str(excinfo.value)
def test_read_chunk_boundaries(tickstore_lib):
    """A date-range read that starts inside a chunk returns only in-range
    ticks, for single and multi-symbol reads alike."""
    utc = mktz('UTC')
    sym1_ticks = [{'a': 1., 'b': 2., 'index': dt(2013, 6, 1, 12, 0, tzinfo=utc)},
                  {'a': 3., 'b': 4., 'index': dt(2013, 6, 1, 13, 0, tzinfo=utc)},
                  # Chunk boundary here (chunk size is 2)
                  {'a': 5., 'b': 6., 'index': dt(2013, 6, 1, 14, 0, tzinfo=utc)}]
    sym2_ticks = [{'a': 7., 'b': 8., 'index': dt(2013, 6, 1, 12, 30, tzinfo=utc)},
                  {'a': 9., 'b': 10., 'index': dt(2013, 6, 1, 13, 30, tzinfo=utc)},
                  # Chunk boundary here
                  {'a': 11., 'b': 12., 'index': dt(2013, 6, 1, 14, 30, tzinfo=utc)}]
    tickstore_lib._chunk_size = 2
    tickstore_lib.write('SYM1', sym1_ticks)
    tickstore_lib.write('SYM2', sym2_ticks)

    # 12:45 falls mid-chunk: each symbol has 2 ticks at or after it.
    window = DateRange(dt(2013, 6, 1, 12, 45, tzinfo=utc),
                       dt(2013, 6, 1, 15, 0, tzinfo=utc))
    assert len(tickstore_lib.read('SYM1', columns=None, date_range=window)) == 2
    assert len(tickstore_lib.read('SYM2', columns=None, date_range=window)) == 2
    assert len(tickstore_lib.read(['SYM1', 'SYM2'], columns=None, date_range=window)) == 4
def test_daterange_index():
    """DateRange supports index access: [0] is the start, [1] is the end."""
    begin = dt(2000, 1, 1)
    finish = dt(3000, 1, 1)
    rng = DateRange(begin, finish)
    assert rng[0] == begin
    assert rng[1] == finish
def read_time_series_cache_from_disk(self, fname, engine='hdf5', start_date=None, finish_date=None,
                                     db_server=DataConstants().db_server,
                                     db_port=DataConstants().db_port,
                                     username=None, password=None):
    """Reads a time series cache from disk (HDF5, bcolz, parquet) or from a
    database engine (Redis, Arctic/MongoDB).

    Parameters
    ----------
    fname : str (or list)
        file(s) to be read; '*' wildcards are expanded via glob
    engine : str (optional)
        'hdf5' - reads HDF5 files (default)
        'arctic' - reads from Arctic/MongoDB database
        'bcolz' - reads from bcolz file (not fully implemented)
        'redis' - reads from a Redis cache
    start_date : str/datetime (optional)
        Start date (used by the Arctic engine only)
    finish_date : str/datetime (optional)
        Finish date (used by the Arctic engine only)
    db_server : str
        IP address of the MongoDB/Redis server
    db_port : str/int
        port of the database server
    username, password : str (optional)
        MongoDB credentials

    Returns
    -------
    DataFrame, or list of DataFrame when several files were requested;
    entries are None where a cache could not be read.
    """
    logger = LoggerManager.getLogger(__name__)

    data_frame_list = []

    # Normalise the input to a list of concrete filenames.
    if not isinstance(fname, list):
        if '*' in fname:
            fname = glob.glob(fname)
        else:
            fname = [fname]

    for fname_single in fname:
        logger.debug("Reading " + fname_single + "..")

        # Default to None so a non-matching engine/file cannot leave
        # ``data_frame`` unbound (or stale from the previous iteration).
        data_frame = None

        if engine == 'bcolz':
            try:
                name = self.get_bcolz_filename(fname_single)
                zlens = bcolz.open(rootdir=name)
                data_frame = zlens.todataframe()

                data_frame.index = pandas.DatetimeIndex(data_frame['DTS_'])
                data_frame.index.name = 'Date'
                del data_frame['DTS_']

                # convert invalid characters (which bcolz can't deal with)
                # back to more readable characters for pandas
                data_frame.columns = self.find_replace_chars(data_frame.columns, _replace_chars, _invalid_chars)
                data_frame.columns = [x[2:] for x in data_frame.columns]
            except Exception:
                # best-effort: a missing/corrupt bcolz store yields None
                data_frame = None

        elif engine == 'redis':
            import redis

            fname_single = os.path.basename(fname_single).replace('.', '_')

            msg = None

            try:
                r = redis.StrictRedis(host=db_server, port=db_port, db=0)
                msg = r.get(fname_single)
            except Exception:
                # fixed: original used ``self.logger`` which is never assigned
                logger.info("Cache not existent for " + fname_single + " in Redis")

            if msg is None:
                data_frame = None
            else:
                logger.info('Load Redis cache: ' + fname_single)

                # NOTE(review): read_msgpack is deprecated/removed in recent
                # pandas — confirm the pinned pandas version still has it
                data_frame = pandas.read_msgpack(msg)

        elif engine == 'arctic':
            socketTimeoutMS = 2 * 1000

            import pymongo
            from arctic import Arctic

            fname_single = os.path.basename(fname_single).replace('.', '_')

            logger.info('Load Arctic/MongoDB library: ' + fname_single)

            if username is not None and password is not None:
                c = pymongo.MongoClient(
                    host="mongodb://" + username + ":" + password + "@" + str(db_server) + ":" + str(db_port),
                    connect=False)  # , username=username, password=password)
            else:
                c = pymongo.MongoClient(host="mongodb://" + str(db_server) + ":" + str(db_port),
                                        connect=False)

            store = Arctic(c, socketTimeoutMS=socketTimeoutMS, serverSelectionTimeoutMS=socketTimeoutMS)

            # Access the library
            try:
                library = store[fname_single]

                if start_date is None and finish_date is None:
                    item = library.read(fname_single)
                else:
                    from arctic.date import DateRange
                    item = library.read(fname_single, date_range=DateRange(start_date, finish_date))

                logger.info('Read ' + fname_single)

                data_frame = item.data
            except Exception as e:
                logger.warning('Library does not exist: ' + fname_single + ' & message is ' + str(e))
                data_frame = None
            finally:
                # fixed: the client used to leak when the read raised
                c.close()

        elif os.path.isfile(self.get_h5_filename(fname_single)):
            store = pandas.HDFStore(self.get_h5_filename(fname_single))
            data_frame = store.select("data")

            if 'intraday' in fname_single:
                data_frame = data_frame.astype('float32')

            store.close()

        elif os.path.isfile(fname_single):
            data_frame = pandas.read_parquet(fname_single)

        data_frame_list.append(data_frame)

    if len(data_frame_list) == 1:
        return data_frame_list[0]

    return data_frame_list
def test_intersection_returns_inner_boundaries():
    """intersection() picks the later start and the earlier end."""
    # start: the later of the two starts wins (None means unbounded)
    assert DateRange('20110103').intersection(DateRange('20110102')).start == dt(2011, 1, 3)
    assert DateRange('20110102').intersection(DateRange('20110103')).start == dt(2011, 1, 3)
    assert DateRange(None).intersection(DateRange('20110103')).start == dt(2011, 1, 3)
    assert DateRange('20110103').intersection(DateRange(None)).start == dt(2011, 1, 3)

    # end: the earlier of the two ends wins (None means unbounded)
    assert DateRange(None, '20110103').intersection(DateRange(None, '20110102')).end == dt(2011, 1, 2)
    assert DateRange(None, '20110102').intersection(DateRange(None, '20110103')).end == dt(2011, 1, 2)
    assert DateRange(None, None).intersection(DateRange(None, '20110103')).end == dt(2011, 1, 3)
    assert DateRange(None, '20110103').intersection(DateRange(None, None)).end == dt(2011, 1, 3)