def test_lookup_symbol(self):
    """
    Check that ``lookup_symbol`` picks the right asset among several that
    share one ticker, based on the as-of date, and raises otherwise.

    Five equities are written under the symbol 'existing' and queried as
    'EXISTING'.
    """
    # A two-day step keeps the lifetimes disjoint: each asset's end_date
    # is the day after its start_date, so no two assets overlap.
    dates = pd.date_range('2013-01-01', freq='2D', periods=5, tz='UTC')
    one_day = timedelta(days=1)

    records = []
    for sid, start in enumerate(dates):
        records.append({
            'sid': sid,
            'symbol': 'existing',
            'start_date': start.value,
            'end_date': (start + one_day).value,
            'exchange': 'NYSE',
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))
    finder = self.asset_finder

    # Two identical passes, so that the second one exercises whatever the
    # finder cached during the first.
    for _ in range(2):
        # A symbol that was never written cannot be found.
        with self.assertRaises(SymbolNotFound):
            finder.lookup_symbol('NON_EXISTING', dates[0])

        # Without a date there is no way to choose between the five
        # assets sharing the symbol.
        with self.assertRaises(MultipleSymbolsFound):
            finder.lookup_symbol('EXISTING', None)

        # With a date, the asset whose lifetime starts on that date is
        # the one returned.
        for expected_sid, as_of in enumerate(dates):
            found = finder.lookup_symbol('EXISTING', as_of)
            self.assertEqual(found.symbol, 'EXISTING')
            self.assertEqual(found.sid, expected_sid)
def test_group_by_type(self):
    """
    Check that ``group_by_type`` splits a list of sids into the
    'equity' and 'future' sets they belong to.
    """
    equity_frame = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    future_frame = make_commodity_future_info(
        first_sid=6,
        root_symbols=['CL'],
        years=[2014],
    )

    # The queries overlap on purpose, so that later calls are answered
    # from partially-cached results.
    sid_queries = [
        ([0, 1, 3], [6, 7]),
        ([0, 2, 3], [7, 10]),
        (list(equity_frame.index), list(future_frame.index)),
    ]

    self.write_assets(equities=equity_frame, futures=future_frame)
    finder = self.asset_finder

    for equity_sids, future_sids in sid_queries:
        grouped = finder.group_by_type(equity_sids + future_sids)
        expected = {
            'equity': set(equity_sids),
            'future': set(future_sids),
        }
        self.assertEqual(grouped, expected)
def test_retrieve_specific_type(self, type_, lookup_name, failure_type):
    """
    Check a type-specific bulk lookup on the asset finder.

    ``lookup_name`` is the name of the finder method under test, ``type_``
    is the asset class it is expected to return, and ``failure_type`` is
    the exception expected when any requested sid is of the other kind.
    """
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    # Futures are numbered immediately after the last equity sid.
    first_future_sid = equities.index.max() + 1
    futures = make_commodity_future_info(
        first_sid=first_future_sid,
        root_symbols=['CL'],
        years=[2014],
    )

    equity_sids = [0, 1]
    future_sids = [first_future_sid + offset for offset in range(3)]
    if type_ == Equity:
        success_sids, fail_sids = equity_sids, future_sids
    else:
        success_sids, fail_sids = future_sids, equity_sids

    self.write_assets(equities=equities, futures=futures)
    finder = self.asset_finder
    lookup = getattr(finder, lookup_name)

    # Every returned asset should convert to the sid it was requested by.
    expected_ints = dict(zip(success_sids, success_sids))

    # Two passes, so that the second one goes through the cache.
    for _ in range(2):
        found = lookup(success_sids)
        self.assertIsInstance(found, dict)
        self.assertEqual(set(found), set(success_sids))
        self.assertEqual(valmap(int, found), expected_ints)
        self.assertEqual(
            {type_},
            {type(asset) for asset in itervalues(found)},
        )

        # Only sids of the wrong kind.
        with self.assertRaises(failure_type):
            lookup(fail_sids)

        # A single bad sid among good ones is enough to fail.
        with self.assertRaises(failure_type):
            lookup([success_sids[0], fail_sids[0]])
def test_retrieve_all(self):
    """
    Check that ``retrieve_all`` returns equities and futures in the
    requested order, with the expected types and symbols, for a series
    of queries that hit the finder both cold and warm.
    """
    equities = make_simple_equity_info(
        range(5),
        start_date=pd.Timestamp('2014-01-01'),
        end_date=pd.Timestamp('2015-01-01'),
    )
    max_equity = equities.index.max()
    futures = make_commodity_future_info(
        first_sid=max_equity + 1,
        root_symbols=['CL'],
        years=[2014],
    )
    self.write_assets(equities=equities, futures=futures)
    finder = self.asset_finder

    all_sids = finder.sids
    self.assertEqual(len(all_sids), len(equities) + len(futures))

    mixed = tuple(equities.index[2:]) + tuple(futures.index[3:])
    queries = [
        (),                         # Empty query.
        tuple(equities.index[:2]),  # Only equities.
        tuple(futures.index[:3]),   # Only futures.
        mixed,                      # Mixed, all cache misses.
        mixed,                      # Mixed, all cache hits.
        all_sids,                   # Everything...
        all_sids,                   # ...and everything once more.
    ]

    for sids in queries:
        # Sids up to max_equity are equities; anything above is a future.
        equity_sids = [sid for sid in sids if sid <= max_equity]
        future_sids = [sid for sid in sids if sid > max_equity]

        results = finder.retrieve_all(sids)

        # Same sids, same order.
        self.assertEqual(sids, tuple(map(int, results)))

        expected_types = (
            [Equity] * len(equity_sids) + [Future] * len(future_sids)
        )
        self.assertEqual(
            expected_types,
            [type(asset) for asset in results],
        )

        expected_symbols = (
            list(equities.symbol.loc[equity_sids]) +
            list(futures.symbol.loc[future_sids])
        )
        self.assertEqual(
            expected_symbols,
            [asset.symbol for asset in results],
        )
def test_lookup_symbol_delimited(self):
    """
    Check ``lookup_symbol`` against symbols containing a delimiter.

    Three equities named 'TEST.0' .. 'TEST.2' are written; 'TEST<c>1'
    must resolve to sid 1 for each accepted delimiter ``<c>``, while
    spellings without a delimiter or with '@' must not resolve.
    """
    as_of = pd.Timestamp('2013-01-01', tz='UTC')

    records = []
    for sid in range(3):
        records.append({
            'sid': sid,
            'symbol': 'TEST.%d' % sid,
            'company_name': "company%d" % sid,
            'start_date': as_of.value,
            'end_date': as_of.value,
            'exchange': uuid.uuid4().hex,
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))
    finder = self.asset_finder

    # All three assets are retrieved, although only sid 1 is compared
    # against below.
    retrieved = [finder.retrieve_asset(sid) for sid in range(3)]
    asset_1 = retrieved[1]

    # Bare root, missing delimiter, and '@' (not a supported delimiter).
    unresolvable = ('TEST', 'TEST1', 'TEST@1')
    delimiters = ('-', '/', '_', '.')

    # Everything is done twice to catch caching bugs.
    for _ in range(2):
        for symbol in unresolvable:
            with self.assertRaises(SymbolNotFound):
                finder.lookup_symbol(symbol, as_of)

        # Any of the accepted delimiters should resolve to the same asset.
        for fuzzy_char in delimiters:
            self.assertEqual(
                asset_1,
                finder.lookup_symbol('TEST%s1' % fuzzy_char, as_of),
            )
def test_blocked_lookup_symbol_query(self):
    """
    Retrieve more equities in one call than SQLite accepts as bound
    variables, to make sure the query is chunked on the client side.
    """
    as_of = pd.Timestamp('2013-01-01', tz='UTC')

    # Ten sids more than can be bound in a single SQLite query.
    sids = range(SQLITE_MAX_VARIABLE_NUMBER + 10)

    records = []
    for sid in sids:
        records.append({
            'sid': sid,
            'symbol': 'TEST.%d' % sid,
            'start_date': as_of.value,
            'end_date': as_of.value,
            'exchange': uuid.uuid4().hex,
        })
    self.write_assets(equities=pd.DataFrame.from_records(records))

    # Every requested sid must come back as a key of the result.
    retrieved = self.asset_finder.retrieve_equities(sids)
    assert_equal(viewkeys(retrieved), set(sids))
def build_lookup_generic_cases(asset_finder_type):
    """
    Generate the test_lookup_generic cases for the asset finder type
    given by asset_finder_type.

    A temporary assets db is populated with two equities sharing the
    symbol 'duplicated' (sids 0 and 1, with disjoint lifetimes) and one
    equity with the symbol 'unique' (sid 2).  A single tuple of cases is
    yielded from inside the ``with tmp_assets_db(...)`` block; each case
    has the form ``(finder, query, as_of_date, expected)``.
    """
    one_day = timedelta(days=1)

    unique_start = pd.Timestamp('2013-01-01', tz='UTC')
    unique_end = pd.Timestamp('2014-01-01', tz='UTC')
    dupe_0_start = pd.Timestamp('2013-01-01', tz='UTC')
    dupe_1_start = pd.Timestamp('2013-01-03', tz='UTC')

    # (sid, symbol, start, end) for every equity written to the db.
    rows = (
        (0, 'duplicated', dupe_0_start, dupe_0_start + one_day),
        (1, 'duplicated', dupe_1_start, dupe_1_start + one_day),
        (2, 'unique', unique_start, unique_end),
    )
    frame = pd.DataFrame.from_records(
        [
            {
                'sid': sid,
                'symbol': symbol,
                'start_date': start.value,
                'end_date': end.value,
                'exchange': 'TEST',
            }
            for sid, symbol, start, end in rows
        ],
        index='sid',
    )

    with tmp_assets_db(equities=frame) as assets_db:
        finder = asset_finder_type(assets_db)
        assets = [finder.retrieve_asset(sid) for sid in range(3)]
        dupe_0, dupe_1, unique = assets

        # From here on, resolve the duplicated symbol with the start
        # dates reported by the retrieved assets themselves.
        dupe_0_start = dupe_0.start_date
        dupe_1_start = dupe_1.start_date

        yield (
            ##
            # Scalar queries

            # An Asset object maps to itself.
            (finder, assets[0], None, assets[0]),
            (finder, assets[1], None, assets[1]),
            (finder, assets[2], None, assets[2]),
            # An int is treated as a sid.
            (finder, 0, None, assets[0]),
            (finder, 1, None, assets[1]),
            (finder, 2, None, assets[2]),
            # The duplicated symbol needs a resolution date.
            (finder, 'DUPLICATED', dupe_0_start, dupe_0),
            (finder, 'DUPLICATED', dupe_1_start, dupe_1),
            # The unique symbol works with or without a resolution date.
            (finder, 'UNIQUE', unique_start, unique),
            (finder, 'UNIQUE', None, unique),

            ##
            # Iterable queries

            # Iterables of Asset objects.
            (finder, assets, None, assets),
            (finder, iter(assets), None, assets),
            # Iterables of ints.
            (finder, (0, 1), None, assets[:-1]),
            (finder, iter((0, 1)), None, assets[:-1]),
            # Iterables of symbols.
            (finder, ('DUPLICATED', 'UNIQUE'), dupe_0_start,
             [dupe_0, unique]),
            (finder, ('DUPLICATED', 'UNIQUE'), dupe_1_start,
             [dupe_1, unique]),
            # Symbols, sids and Asset objects mixed in one query.
            (finder,
             ('DUPLICATED', 2, 'UNIQUE', 1, dupe_1),
             dupe_0_start,
             [dupe_0, assets[2], unique, assets[1], dupe_1]),
        )