def _lookup_symbol_fuzzy(self, symbol, as_of_date):
    """
    Resolve ``symbol`` to an asset using the fuzzy ownership map.

    The symbol is upper-cased and split with ``split_delimited_symbol``;
    the two parts are concatenated to form the key into
    ``self.fuzzy_symbol_ownership_map``. Each entry of the looked-up owners
    list unpacks as ``(start, end, sid, symbol)`` and exposes ``.sid``.

    Parameters
    ----------
    symbol : str
        The ticker to resolve.
    as_of_date : comparable or falsy
        When truthy, only owners with ``start <= as_of_date < end`` are
        considered. When falsy, every owner is considered.

    Returns
    -------
    The result of ``self.retrieve_asset`` for the selected sid.

    Raises
    ------
    SymbolNotFound
        The fuzzy key is unknown, or nothing owned it on ``as_of_date``.
    MultipleSymbolsFound
        The lookup could not be narrowed down to exactly one asset.
    """
    symbol = symbol.upper()
    company_part, share_class_part = split_delimited_symbol(symbol)
    fuzzy_key = company_part + share_class_part

    try:
        owners = self.fuzzy_symbol_ownership_map[fuzzy_key]
    except KeyError:
        # nothing has ever held a symbol matching this fuzzy key
        raise SymbolNotFound(symbol=symbol)
    assert owners, 'empty owners list for %r' % symbol

    if not as_of_date:
        if len(owners) == 1:
            # a single owner ever: unambiguous even without a date
            return self.retrieve_asset(owners[0].sid)

        # several owners: keep only those whose stored symbol is an exact
        # match for the requested one
        exact_matches = [
            self.retrieve_asset(sid)
            for _, _, sid, owned_symbol in owners
            if owned_symbol == symbol
        ]
        if len(exact_matches) == 1:
            return exact_matches[0]

        # the exact matches do not single out one asset
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=set(exact_matches),
        )

    # sid -> stored symbol, for every owner holding the key on the date
    held_on_date = {
        sid: owned_symbol
        for start, end, sid, owned_symbol in owners
        if start <= as_of_date < end
    }
    if not held_on_date:
        # nobody owned the fuzzy symbol on the requested date
        raise SymbolNotFound(symbol=symbol)

    sids = list(held_on_date)
    if len(sids) == 1:
        # one owner, or fuzzy and non-fuzzy entries that share a sid
        return self.retrieve_asset(sids[0])

    for sid, owned_symbol in held_on_date.items():
        if owned_symbol == symbol:
            # prefer an exact symbol match on the date
            return self.retrieve_asset(sid)

    # several holders on the date and none of them is an exact match
    raise MultipleSymbolsFound(
        symbol=symbol,
        options=[self.retrieve_asset(sid) for sid in sids],
    )
def lookup_symbol_resolve_multiple(self, symbol, as_of_date=None):
    """
    Return the asset cached under ``symbol`` in ``self.sym_cache``.

    Without ``as_of_date`` the symbol must map to exactly one cached entry,
    which is returned. With ``as_of_date`` the date is passed through
    ``normalize_date`` and the choice is delegated to
    ``self._lookup_symbol_in_infos``.

    Raises
    ------
    SymbolNotFound
        ``symbol`` is not in the cache, or ``_lookup_symbol_in_infos``
        yields no asset for the date.
    MultipleSymbolsFound
        No date was given and the symbol does not map to exactly one entry.
    """
    if as_of_date is not None:
        as_of_date = normalize_date(as_of_date)

    cache = self.sym_cache
    if symbol not in cache:
        raise SymbolNotFound(symbol=symbol)
    matches = cache[symbol]

    if as_of_date is not None:
        # let the helper pick the entry that matches the date
        found, _ = self._lookup_symbol_in_infos(matches, as_of_date)
        if found is None:
            raise SymbolNotFound(symbol=symbol)
        return found

    # no date to disambiguate with: only a single entry is acceptable
    if len(matches) != 1:
        raise MultipleSymbolsFound(symbol=symbol, options=str(matches))
    return matches[0]
def _lookup_symbol_strict(self, symbol, as_of_date):
    """
    Resolve ``symbol`` to an asset using an exact ownership lookup.

    The symbol is split with ``split_delimited_symbol`` and the resulting
    ``(company_symbol, share_class_symbol)`` pair is the key into
    ``self.symbol_ownership_map``. Each owner unpacks as
    ``(start, end, sid, _)`` and exposes ``.sid``.

    Parameters
    ----------
    symbol : str
        The ticker to resolve.
    as_of_date : comparable or falsy
        When truthy, the first owner with ``start <= as_of_date < end`` is
        chosen. When falsy, the symbol must have had exactly one owner.

    Returns
    -------
    The result of ``self.retrieve_asset`` for the selected sid.

    Raises
    ------
    SymbolNotFound
        The symbol was never held, or was not held on ``as_of_date``.
    MultipleSymbolsFound
        No date was given and more than one owner has held the symbol.
    """
    # share_class_symbol is whatever split_delimited_symbol yields when the
    # ticker has no company/share-class delimiter
    ownership_key = split_delimited_symbol(symbol)
    company_symbol, share_class_symbol = ownership_key

    try:
        owners = self.symbol_ownership_map[company_symbol, share_class_symbol]
    except KeyError:
        # this ticker has never been held
        raise SymbolNotFound(symbol=symbol)
    assert owners, 'empty owners list for %r' % symbol

    if as_of_date:
        for start, end, sid, _ in owners:
            if start <= as_of_date < end:
                # this owner held the ticker on the requested date
                return self.retrieve_asset(sid)
        # nobody held the ticker on the requested date
        raise SymbolNotFound(symbol=symbol)

    if len(owners) > 1:
        # ambiguous without a date: report every asset that held the ticker
        raise MultipleSymbolsFound(
            symbol=symbol,
            options={self.retrieve_asset(owner.sid) for owner in owners},
        )

    # exactly one owner ever, so no date is needed
    return self.retrieve_asset(owners[0].sid)
def lookup_symbol_resolve_multiple(self, symbol, as_of_date=None):
    """
    Return the equity whose ``symbol`` column equals ``symbol``.

    Parameters
    ----------
    symbol : str
        Value matched against the ``symbol`` column of ``self.equities``.
    as_of_date : optional
        When given, it is normalized with ``normalize_date`` and converted
        to a ``pd.Timestamp``; its integer ``.value`` is compared against
        the ``start_date`` / ``end_date`` columns.

    Returns
    -------
    The result of ``self._retrieve_equity`` for the selected sid.

    With a date, the resolution order is:

    1. exactly one equity active on the date (``start_date <= date <=
       end_date``): return it;
    2. no equity active on the date: return the equity that started on or
       before the date and has the latest ``end_date``;
    3. several equities active on the date: return, among those active
       equities, the one with the latest ``start_date`` (``end_date``
       breaks ties).

    Raises
    ------
    SymbolNotFound
        No row matches (with a date: no row started on or before it).
    MultipleSymbolsFound
        No date was given and more than one row carries the symbol.
    """
    if as_of_date is not None:
        as_of_date = pd.Timestamp(normalize_date(as_of_date))

    equities_cols = self.equities.c
    has_symbol = equities_cols.symbol == symbol

    if as_of_date is None:
        sids = sa.select((equities_cols.sid,)).where(
            has_symbol,
        ).execute().fetchall()
        if not sids:
            raise SymbolNotFound(symbol=symbol)
        if len(sids) == 1:
            return self._retrieve_equity(sids[0]['sid'])
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=[self._retrieve_equity(row['sid']) for row in sids],
        )

    ad_value = as_of_date.value
    started = has_symbol & (equities_cols.start_date <= ad_value)
    active = started & (equities_cols.end_date >= ad_value)

    candidates = sa.select((equities_cols.sid,)).where(
        active,
    ).execute().fetchall()

    if len(candidates) == 1:
        return self._retrieve_equity(candidates[0]['sid'])

    if not candidates:
        # Nothing active on the date: fall back to the equity with the
        # highest-but-not-over end_date.
        sid = sa.select((equities_cols.sid,)).where(
            started,
        ).order_by(
            equities_cols.end_date.desc(),
        ).scalar()
    else:
        # Several equities are active on the date. The tie-break must stay
        # restricted to those active rows; filtering on start_date alone
        # would let an already-expired equity with a later start_date win.
        sid = sa.select((equities_cols.sid,)).where(
            active,
        ).order_by(
            equities_cols.start_date.desc(),
            equities_cols.end_date.desc(),
        ).scalar()

    if sid is not None:
        return self._retrieve_equity(sid)
    raise SymbolNotFound(symbol=symbol)
def lookup_symbol(self, symbol, as_of_date, fuzzy=False):
    """
    Return the equity matching ``symbol``, optionally as of a date.

    ``symbol`` is split by ``split_delimited_symbol`` into a company
    symbol, a share-class symbol and a fuzzy symbol. When ``fuzzy`` is
    true, a lookup on the fuzzy symbol is tried first and used only if it
    yields exactly one candidate; otherwise the company/share-class pair
    is looked up.

    With a truthy ``as_of_date`` the candidates are restricted to the
    normalized date; if none are in range,
    ``self._resolve_no_matching_candidates`` supplies fallback candidates.
    The final pick is made by ``self._get_best_candidate``.

    Raises
    ------
    SymbolNotFound
        No candidate could be found.
    MultipleSymbolsFound
        No date was given and more than one candidate matches.
    """
    company_symbol, share_class_symbol, fuzzy_symbol = \
        split_delimited_symbol(symbol)

    if not as_of_date:
        if fuzzy:
            # a fuzzy hit only counts when it is unambiguous
            fuzzy_hits = self._get_fuzzy_candidates(fuzzy_symbol)
            if len(fuzzy_hits) == 1:
                return self._get_best_candidate(fuzzy_hits)

        hits = self._get_split_candidates(company_symbol,
                                          share_class_symbol)
        if len(hits) == 1:
            return self._get_best_candidate(hits)
        if not hits:
            raise SymbolNotFound(symbol=symbol)
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=self._get_equities_from_candidates(hits)
        )

    # normalize the date and compare on its integer value
    ad_value = pd.Timestamp(as_of_date).normalize().value

    if fuzzy:
        # a fuzzy hit only counts when it is unambiguous
        fuzzy_hits = self._get_fuzzy_candidates_in_range(fuzzy_symbol,
                                                         ad_value)
        if len(fuzzy_hits) == 1:
            return self._get_best_candidate(fuzzy_hits)

    # exact match on the split-up company / share-class symbols
    hits = self._get_split_candidates_in_range(
        company_symbol, share_class_symbol, ad_value
    )
    if not hits:
        # nothing in range: ask for the fallback candidates instead
        hits = self._resolve_no_matching_candidates(
            company_symbol, share_class_symbol, ad_value
        )
    if hits:
        # one or many: the helper picks the best of them
        return self._get_best_candidate(hits)

    raise SymbolNotFound(symbol=symbol)
def lookup_symbol_resolve_multiple(self, symbol, as_of_date=None):
    """
    Return the equity whose ``symbol`` column equals ``symbol``.

    Rows are read from the ``equities`` table through a cursor on
    ``self.conn``; all values are passed as bound ``?`` parameters.

    Parameters
    ----------
    symbol : str
        Value matched against the ``symbol`` column.
    as_of_date : optional
        When given, it is normalized with ``normalize_date`` and converted
        to a ``pd.Timestamp``; its integer ``.value`` is compared against
        the ``start_date`` / ``end_date`` columns.

    Returns
    -------
    The result of ``self._retrieve_equity`` for the selected sid.

    With a date, the resolution order is:

    1. exactly one equity active on the date (``start_date <= date <=
       end_date``): return it;
    2. no equity active on the date: return the equity that started on or
       before the date and has the latest ``end_date``;
    3. several equities active on the date: return, among those active
       equities, the one with the latest ``start_date`` (``end_date``
       breaks ties).

    Raises
    ------
    SymbolNotFound
        No row matches (with a date: no row started on or before it).
    MultipleSymbolsFound
        No date was given and more than one row carries the symbol.
    """
    if as_of_date is not None:
        as_of_date = pd.Timestamp(normalize_date(as_of_date))

    c = self.conn.cursor()

    if as_of_date is None:
        c.execute("select sid from equities where symbol=?", (symbol,))
        rows = c.fetchall()
        if not rows:
            raise SymbolNotFound(symbol=symbol)
        if len(rows) == 1:
            return self._retrieve_equity(rows[0][0])
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=[self._retrieve_equity(row[0]) for row in rows],
        )

    ad_value = as_of_date.value

    c.execute(
        "select sid from equities "
        "where symbol=? "
        "and start_date<=? "
        "and end_date>=?",
        (symbol, ad_value, ad_value),
    )
    candidates = c.fetchall()

    if len(candidates) == 1:
        return self._retrieve_equity(candidates[0][0])

    if not candidates:
        # Nothing active on the date: fall back to the equity with the
        # highest-but-not-over end_date.
        c.execute(
            "select sid from equities "
            "where symbol=? "
            "and start_date<=? "
            "order by end_date desc "
            "limit 1",
            (symbol, ad_value),
        )
    else:
        # Several equities are active on the date. The tie-break must stay
        # restricted to those active rows; filtering on start_date alone
        # would let an already-expired equity with a later start_date win.
        c.execute(
            "select sid from equities "
            "where symbol=? "
            "and start_date<=? "
            "and end_date>=? "
            "order by start_date desc, end_date desc "
            "limit 1",
            (symbol, ad_value, ad_value),
        )

    row = c.fetchone()
    if row is not None:
        return self._retrieve_equity(row[0])
    raise SymbolNotFound(symbol=symbol)
def lookup_symbol(self, symbol, as_of_date, fuzzy=False):
    """
    Return the equity matching ``symbol``, optionally as of a date.

    ``symbol`` is split by ``split_delimited_symbol`` into a company
    symbol, a share-class symbol and a fuzzy symbol, which are matched
    against the corresponding columns of ``self.equities``.

    Parameters
    ----------
    symbol : str
        The ticker to resolve.
    as_of_date : optional
        When not None, it is normalized with ``normalize_date`` and
        converted to a ``pd.Timestamp``; its integer ``.value`` is compared
        against the ``start_date`` / ``end_date`` columns.
    fuzzy : bool, optional
        When true, a lookup on the ``fuzzy_symbol`` column is tried first
        and used only if it yields exactly one row.

    Returns
    -------
    The result of ``self._retrieve_equity`` for the selected sid.

    With a date, the company/share-class lookup resolves as follows:

    1. exactly one equity active on the date (``start_date <= date <=
       end_date``): return it;
    2. no equity active on the date: return the equity that started on or
       before the date and has the latest ``end_date``;
    3. several equities active on the date: return, among those active
       equities, the one with the latest ``start_date`` (``end_date``
       breaks ties).

    Raises
    ------
    SymbolNotFound
        No row matches (with a date: no row started on or before it).
    MultipleSymbolsFound
        No date was given and more than one row matches the
        company/share-class pair.
    """
    # Format inputs
    if as_of_date is not None:
        as_of_date = pd.Timestamp(normalize_date(as_of_date))

    company_symbol, share_class_symbol, fuzzy_symbol = \
        split_delimited_symbol(symbol)

    equities_cols = self.equities.c
    fuzzy_match = equities_cols.fuzzy_symbol == fuzzy_symbol
    split_match = (
        (equities_cols.company_symbol == company_symbol) &
        (equities_cols.share_class_symbol == share_class_symbol)
    )

    if as_of_date is None:
        if fuzzy:
            # A fuzzy hit only counts when it is unambiguous.
            fuzzy_sids = sa.select((equities_cols.sid,)).where(
                fuzzy_match,
            ).execute().fetchall()
            if len(fuzzy_sids) == 1:
                return self._retrieve_equity(fuzzy_sids[0]['sid'])

        sids = sa.select((equities_cols.sid,)).where(
            split_match,
        ).execute().fetchall()
        if not sids:
            raise SymbolNotFound(symbol=symbol)
        if len(sids) == 1:
            return self._retrieve_equity(sids[0]['sid'])
        raise MultipleSymbolsFound(
            symbol=symbol,
            options=[self._retrieve_equity(row['sid']) for row in sids],
        )

    ad_value = as_of_date.value
    started = equities_cols.start_date <= ad_value
    active = started & (equities_cols.end_date >= ad_value)

    if fuzzy:
        # Search for a single exact match on the fuzzy column.
        fuzzy_candidates = sa.select((equities_cols.sid,)).where(
            fuzzy_match & active,
        ).execute().fetchall()
        if len(fuzzy_candidates) == 1:
            return self._retrieve_equity(fuzzy_candidates[0]['sid'])

    # Search for exact matches of the split-up company_symbol and
    # share_class_symbol.
    candidates = sa.select((equities_cols.sid,)).where(
        split_match & active,
    ).execute().fetchall()

    if len(candidates) == 1:
        return self._retrieve_equity(candidates[0]['sid'])

    if not candidates:
        # Nothing active on the date: fall back to the equity with the
        # highest-but-not-over end_date.
        sid = sa.select((equities_cols.sid,)).where(
            split_match & started,
        ).order_by(
            equities_cols.end_date.desc(),
        ).scalar()
    else:
        # Several equities are active on the date. The tie-break must stay
        # restricted to those active rows; filtering on start_date alone
        # would let an already-expired equity with a later start_date win.
        sid = sa.select((equities_cols.sid,)).where(
            split_match & active,
        ).order_by(
            equities_cols.start_date.desc(),
            equities_cols.end_date.desc(),
        ).scalar()

    if sid is not None:
        return self._retrieve_equity(sid)
    raise SymbolNotFound(symbol=symbol)