def _assign_sid(self, identifier):
    """
    Resolves the sid to use for the given identifier.

    Identifiers exposing ``__int__`` are converted with it and used as the
    sid directly. String identifiers receive whatever
    ``self._next_free_sid()`` returns, but only when
    ``self.allow_sid_assignment`` is truthy.

    :param identifier: The identifier for which to resolve a sid
    :return: ``identifier.__int__()`` for int-like identifiers, otherwise
        the result of ``self._next_free_sid()``
    :raises SidAssignmentError: If sid assignment is disabled, or if the
        identifier is neither int-like nor a string
    """
    # Int-like identifiers are their own sid, regardless of whether
    # assignment is allowed.
    if hasattr(identifier, '__int__'):
        return identifier.__int__()

    # Only string identifiers may be given a freshly assigned sid.
    if self.allow_sid_assignment and isinstance(identifier, string_types):
        return self._next_free_sid()

    # Either assignment is disabled or the identifier is of a type that
    # cannot be assigned a sid. Fail explicitly instead of falling through
    # and handing the caller None as a sid.
    raise SidAssignmentError(identifier=identifier)
def _load_data(self):
    """
    Builds the equities, futures, exchanges and root symbols tables from
    the identifiers stored on this object.

    Equity and future identifiers may be Asset objects (stored under their
    own sid using ``to_dict()``), int-like values (stored under
    ``__int__()`` with a ``None`` symbol), or anything else, which is
    treated as a symbol and given a newly assigned sid when
    ``self.allow_sid_assignment`` is truthy.

    :return: An AssetData holding one pandas.DataFrame per table, each
        built with ``orient='index'``
    :raises SidAssignmentError: If an identifier needs a sid assigned and
        ``self.allow_sid_assignment`` is falsy
    """
    def _number_entries(identifiers, label):
        # Int-like identifiers key an empty row; every other identifier is
        # stored under ``label`` and keyed by a counter that starts at 0
        # and only advances for non-int identifiers.
        table = {}
        counter = 0
        for identifier in identifiers:
            if hasattr(identifier, '__int__'):
                table[identifier.__int__()] = {}
                continue
            table[counter] = {label: identifier}
            counter += 1
        return table

    def _to_frame(table):
        return pd.DataFrame.from_dict(table, orient='index')

    # Newly assigned sids continue one past the largest sid found in the
    # asset_router table, or start at 0 when the query yields None.
    largest_sid = sa.select(
        [sa.func.max(self.asset_router.c.sid)]
    ).execute().scalar()
    next_sid = 0 if largest_sid is None else largest_sid + 1

    equity_rows = {}
    future_rows = {}
    sources = (
        (equity_rows, self._equities),
        (future_rows, self._futures),
    )
    # The sid counter is shared between equities and futures.
    for rows, identifiers in sources:
        for identifier in identifiers:
            if isinstance(identifier, Asset):
                sid = identifier.sid
                rows[sid] = identifier.to_dict()
                continue
            if hasattr(identifier, '__int__'):
                rows[identifier.__int__()] = {'symbol': None}
                continue
            if not self.allow_sid_assignment:
                raise SidAssignmentError(identifier=identifier)
            rows[next_sid] = {'symbol': identifier}
            next_sid += 1

    exchange_rows = _number_entries(self._exchanges, 'exchange')
    root_symbol_rows = _number_entries(self._root_symbols, 'root_symbol')

    # Each dictionary becomes a DataFrame indexed by its keys.
    return AssetData(equities=_to_frame(equity_rows),
                     futures=_to_frame(future_rows),
                     exchanges=_to_frame(exchange_rows),
                     root_symbols=_to_frame(root_symbol_rows))
def insert_metadata(self, identifier, **kwargs):
    """
    Merges the given metadata kwargs into the cached entry for the given
    identifier, creating the entry if none exists. Fields already present
    in the entry are overwritten by accepted kwargs.

    :param identifier: The identifier for which to insert metadata
    :param kwargs: The keyed metadata to insert
    :raises SidAssignmentError: If the entry has no sid, the identifier is
        not int-like, and ``self.allow_sid_assignment`` is falsy
    """
    def _is_storable(field, candidate):
        # Unknown fields, Nones, empty strings and float nans (as found in
        # dataframes) are all ignored.
        if field not in ASSET_FIELDS:
            return False
        if candidate is None:
            return False
        if candidate == '':
            return False
        return not (isinstance(candidate, float) and np.isnan(candidate))

    record = self.metadata_cache.get(identifier, {})
    for field, candidate in kwargs.items():
        if _is_storable(field, candidate):
            record[field] = candidate

    # Give the entry a sid when none has been declared.
    if 'sid' not in record:
        if hasattr(identifier, '__int__'):
            record['sid'] = identifier.__int__()
        elif self.allow_sid_assignment:
            # The sid is the current size of the cache, i.e. the insertion
            # order. This assumes sids are being assigned to all assets.
            record['sid'] = len(self.metadata_cache)
        else:
            raise SidAssignmentError(identifier=identifier)

    self.metadata_cache[identifier] = record
def _insert_metadata(self, identifier, **kwargs):
    """
    Builds an asset from the given metadata kwargs, writes it to the
    database through ``self.conn`` and caches the processed entry under
    the given identifier. Identifiers that are already cached are skipped.

    :param identifier: The identifier for which to insert metadata
    :param kwargs: The keyed metadata to insert
    :raises SidAssignmentError: If no sid is given, the identifier is not
        int-like, and ``self.allow_sid_assignment`` is falsy
    :raises InvalidAssetType: If the asset_type is neither 'equity' nor
        'future' (case-insensitive)
    """
    # Multiple pass insertion is not supported. This could and probably
    # should raise an Exception, but it short-circuits for compatibility
    # with the test_algorithm module, whose multiple sources all insert
    # redundant metadata.
    if identifier in self.metadata_cache:
        return

    def _is_storable(field, candidate):
        # Unknown fields, Nones, empty strings and float nans (as found in
        # dataframes) are all ignored.
        if field not in ASSET_FIELDS:
            return False
        if candidate is None:
            return False
        if candidate == '':
            return False
        return not (isinstance(candidate, float) and np.isnan(candidate))

    def _as_utc(field):
        return pd.Timestamp(record[field], tz='UTC')

    def _nanos(stamp):
        return stamp.value if stamp else None

    record = {}
    for field, candidate in kwargs.items():
        if _is_storable(field, candidate):
            record[field] = candidate

    # Give the entry a sid when none has been declared.
    if 'sid' not in record:
        if hasattr(identifier, '__int__'):
            record['sid'] = identifier.__int__()
        elif self.allow_sid_assignment:
            # The sid is the current size of the cache, i.e. the insertion
            # order. This assumes sids are being assigned to all assets.
            record['sid'] = len(self.metadata_cache)
        else:
            raise SidAssignmentError(identifier=identifier)

    # A file_name, when given, replaces the symbol.
    if 'file_name' in record:
        record['symbol'] = record.pop('file_name')

    # Without any symbol so far, a string identifier serves as the symbol.
    if 'symbol' not in record and isinstance(identifier, string_types):
        record['symbol'] = identifier

    # company_name is always consumed; it only becomes the asset_name when
    # no asset_name was given.
    if 'company_name' in record:
        company_name = record.pop('company_name')
        if 'asset_name' not in record:
            record['asset_name'] = company_name

    # Dates given under their *_nano name are moved to the plain name.
    nano_aliases = (
        ('start_date', 'start_date_nano'),
        ('end_date', 'end_date_nano'),
        ('notice_date', 'notice_date_nano'),
        ('expiration_date', 'expiration_date_nano'),
    )
    for field, nano_field in nano_aliases:
        if nano_field in record:
            record[field] = record.pop(nano_field)

    # Convert dates to UTC Timestamps. A missing start_date defaults to the
    # EPOCH and a missing end_date to self.end_date_to_assign, so that date
    # queries work when those dates are not provided.
    try:
        record['start_date'] = _as_utc('start_date')
    except KeyError:
        record['start_date'] = pd.Timestamp(0, tz='UTC')
    try:
        record['end_date'] = _as_utc('end_date')
    except KeyError:
        record['end_date'] = self.end_date_to_assign
    for optional_field in ('notice_date', 'expiration_date'):
        try:
            record[optional_field] = _as_utc(optional_field)
        except KeyError:
            pass

    # Build an Asset of the appropriate type, defaulting to Equity.
    asset_type = record.pop('asset_type', 'equity')
    kind = asset_type.lower()

    if kind == 'equity':
        # The fuzzy symbol is the symbol with the fuzzy character removed,
        # available only when both a fuzzy_char and a symbol exist.
        fuzzy = None
        if self.fuzzy_char and 'symbol' in record:
            fuzzy = record['symbol'].replace(self.fuzzy_char, '')

        asset = Equity(**record)
        cursor = self.conn.cursor()
        row = (asset.sid,
               asset.symbol,
               asset.asset_name,
               _nanos(asset.start_date),
               _nanos(asset.end_date),
               _nanos(asset.first_traded),
               asset.exchange,
               fuzzy)
        insert_sql = (
            "INSERT INTO equities( sid, symbol, asset_name, start_date, "
            "end_date, first_traded, exchange, fuzzy) "
            "VALUES(?, ?, ?, ?, ?, ?, ?, ?)"
        )
        router_tag = 'equity'
    elif kind == 'future':
        asset = Future(**record)
        cursor = self.conn.cursor()
        row = (asset.sid,
               asset.symbol,
               asset.asset_name,
               _nanos(asset.start_date),
               _nanos(asset.end_date),
               _nanos(asset.first_traded),
               asset.exchange,
               asset.root_symbol,
               _nanos(asset.notice_date),
               _nanos(asset.expiration_date),
               asset.contract_multiplier)
        insert_sql = (
            "INSERT INTO futures( sid, symbol, asset_name, start_date, "
            "end_date, first_traded, exchange, root_symbol, notice_date, "
            "expiration_date, contract_multiplier) "
            "VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        )
        router_tag = 'future'
    else:
        raise InvalidAssetType(asset_type=asset_type)

    # Write the asset row first, then register its sid and type in the
    # asset_router table.
    cursor.execute(insert_sql, row)
    cursor.execute(
        "INSERT INTO asset_router(sid, asset_type) VALUES(?, ?)",
        (asset.sid, router_tag))

    self.metadata_cache[identifier] = record