def load(name, environ=os.environ, timestamp=None):
    """Load the readers of a previously ingested bundle.

    Parameters
    ----------
    name : str
        The name of the bundle.
    environ : mapping, optional
        The environment variables. Defaults to os.environ.
    timestamp : datetime, optional
        The timestamp of the data to look up. Defaults to the current
        UTC time.

    Returns
    -------
    bundle_data : BundleData
        The raw data readers for this bundle.
    """
    lookup_time = pd.Timestamp.utcnow() if timestamp is None else timestamp
    timestr = most_recent_data(name, lookup_time, environ=environ)

    def resolve(path_func):
        # Every path helper shares the (name, timestr, environ) signature.
        return path_func(name, timestr, environ=environ)

    # Readers are built one at a time, in the same order as the keyword
    # arguments below.
    finder = AssetFinder(resolve(asset_db_path))
    minute_bars = BcolzMinuteBarReader(resolve(minute_equity_path))
    daily_bars = CNBcolzDailyBarReader(resolve(daily_equity_path))
    adjustments = SQLiteAdjustmentReader(resolve(adjustment_db_path))
    fundamentals = SQLiteFundamentalsReader(resolve(fundamentals_db_path))

    return BundleData(
        asset_finder=finder,
        equity_minute_bar_reader=minute_bars,
        equity_daily_bar_reader=daily_bars,
        adjustment_reader=adjustments,
        fundamental_reader=fundamentals,
    )
def load_sharadar_bundle(name=SHARADAR_BUNDLE_NAME,
                         timestr=SHARADAR_BUNDLE_DIR,
                         environ=os.environ):
    """Build the BundleData for the Sharadar bundle.

    Parameters
    ----------
    name : str, optional
        The name of the bundle. Defaults to SHARADAR_BUNDLE_NAME.
    timestr : str, optional
        The directory component passed to the path helpers. Defaults to
        SHARADAR_BUNDLE_DIR.
    environ : mapping, optional
        The environment variables. Defaults to os.environ.

    Returns
    -------
    bundle_data : BundleData
        The asset finder, daily bar reader and adjustment reader for the
        bundle. No minute bar reader is provided (it is set to None).
    """
    def resolve(path_func):
        # Every path helper shares the (name, timestr, environ) signature.
        return path_func(name, timestr, environ=environ)

    finder = SQLiteAssetFinder(resolve(asset_db_path))
    daily_bars = SQLiteDailyBarReader(resolve(daily_equity_path))
    adjustments = SQLiteAdjustmentReader(resolve(adjustment_db_path))

    return BundleData(
        asset_finder=finder,
        equity_minute_bar_reader=None,
        equity_daily_bar_reader=daily_bars,
        adjustment_reader=adjustments,
    )
def component_dataframes(self, convert_dates=True):
    """Unpack the adjustments database at ``self.db_path``.

    Parameters
    ----------
    convert_dates : bool, optional
        Forwarded unchanged to
        ``SQLiteAdjustmentReader.unpack_db_to_component_dfs``.

    Returns
    -------
    The value returned by ``unpack_db_to_component_dfs``.
    """
    # The reader is only needed for the unpack call; the context manager
    # is exited before the result is handed back.
    with SQLiteAdjustmentReader(self.db_path) as reader:
        components = reader.unpack_db_to_component_dfs(
            convert_dates=convert_dates,
        )
    return components
def calc_dividend_ratios(self, dividends):
    """
    Calculate the ratios to apply to equities when looking back at pricing
    history so that the price is smoothed over the ex_date, when the market
    adjusts to the change in equity value due to upcoming dividend.

    Parameters
    ----------
    dividends : DataFrame or None
        Dividend records; the ``sid``, ``ex_date`` and ``amount`` columns
        are read.

    Returns
    -------
    DataFrame
        A frame in the same format as splits and mergers, with keys
        - sid, the id of the equity
        - effective_date, the dividend's ex_date, on which to apply the
          ratio.
        - ratio, the ratio to apply to backwards looking pricing data.

        Dividends whose ratio cannot be computed (NaN) or is not strictly
        positive are left out of the result.
    """
    if dividends is None or dividends.empty:
        return pd.DataFrame(np.array(
            [],
            dtype=[
                ('sid', uint64_dtype),
                ('effective_date', uint32_dtype),
                ('ratio', float64_dtype),
            ],
        ))

    pricing_reader = self._equity_daily_bar_reader
    input_sids = dividends.sid.values
    unique_sids, sids_ix = np.unique(input_sids, return_inverse=True)
    dates = pricing_reader.sessions.values
    start = pd.Timestamp(dates[0], tz='UTC')
    end = pd.Timestamp(dates[-1], tz='UTC')
    calendar = pricing_reader.trading_calendar

    data_portal = DataPortal(
        self._asset_finder,
        trading_calendar=calendar,
        first_trading_day=start,
        equity_daily_reader=pricing_reader,
        adjustment_reader=SQLiteAdjustmentReader(self._filename),
    )
    # One row per session, one column per unique sid.
    close = data_portal.get_history_window(
        assets=unique_sids,
        end_dt=end,
        bar_count=calendar.session_distance(start, end),
        frequency='1d',
        field='close',
        data_frequency='daily',
    ).values

    date_ix = np.searchsorted(dates, dividends.ex_date.values)
    # A dividend at index 0 has no prior session to take a close from.
    mask = date_ix > 0

    date_ix = date_ix[mask]
    sids_ix = sids_ix[mask]
    input_dates = dividends.ex_date.values[mask]

    # subtract one day to get the close on the day prior to the ex_date
    previous_close = close[date_ix - 1, sids_ix]
    input_sids = input_sids[mask]

    amount = dividends.amount.values[mask]
    ratio = 1.0 - amount / previous_close

    non_nan_ratio_mask = ~np.isnan(ratio)
    for ix in np.flatnonzero(~non_nan_ratio_mask):
        ex_date = pd.Timestamp(input_dates[ix], tz='UTC')
        start_date = self._asset_finder.retrieve_asset(
            input_sids[ix],
        ).start_date
        # Only warn when the ex_date differs from the asset's start_date.
        if ex_date != start_date:
            log.warn(
                "Couldn't compute ratio for dividend"
                " sid={sid}, ex_date={ex_date:%Y-%m-%d}, start_date={start_date:%Y-%m-%d}, amount={amount:.3f}",
                sid=input_sids[ix],
                ex_date=ex_date,
                amount=amount[ix],
                start_date=start_date
            )

    # Test the ratio itself, not the boolean NaN mask: comparing the mask
    # to 0 would simply reproduce it, re-reporting NaNs as "<= 0" and
    # letting genuinely non-positive ratios through.
    positive_ratio_mask = ratio > 0
    for ix in np.flatnonzero(~positive_ratio_mask & non_nan_ratio_mask):
        log.warn(
            "Dividend ratio <= 0 for dividend"
            " sid={sid}, ex_date={ex_date:%Y-%m-%d}, amount={amount:.3f}",
            sid=input_sids[ix],
            ex_date=pd.Timestamp(input_dates[ix]),
            amount=amount[ix],
        )

    valid_ratio_mask = non_nan_ratio_mask & positive_ratio_mask
    return pd.DataFrame({
        'sid': input_sids[valid_ratio_mask],
        'effective_date': input_dates[valid_ratio_mask],
        'ratio': ratio[valid_ratio_mask],
    })