def overwrite_novel_deltas(baseline, deltas, dates):
    """Overwrite any deltas into the baseline set that would have changed
    our most recently known value.

    Parameters
    ----------
    baseline : pd.DataFrame
        The first known values.
    deltas : pd.DataFrame
        Overwrites to the baseline data.
    dates : pd.DatetimeIndex
        The dates requested by the loader.

    Returns
    -------
    cat : pd.DataFrame
        The baseline data with the novel deltas folded in, sorted by
        timestamp.
    non_novel_deltas : pd.DataFrame
        The deltas that do not represent a baseline value.
    """
    get_indexes = dates.searchsorted
    # A delta is "novel" when its timestamp falls on the same requested date
    # as its asof_date (or the one immediately after), i.e. it would have
    # changed the most recently known value and belongs in the baseline.
    novel_idx = (
        get_indexes(deltas[TS_FIELD_NAME].values, 'right') -
        get_indexes(deltas[AD_FIELD_NAME].values, 'left')
    ) <= 1
    novel_deltas = deltas.loc[novel_idx]
    non_novel_deltas = deltas.loc[~novel_idx]
    cat = pd.concat(
        (baseline, novel_deltas),
        ignore_index=True,
        copy=False,
    )
    sort_values(cat, TS_FIELD_NAME, inplace=True)
    return cat, non_novel_deltas
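# Every snippet here calls a module-level ``sort_values(frame, by, ...)``
# helper rather than the ``DataFrame.sort_values`` method.  Below is a minimal
# sketch of what such a helper could look like -- purely an assumption that it
# is a compatibility shim for pandas releases that only exposed
# ``DataFrame.sort``; the real helper may differ.
import pandas as pd


def sort_values(frame, by, inplace=False):
    """Sort ``frame`` by ``by`` using whichever sort API the installed pandas
    provides (hypothetical shim, not the original implementation)."""
    if hasattr(frame, 'sort_values'):
        return frame.sort_values(by, inplace=inplace)
    # Very old pandas releases only had ``DataFrame.sort``.
    return frame.sort(columns=by, inplace=inplace)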
def collect_expr(e):
    """Execute and merge all of the per-column subqueries.

    Parameters
    ----------
    e : Expr
        The baseline or deltas expression.

    Returns
    -------
    result : pd.DataFrame
        The resulting dataframe.

    Notes
    -----
    This can return more data than needed. The in-memory reindex will
    handle this.
    """
    return sort_values(
        reduce(
            partial(pd.merge, on=added_query_fields, how='outer'),
            (
                odo(where(e, column), pd.DataFrame, **odo_kwargs)
                for column in columns
            ),
        ),
        TS_FIELD_NAME,  # sort for the groupby later
    )
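# The fold inside ``collect_expr`` -- an outer ``pd.merge`` over one frame per
# column, keyed on the shared query fields -- can be shown in isolation.  The
# key names below ('sid', 'timestamp') are stand-ins for whatever
# ``added_query_fields`` actually contains.
from functools import partial, reduce

import pandas as pd

per_column_frames = [
    pd.DataFrame({'sid': [1, 2],
                  'timestamp': pd.to_datetime(['2014-01-02'] * 2),
                  'close': [10.0, 20.0]}),
    pd.DataFrame({'sid': [1, 2],
                  'timestamp': pd.to_datetime(['2014-01-02'] * 2),
                  'volume': [1000, 2000]}),
]
merged = reduce(
    partial(pd.merge, on=['sid', 'timestamp'], how='outer'),
    per_column_frames,
)
print(merged.sort_values('timestamp'))  # sorted for a later groupby, as above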
def __init__(self, column, baseline, adjustments=None):
    self.column = column
    # Baseline is a wide frame: dates on the index, assets on the columns;
    # keep the raw values coerced to the column's dtype.
    self.baseline = baseline.values.astype(self.column.dtype)
    self.dates = baseline.index
    self.assets = baseline.columns

    if adjustments is None:
        # No adjustments supplied: use an empty frame with the expected
        # columns so the attribute accesses below still work.
        adjustments = DataFrame(
            index=DatetimeIndex([]),
            columns=ADJUSTMENT_COLUMNS,
        )
    else:
        # Ensure that columns are in the correct order.
        adjustments = adjustments.reindex_axis(ADJUSTMENT_COLUMNS, axis=1)
        sort_values(adjustments, ['apply_date', 'sid'], inplace=True)

    self.adjustments = adjustments
    self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date)
    self.adjustment_end_dates = DatetimeIndex(adjustments.end_date)
    self.adjustment_sids = Int64Index(adjustments.sid)
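# ``ADJUSTMENT_COLUMNS`` is defined elsewhere in the module this constructor
# comes from.  The layout below is only an assumption, inferred from the
# fields the constructor touches ('sid', 'apply_date', 'end_date', plus the
# sort keys); it merely illustrates the shape of the default empty
# adjustments frame.
import pandas as pd

ADJUSTMENT_COLUMNS = pd.Index([
    'sid', 'value', 'kind', 'start_date', 'end_date', 'apply_date',
])

empty = pd.DataFrame(index=pd.DatetimeIndex([]), columns=ADJUSTMENT_COLUMNS)
assert empty.empty and list(empty.columns) == list(ADJUSTMENT_COLUMNS)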
def update_dividends(self, new_dividends):
    """
    Update our dividend frame with new dividends. @new_dividends should be
    a DataFrame with columns containing at least the entries in
    zipline.protocol.DIVIDEND_FIELDS.
    """
    # Mark each new dividend with a unique integer id.  This ensures that
    # we can differentiate dividends whose date/sid fields are otherwise
    # identical.
    new_dividends['id'] = np.arange(
        self._dividend_count,
        self._dividend_count + len(new_dividends),
    )
    self._dividend_count += len(new_dividends)

    self.dividend_frame = sort_values(
        pd.concat([self.dividend_frame, new_dividends]),
        ['pay_date', 'ex_date'],
    ).set_index('id', drop=False)
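# Standalone illustration of the bookkeeping in ``update_dividends``: tag each
# new row with a consecutive integer id, concatenate onto the existing frame,
# sort by pay/ex date, and index by id.  The columns used here are a minimal
# stand-in, not the full zipline.protocol.DIVIDEND_FIELDS.
import numpy as np
import pandas as pd

dividend_frame = pd.DataFrame({
    'sid': [1],
    'ex_date': pd.to_datetime(['2014-01-06']),
    'pay_date': pd.to_datetime(['2014-01-10']),
    'id': [0],
}).set_index('id', drop=False)
dividend_count = 1

new_dividends = pd.DataFrame({
    'sid': [1, 2],
    'ex_date': pd.to_datetime(['2014-02-03', '2014-01-20']),
    'pay_date': pd.to_datetime(['2014-02-07', '2014-01-24']),
})
new_dividends['id'] = np.arange(dividend_count,
                                dividend_count + len(new_dividends))
dividend_count += len(new_dividends)

dividend_frame = pd.concat([dividend_frame, new_dividends]).sort_values(
    ['pay_date', 'ex_date'],
).set_index('id', drop=False)
print(dividend_frame)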