Example #1
0
def overwrite_novel_deltas(baseline, deltas, dates):
    """Fold the "novel" deltas into the baseline and split off the rest.

    A delta row counts as novel when, measured in positions of ``dates``,
    the right-side insertion point of its ``TS_FIELD_NAME`` value is at
    most one past the left-side insertion point of its ``AD_FIELD_NAME``
    value.

    Parameters
    ----------
    baseline : pd.DataFrame
        The first known values.
    deltas : pd.DataFrame
        Overwrites to the baseline data.
    dates : pd.DatetimeIndex
        The dates requested by the loader.

    Returns
    -------
    baseline_with_novel : pd.DataFrame
        ``baseline`` concatenated with the novel deltas, with the original
        row labels discarded and the rows passed through ``sort_values``
        on ``TS_FIELD_NAME``.
    non_novel_deltas : pd.DataFrame
        The deltas that do not represent a baseline value.
    """
    # Locate each delta's two timestamps within the requested dates.
    ts_positions = dates.searchsorted(deltas[TS_FIELD_NAME].values, 'right')
    ad_positions = dates.searchsorted(deltas[AD_FIELD_NAME].values, 'left')
    is_novel = (ts_positions - ad_positions) <= 1

    # Partition the deltas before touching the baseline.
    novel_rows = deltas.loc[is_novel]
    remaining_rows = deltas.loc[~is_novel]

    merged = pd.concat(
        (baseline, novel_rows),
        ignore_index=True,
        copy=False,
    )
    sort_values(merged, TS_FIELD_NAME, inplace=True)
    return merged, remaining_rows
Example #2
0
def overwrite_novel_deltas(baseline, deltas, dates):
    """Merge baseline-changing deltas into ``baseline``; return the others.

    For every delta row the function compares two positions in ``dates``:
    the ``'right'`` insertion point of the row's ``TS_FIELD_NAME`` value
    and the ``'left'`` insertion point of its ``AD_FIELD_NAME`` value.
    Rows whose positions differ by one or less are appended to the
    baseline; all other rows are handed back unchanged.

    Parameters
    ----------
    baseline : pd.DataFrame
        The first known values.
    deltas : pd.DataFrame
        Overwrites to the baseline data.
    dates : pd.DatetimeIndex
        The dates requested by the loader.

    Returns
    -------
    new_baseline : pd.DataFrame
        The baseline rows plus the novel deltas, concatenated with
        ``ignore_index=True`` and then sorted in place with
        ``sort_values`` on ``TS_FIELD_NAME``.
    non_novel_deltas : pd.DataFrame
        The deltas that do not represent a baseline value.
    """
    upper = dates.searchsorted(deltas[TS_FIELD_NAME].values, 'right')
    lower = dates.searchsorted(deltas[AD_FIELD_NAME].values, 'left')
    span = upper - lower
    novel_mask = span <= 1

    to_merge = deltas.loc[novel_mask]
    left_over = deltas.loc[~novel_mask]

    new_baseline = pd.concat(
        (baseline, to_merge), ignore_index=True, copy=False,
    )
    sort_values(new_baseline, TS_FIELD_NAME, inplace=True)
    return new_baseline, left_over
Example #3
0
        def collect_expr(e):
            """Run one subquery per column and combine the results.

            Each entry of the enclosing scope's ``columns`` is turned into
            a ``where(e, column)`` query, materialized as a dataframe via
            ``odo``, and the frames are outer-merged pairwise on
            ``added_query_fields``.

            Parameters
            ----------
            e : Expr
                The baseline or deltas expression.

            Returns
            -------
            result : pd.DataFrame
                The merged dataframe, passed through ``sort_values`` on
                ``TS_FIELD_NAME``.

            Notes
            -----
            This can return more data than needed. The in memory reindex
            will handle this.
            """
            merge_outer = partial(
                pd.merge,
                on=added_query_fields,
                how='outer',
            )
            # Lazily materialize one dataframe per requested column.
            per_column_frames = (
                odo(where(e, column), pd.DataFrame, **odo_kwargs)
                for column in columns
            )
            merged = reduce(merge_outer, per_column_frames)
            # sort for the groupby later
            return sort_values(merged, TS_FIELD_NAME)
Example #4
0
    def __init__(self, column, baseline, adjustments=None):
        """Capture the baseline values and adjustments for one column.

        Parameters
        ----------
        column : object
            Column descriptor.  Only its ``dtype`` attribute is read here;
            the baseline values are cast to it.
        baseline : pd.DataFrame
            Baseline values.  Its index is stored as ``self.dates`` and
            its columns as ``self.assets``.
        adjustments : pd.DataFrame, optional
            Adjustments to store alongside the baseline.  The frame is
            reindexed to ``ADJUSTMENT_COLUMNS`` and must expose
            ``apply_date``, ``end_date`` and ``sid`` columns afterwards.
            When omitted, an empty frame with ``ADJUSTMENT_COLUMNS`` as
            its columns is used.
        """
        self.column = column
        self.baseline = baseline.values.astype(self.column.dtype)
        self.dates = baseline.index
        self.assets = baseline.columns

        if adjustments is None:
            adjustments = DataFrame(
                index=DatetimeIndex([]),
                columns=ADJUSTMENT_COLUMNS,
            )
        else:
            # Ensure that columns are in the correct order.  ``reindex`` is
            # used rather than ``reindex_axis``, which pandas deprecated in
            # 0.21 and removed in 1.0; the result is the same.
            adjustments = adjustments.reindex(columns=ADJUSTMENT_COLUMNS)
            # The sort is applied to the reindexed frame bound above.
            sort_values(adjustments, ['apply_date', 'sid'], inplace=True)

        self.adjustments = adjustments
        # Keep the frequently used columns as standalone indexes.
        self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date)
        self.adjustment_end_dates = DatetimeIndex(adjustments.end_date)
        self.adjustment_sids = Int64Index(adjustments.sid)
Example #5
0
    def __init__(self, column, baseline, adjustments=None):
        """Store the baseline array and the adjustments frame.

        Parameters
        ----------
        column : object
            Column descriptor whose ``dtype`` attribute determines the
            dtype the baseline values are cast to.
        baseline : pd.DataFrame
            Baseline values; ``baseline.index`` becomes ``self.dates`` and
            ``baseline.columns`` becomes ``self.assets``.
        adjustments : pd.DataFrame, optional
            Adjustments for the baseline.  Its columns are reordered to
            ``ADJUSTMENT_COLUMNS``; ``apply_date``, ``end_date`` and
            ``sid`` are read from the result.  Defaults to an empty frame
            whose columns are ``ADJUSTMENT_COLUMNS``.
        """
        self.column = column
        self.baseline = baseline.values.astype(self.column.dtype)
        self.dates = baseline.index
        self.assets = baseline.columns

        if adjustments is None:
            adjustments = DataFrame(
                index=DatetimeIndex([]),
                columns=ADJUSTMENT_COLUMNS,
            )
        else:
            # Ensure that columns are in the correct order.
            # NOTE: ``DataFrame.reindex_axis`` was deprecated in pandas
            # 0.21 and removed in 1.0; ``reindex(columns=...)`` is the
            # equivalent call and exists in every pandas version.
            adjustments = adjustments.reindex(columns=ADJUSTMENT_COLUMNS)
            sort_values(adjustments, ['apply_date', 'sid'], inplace=True)

        self.adjustments = adjustments
        # Expose the date and sid columns as indexes of their own.
        self.adjustment_apply_dates = DatetimeIndex(adjustments.apply_date)
        self.adjustment_end_dates = DatetimeIndex(adjustments.end_date)
        self.adjustment_sids = Int64Index(adjustments.sid)
Example #6
0
    def update_dividends(self, new_dividends):
        """
        Append @new_dividends to our dividend frame.

        @new_dividends should be a DataFrame with columns containing at
        least the entries in zipline.protocol.DIVIDEND_FIELDS.  An 'id'
        column is written onto @new_dividends itself, and the stored frame
        ends up indexed by that column.
        """
        n_new = len(new_dividends)
        first_id = self._dividend_count

        # Give every incoming dividend a unique integer id so that rows
        # with otherwise identical date/sid fields stay distinguishable.
        new_dividends['id'] = np.arange(first_id, first_id + n_new)
        self._dividend_count += n_new

        combined = pd.concat([self.dividend_frame, new_dividends])
        ordered = sort_values(combined, ['pay_date', 'ex_date'])
        self.dividend_frame = ordered.set_index('id', drop=False)
    def update_dividends(self, new_dividends):
        """
        Merge a batch of new dividends into ``self.dividend_frame``.

        @new_dividends should be a DataFrame with columns containing at
        least the entries in zipline.protocol.DIVIDEND_FIELDS.  It gains
        an 'id' column as a side effect; the running id counter
        ``self._dividend_count`` advances by the number of rows added.
        """
        batch_size = len(new_dividends)
        id_start = self._dividend_count
        id_stop = id_start + batch_size

        # Unique integer ids let us tell apart dividends whose date/sid
        # fields are otherwise identical.
        new_dividends['id'] = np.arange(id_start, id_stop)
        self._dividend_count += batch_size

        all_dividends = pd.concat([self.dividend_frame, new_dividends])
        all_dividends = sort_values(all_dividends, ['pay_date', 'ex_date'])
        # Index by id while keeping 'id' available as a regular column.
        self.dividend_frame = all_dividends.set_index('id', drop=False)
Example #8
0
        def collect_expr(e):
            """Materialize ``e`` column by column and merge the pieces.

            A separate ``where(e, column)`` query is built for every
            column in the enclosing scope's ``columns``, converted to a
            dataframe with ``odo``, and the frames are folded together
            with an outer ``pd.merge`` on ``added_query_fields``.

            Parameters
            ----------
            e : Expr
                The baseline or deltas expression.

            Returns
            -------
            result : pd.DataFrame
                The combined dataframe after ``sort_values`` on
                ``TS_FIELD_NAME``.

            Notes
            -----
            This can return more data than needed. The in memory reindex
            will handle this.
            """
            outer_merge = partial(
                pd.merge, on=added_query_fields, how='outer',
            )
            subquery_frames = (
                odo(where(e, column), pd.DataFrame, **odo_kwargs)
                for column in columns
            )
            combined = reduce(outer_merge, subquery_frames)
            # sort for the groupby later
            return sort_values(combined, TS_FIELD_NAME)