Ejemplo n.º 1
0
def ffill_digest_frame_from_prior_values(freq,
                                         field,
                                         digest_frame,
                                         pv_frame,
                                         raw=False):
    """
    Forward-fill a digest frame, falling back to the last known prior values if
    necessary.

    Parameters
    ----------
    freq : object
        Only ``freq.freq_str`` is read; together with ``field`` it forms the
        ``(freq_str, field)`` key looked up in ``pv_frame.index``.
    field : str
        Second element of the ``pv_frame`` lookup key.
    digest_frame : pd.DataFrame or np.ndarray
        Data to fill. Its first row is overwritten in place wherever it is
        null. When ``raw`` is false it must provide ``.ffill()``.
    pv_frame : pd.DataFrame
        Prior values. The row at ``(freq.freq_str, field)`` is read
        positionally, using the same column order as ``digest_frame``.
    raw : bool, optional
        If true, work on the ndarray (``digest_frame.values`` for a
        DataFrame) and return the result of the ``ffill`` helper, which is
        defined outside this block. If false, return ``digest_frame.ffill()``.

    Returns
    -------
    The forward-filled data: whatever ``ffill`` returns when ``raw`` is true,
    otherwise the result of ``digest_frame.ffill()``.
    """
    # convert to ndarray if necessary
    values = digest_frame
    if raw and isinstance(digest_frame, pd.DataFrame):
        values = digest_frame.values

    # A DataFrame has to be addressed positionally: ``frame[0]`` would look
    # up a *column* labelled 0 rather than the first row, and
    # ``frame[0, mask] = ...`` is not a row assignment at all.
    is_frame = isinstance(values, pd.DataFrame)
    first_row = values.iloc[0] if is_frame else values[0]

    nan_sids = np.asarray(pd.isnull(first_row))
    if np.any(nan_sids):
        # If we have any leading nans in the frame, use values from pv_frame
        # to seed values for those sids.
        key_loc = pv_frame.index.get_loc((freq.freq_str, field))
        filler = pv_frame.values[key_loc, nan_sids]
        if is_frame:
            values.iloc[0, nan_sids] = filler
        else:
            values[0, nan_sids] = filler

    if raw:
        return ffill(values)

    return digest_frame.ffill()
Ejemplo n.º 2
0
    def frame_to_series(self, field, frame, columns=None):
        """
        Collapse a frame with a DatetimeIndex and sid columns into one value
        per sid, using the aggregation selected by ``field``.

        Parameters
        ----------
        field : str
            'price' or 'close' (last row, forward-filled when it contains
            NaNs), 'open' (first row after back-filling), 'volume' (NaN-aware
            sum), 'high' (NaN-aware max) or 'low' (NaN-aware min).
        frame : pd.DataFrame or np.ndarray
            Data to aggregate. A DataFrame is replaced by its ``.values`` and
            its columns override ``columns``.
        columns : optional
            Index used to size the result when ``frame`` is empty.

        Returns
        -------
        np.ndarray
            One entry per column. For an empty frame: zeros for 'volume',
            NaN for any other field.

        Raises
        ------
        ValueError
            If ``frame`` is non-empty and ``field`` is not one of the names
            listed above.
        """
        if isinstance(frame, pd.DataFrame):
            columns, frame = frame.columns, frame.values

        # Nothing to aggregate: emit the neutral value for the field.
        if len(frame) == 0:
            if field == 'volume':
                fill_value = 0
            else:
                fill_value = np.nan
            return pd.Series(data=fill_value, index=columns).values

        if field in ('price', 'close'):
            last_row = frame[-1]
            # A fully populated last row needs no forward fill.
            if not np.any(np.isnan(last_row)):
                return last_row
            return ffill(frame)[-1]

        if field == 'open':
            return bfill(frame)[0]

        nan_reducers = (
            ('volume', np.nansum),
            ('high', np.nanmax),
            ('low', np.nanmin),
        )
        for name, reducer in nan_reducers:
            if field == name:
                return reducer(frame, axis=0)

        raise ValueError("Unknown field {}".format(field))
Ejemplo n.º 3
0
    def frame_to_series(self, field, frame, columns=None):
        """
        Collapse a frame with a DatetimeIndex and sid columns into one value
        per sid, using the aggregation selected by ``field``.

        Parameters
        ----------
        field : str
            'price' or 'close_price' (last row, forward-filled when it
            contains NaNs), 'open_price' (first row after back-filling),
            'volume' (NaN-aware sum), 'high' (NaN-aware max) or 'low'
            (NaN-aware min).
        frame : pd.DataFrame or np.ndarray
            Data to aggregate. A DataFrame is replaced by its ``.values`` and
            its columns override ``columns``.
        columns : optional
            Index used to size the result when ``frame`` is empty.

        Returns
        -------
        np.ndarray
            One entry per column. For an empty frame: zeros for 'volume',
            NaN for any other field.

        Raises
        ------
        ValueError
            If ``frame`` is non-empty and ``field`` is not one of the names
            listed above.
        """
        if isinstance(frame, pd.DataFrame):
            columns, frame = frame.columns, frame.values

        # Nothing to aggregate: emit the neutral value for the field.
        if len(frame) == 0:
            if field == 'volume':
                fill_value = 0
            else:
                fill_value = np.nan
            return pd.Series(data=fill_value, index=columns).values

        if field in ('price', 'close_price'):
            last_row = frame[-1]
            # A fully populated last row needs no forward fill.
            if not np.any(np.isnan(last_row)):
                return last_row
            return ffill(frame)[-1]

        if field == 'open_price':
            return bfill(frame)[0]

        nan_reducers = (
            ('volume', np.nansum),
            ('high', np.nanmax),
            ('low', np.nanmin),
        )
        for name, reducer in nan_reducers:
            if field == name:
                return reducer(frame, axis=0)

        raise ValueError("Unknown field {}".format(field))
Ejemplo n.º 4
0
def ffill_digest_frame_from_prior_values(freq,
                                         field,
                                         digest_frame,
                                         pv_frame,
                                         raw=False):
    """
    Forward-fill a digest frame, falling back to the last known prior values if
    necessary.

    Parameters
    ----------
    freq : object
        Only ``freq.freq_str`` is read; together with ``field`` it forms the
        ``(freq_str, field)`` key looked up in ``pv_frame.index``.
    field : str
        Second element of the ``pv_frame`` lookup key.
    digest_frame : pd.DataFrame or np.ndarray
        Data to fill. Its first row is overwritten in place wherever it is
        null. When ``raw`` is false it must provide ``.ffill()``.
    pv_frame : pd.DataFrame
        Prior values. The row at ``(freq.freq_str, field)`` is read
        positionally, using the same column order as ``digest_frame``.
    raw : bool, optional
        If true, work on the ndarray (``digest_frame.values`` for a
        DataFrame) and return the result of the ``ffill`` helper, which is
        defined outside this block. If false, return ``digest_frame.ffill()``.

    Returns
    -------
    The forward-filled data: whatever ``ffill`` returns when ``raw`` is true,
    otherwise the result of ``digest_frame.ffill()``.
    """
    # convert to ndarray if necessary
    values = digest_frame
    if raw and isinstance(digest_frame, pd.DataFrame):
        values = digest_frame.values

    # A DataFrame has to be addressed positionally: ``frame[0]`` would look
    # up a *column* labelled 0 rather than the first row, and
    # ``frame[0, mask] = ...`` is not a row assignment at all.
    is_frame = isinstance(values, pd.DataFrame)
    first_row = values.iloc[0] if is_frame else values[0]

    nan_sids = np.asarray(pd.isnull(first_row))
    if np.any(nan_sids):
        # If we have any leading nans in the frame, use values from pv_frame
        # to seed values for those sids.
        key_loc = pv_frame.index.get_loc((freq.freq_str, field))
        filler = pv_frame.values[key_loc, nan_sids]
        if is_frame:
            values.iloc[0, nan_sids] = filler
        else:
            values[0, nan_sids] = filler

    if raw:
        return ffill(values)

    return digest_frame.ffill()
Ejemplo n.º 5
0
def test_ffill():
    """
    Check that ``ffill`` agrees with pandas' own forward fill on random data
    with NaNs injected, for both 1-D and 2-D input.
    """
    size = 100

    # 1-D case: knock out ten random positions of a random series.
    series = pd.Series(np.random.randn(size))
    nan_positions = random.sample(range(size), 10)
    series.iloc[nan_positions] = np.nan
    assert_almost_equal(series.ffill().values, ffill(series.values))

    # 2-D case: blank out the same positions as whole rows of a square frame.
    frame = pd.DataFrame(np.random.randn(size, size))
    frame.iloc[nan_positions] = np.nan
    assert_almost_equal(frame.ffill().values, ffill(frame.values))
Ejemplo n.º 6
0
def ffill_buffer_from_prior_values(freq,
                                   field,
                                   buffer_frame,
                                   digest_frame,
                                   pv_frame,
                                   raw=False):
    """
    Forward-fill a buffer frame, falling back to the end-of-period values of a
    digest frame if the buffer frame has leading NaNs.

    Parameters
    ----------
    freq : object
        Only ``freq.freq_str`` is read; together with ``field`` it forms the
        ``(freq_str, field)`` key looked up in ``pv_frame.index``.
    field : str
        Second element of the ``pv_frame`` lookup key.
    buffer_frame : pd.DataFrame or np.ndarray
        Data to fill. Its first row is overwritten in place wherever it is
        null. When ``raw`` is false it must provide ``.ffill()``.
    digest_frame : pd.DataFrame or np.ndarray
        First fallback source; only its last row is read, and only when it
        is non-empty.
    pv_frame : pd.DataFrame
        Second fallback source. The row at ``(freq.freq_str, field)`` is read
        positionally, using the same column order as ``buffer_frame``.
    raw : bool, optional
        If true, work on ndarrays (``.values`` for DataFrame inputs) and
        return the result of the ``ffill`` helper, which is defined outside
        this block. If false, return ``buffer_frame.ffill()``.

    Returns
    -------
    The forward-filled buffer: whatever ``ffill`` returns when ``raw`` is
    true, otherwise the result of ``buffer_frame.ffill()``.
    """
    # convert to ndarray if necessary
    digest_values = digest_frame
    if raw and isinstance(digest_frame, pd.DataFrame):
        digest_values = digest_frame.values

    buffer_values = buffer_frame
    if raw and isinstance(buffer_frame, pd.DataFrame):
        buffer_values = buffer_frame.values

    # DataFrames have to be addressed positionally: ``frame[0]`` would look
    # up a *column* labelled 0 rather than the first row.
    buffer_is_frame = isinstance(buffer_values, pd.DataFrame)

    def leading_nans():
        # Boolean mask of the columns whose first buffer row is null.
        if buffer_is_frame:
            first_row = buffer_values.iloc[0]
        else:
            first_row = buffer_values[0]
        return np.asarray(pd.isnull(first_row))

    def seed_first_row(mask, filler):
        # Write ``filler`` into the masked cells of the first buffer row.
        if buffer_is_frame:
            buffer_values.iloc[0, mask] = filler
        else:
            buffer_values[0, mask] = filler

    nan_sids = leading_nans()
    if np.any(nan_sids) and len(digest_values):
        # If we have any leading nans in the buffer and we have a non-empty
        # digest frame, use the last (most recent) digest row as the initial
        # buffer values.
        if isinstance(digest_values, pd.DataFrame):
            digest_filler = digest_values.iloc[-1].values[nan_sids]
        else:
            digest_filler = digest_values[-1, nan_sids]
        seed_first_row(nan_sids, digest_filler)

    nan_sids = leading_nans()
    if np.any(nan_sids):
        # If we still have leading nans, fall back to the last known values
        # from before the digest.
        key_loc = pv_frame.index.get_loc((freq.freq_str, field))
        seed_first_row(nan_sids, pv_frame.values[key_loc, nan_sids])

    if raw:
        return ffill(buffer_values)

    return buffer_frame.ffill()
Ejemplo n.º 7
0
def ffill_buffer_from_prior_values(freq,
                                   field,
                                   buffer_frame,
                                   digest_frame,
                                   pv_frame,
                                   raw=False):
    """
    Forward-fill a buffer frame, falling back to the end-of-period values of a
    digest frame if the buffer frame has leading NaNs.

    Parameters
    ----------
    freq : object
        Only ``freq.freq_str`` is read; together with ``field`` it forms the
        ``(freq_str, field)`` key looked up in ``pv_frame.index``.
    field : str
        Second element of the ``pv_frame`` lookup key.
    buffer_frame : pd.DataFrame or np.ndarray
        Data to fill. Its first row is overwritten in place wherever it is
        null. When ``raw`` is false it must provide ``.ffill()``.
    digest_frame : pd.DataFrame or np.ndarray
        First fallback source; only its last row is read, and only when it
        is non-empty.
    pv_frame : pd.DataFrame
        Second fallback source. The row at ``(freq.freq_str, field)`` is read
        positionally, using the same column order as ``buffer_frame``.
    raw : bool, optional
        If true, work on ndarrays (``.values`` for DataFrame inputs) and
        return the result of the ``ffill`` helper, which is defined outside
        this block. If false, return ``buffer_frame.ffill()``.

    Returns
    -------
    The forward-filled buffer: whatever ``ffill`` returns when ``raw`` is
    true, otherwise the result of ``buffer_frame.ffill()``.
    """
    # convert to ndarray if necessary
    digest_values = digest_frame
    if raw and isinstance(digest_frame, pd.DataFrame):
        digest_values = digest_frame.values

    buffer_values = buffer_frame
    if raw and isinstance(buffer_frame, pd.DataFrame):
        buffer_values = buffer_frame.values

    # DataFrames have to be addressed positionally: ``frame[0]`` would look
    # up a *column* labelled 0 rather than the first row.
    buffer_is_frame = isinstance(buffer_values, pd.DataFrame)

    def leading_nans():
        # Boolean mask of the columns whose first buffer row is null.
        if buffer_is_frame:
            first_row = buffer_values.iloc[0]
        else:
            first_row = buffer_values[0]
        return np.asarray(pd.isnull(first_row))

    def seed_first_row(mask, filler):
        # Write ``filler`` into the masked cells of the first buffer row.
        if buffer_is_frame:
            buffer_values.iloc[0, mask] = filler
        else:
            buffer_values[0, mask] = filler

    nan_sids = leading_nans()
    if np.any(nan_sids) and len(digest_values):
        # If we have any leading nans in the buffer and we have a non-empty
        # digest frame, use the last (most recent) digest row as the initial
        # buffer values.
        if isinstance(digest_values, pd.DataFrame):
            digest_filler = digest_values.iloc[-1].values[nan_sids]
        else:
            digest_filler = digest_values[-1, nan_sids]
        seed_first_row(nan_sids, digest_filler)

    nan_sids = leading_nans()
    if np.any(nan_sids):
        # If we still have leading nans, fall back to the last known values
        # from before the digest.
        key_loc = pv_frame.index.get_loc((freq.freq_str, field))
        seed_first_row(nan_sids, pv_frame.values[key_loc, nan_sids])

    if raw:
        return ffill(buffer_values)

    return buffer_frame.ffill()