コード例 #1
0
def extract_feature_labels_weights(
    df: pd.DataFrame, features_and_labels, **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series,
           pd.Series]:
    """Extract features, labels and the optional frames on one shared index.

    Every selector of ``features_and_labels`` is resolved against ``df`` via
    ``get_pandas_object`` (``kwargs`` are forwarded) and NaN rows are dropped.
    All results are then restricted to the index returned by
    ``intersection_of_index``.

    Returns:
        ``((features, min_required_samples), labels, targets,
        sample_weights, gross_loss)`` where ``min_required_samples`` is
        ``len(df) - len(features) + 1``, measured before the common-index
        filter is applied.
    """
    def _resolve(selector):
        return get_pandas_object(df, selector, **kwargs)

    def _resolve_optional(selector):
        # NOTE(review): presumably the selector may resolve to None, in which
        # case call_if_not_none skips the dropna call -- confirm in helper.
        return call_if_not_none(_resolve(selector), 'dropna')

    features = _resolve(features_and_labels.features).dropna()
    labels = _resolve(features_and_labels.labels).dropna()
    targets = _resolve_optional(features_and_labels.targets)
    sample_weights = _resolve_optional(features_and_labels.sample_weights)
    gross_loss = _resolve_optional(features_and_labels.gross_loss)

    common_index = intersection_of_index(
        features, labels, targets, sample_weights, gross_loss)

    label_type = features_and_labels.label_type
    if label_type is not None:
        labels = labels.astype(label_type)

    features_with_min_samples = (
        features.loc[common_index],
        len(df) - len(features) + 1,
    )
    return (
        features_with_min_samples,
        labels.loc[common_index],
        loc_if_not_none(targets, common_index),
        loc_if_not_none(sample_weights, common_index),
        loc_if_not_none(gross_loss, common_index),
    )
コード例 #2
0
ファイル: prediction.py プロジェクト: KIC/pandas-ml-quant
def map_prediction_to_target(df, prediction, targets):
    """Pair each predicted value with the target (or target range) it refers to.

    The prediction and target selections are reshaped to ``len(df)`` rows.
    Depending on how the number of prediction columns compares to the number
    of target columns, each prediction is keyed by:

    * equal counts       -> the target value itself,
    * one fewer          -> the stringified range between adjacent targets,
    * one more           -> ranges between adjacent targets, with ``-inf`` and
      ``+inf`` added as the outer bounds.

    Returns:
        A single-column ("prediction") DataFrame with a MultiIndex built from
        ``(row label, target or range)`` tuples.

    Raises:
        ValueError: if the column counts differ by more than one.
    """
    def _round(val, d):
        # only plain floats are rounded, everything else passes through
        if isinstance(val, float):
            return round(val, d)
        return val

    def _adjacent_pairs(bounds):
        # [a, b, c] -> [(a, b), (b, c)], each bound rounded to 2 digits
        return [(_round(lo, 2), _round(hi, 2))
                for lo, hi in zip(bounds, bounds[1:])]

    rows = len(df)

    dfp = get_pandas_object(df, prediction)
    p = dfp._.values.reshape((rows, -1))

    dft = get_pandas_object(df, targets)
    t = dft._.values.reshape((rows, -1))

    n_predictions, n_targets = p.shape[1], t.shape[1]
    index = []

    if n_predictions == n_targets:
        # 1:1 mapping
        for date in df.index:
            for target in dft.loc[date].values:
                index.append((date, _round(target, 2)))
    elif n_predictions == n_targets - 1:
        # we need to build ranges
        for date in df.index:
            for bucket in _adjacent_pairs(dft.loc[date].tolist()):
                index.append((date, f"{bucket}"))
    elif n_predictions == n_targets + 1:
        # mapping of the left and right extremes using +/- inf
        for date in df.index:
            bounds = [-np.inf, *dft.loc[date]._.values.tolist(), np.inf]
            for bucket in _adjacent_pairs(bounds):
                index.append((date, bucket))
    else:
        raise ValueError(f"unable to match {n_predictions} predictions to {n_targets} +/-1 targets")

    flat_predictions = p.reshape((-1,))
    multi_index = pd.MultiIndex.from_tuples(index)
    return pd.DataFrame({"prediction": flat_predictions}, index=multi_index)
コード例 #3
0
def extract_features(df: pd.DataFrame, features_and_labels,
                     **kwargs) -> Tuple[List, pd.DataFrame, pd.DataFrame]:
    """Extract the feature frame and the optional target frame from ``df``.

    Both selections are resolved via ``get_pandas_object`` (``kwargs`` are
    forwarded), NaN rows are dropped and the results are restricted to the
    index returned by ``intersection_of_index``.

    Returns:
        ``(label_columns, features, targets)``; ``label_columns`` is taken
        unchanged from ``features_and_labels``.

    Raises:
        ValueError: if no feature rows remain after dropping NaN.
    """
    feature_frame = get_pandas_object(
        df, features_and_labels.features, **kwargs).dropna()
    target_frame = call_if_not_none(
        get_pandas_object(df, features_and_labels.targets, **kwargs),
        'dropna')
    shared_index = intersection_of_index(feature_frame, target_frame)

    # the emptiness check looks at the NaN-free features, not the shared index
    if len(feature_frame) < 1:
        raise ValueError("not enough data!")

    return (
        features_and_labels.label_columns,
        feature_frame.loc[shared_index],
        loc_if_not_none(target_frame, shared_index),
    )
コード例 #4
0
def ta_stacked_bar(df, columns, figsize=None, ax=None, padding=0.02, **kwargs):
    """Draw the given columns as bars stacked on top of each other.

    The first column is drawn from the axis baseline; every following column
    is drawn from the previous column's value up to its own value, so the
    columns are treated as cumulative levels. ``kwargs`` are forwarded to
    ``ax.bar``.

    Args:
        df: frame whose index provides the x values.
        columns: column selectors, drawn in the given order.
        figsize: passed to ``new_fig_ts_axis`` when no axis is supplied.
        ax: axis to draw on; a new one is created when ``None``.
        padding: relative margin applied to the y limits; ``None`` leaves
            the limits untouched.

    Returns:
        The axis that was drawn on.
    """
    # TODO add colors ...
    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    if padding is not None:
        lower, upper = ax.get_ylim()

        # limits of exactly (0, 1) are treated as "not set yet" and ignored
        if lower == 0 and upper == 1:
            lower, upper = np.inf, -np.inf

        values = df[columns].values
        new_lower = min(values.min(), lower) * (1 - padding)
        new_upper = max(values.max(), upper) * (1 + padding)
        ax.set_ylim(new_lower, new_upper)

    stack_top = None
    for column in columns:
        level = get_pandas_object(df, column)

        if stack_top is None:
            height = level
            stack_top = height
        else:
            # start this bar where the previous one ended
            kwargs["bottom"] = stack_top
            height = level - stack_top
            stack_top = stack_top + height

        ax.bar(mdates.date2num(df.index), height, **kwargs)

    return ax
コード例 #5
0
ファイル: model.py プロジェクト: seanahmad/pandas-ml-quant
    def feature_selection(self,
                          features_and_labels: FeaturesAndLabels,
                          top_features: int = 5,
                          correlation_threshold: float = 0.5,
                          minimum_features: int = 1,
                          lags: Iterable[int] = range(100),
                          show_plots: bool = True,
                          figsize: Tuple[int, int] = (12, 10)):
        """Run the module-level ``feature_selection`` on this object's frame.

        The feature and label selections of ``features_and_labels`` are
        resolved against ``self.df``; all remaining arguments are handed to
        the module-level function positionally and unchanged.

        Returns:
            Whatever the module-level ``feature_selection`` returns.
        """
        # resolve the pandas objects first, then delegate the estimation
        selection_args = (
            get_pandas_object(self.df, features_and_labels.features),
            get_pandas_object(self.df, features_and_labels.labels),
            top_features,
            correlation_threshold,
            minimum_features,
            lags,
            show_plots,
            figsize,
        )
        return feature_selection(*selection_args)
コード例 #6
0
def ta_line(df, fields, figsize=None, ax=None, **kwargs):
    """Plot the selected fields of ``df`` as lines over the frame's index.

    Args:
        df: source frame; its index is used for the x axis.
        fields: selector resolved via ``get_pandas_object``.
        figsize: passed to ``new_fig_ts_axis`` when no axis is supplied.
        ax: axis to draw on; a new one is created when ``None``.
        **kwargs: forwarded to ``ax.plot``.

    Returns:
        The axis that was drawn on.
    """
    y_values = get_pandas_object(df, fields).values

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    ax.plot(df.index, y_values, **kwargs)
    return ax
コード例 #7
0
def ta_candlestick(self, open="Open", high="High", low="Low", close="Close", ax=None, figsize=None, **kwargs):
    """Draw a candlestick chart from the open/high/low/close selections.

    ``self`` is either a DataFrame or an object exposing the frame as
    ``_parent``. The four price selectors are resolved via
    ``get_pandas_object`` and plotted with ``candlestick_ohlc``; rising
    candles are green, falling candles red. ``kwargs`` is accepted but not
    used by this function.

    Returns:
        The axis that was drawn on.
    """
    if isinstance(self, pd.DataFrame):
        df = self
    else:
        df = self._parent

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    # Plot candlestick chart; column order is what candlestick_ohlc consumes
    columns = {"Date": mdates.date2num(df.index)}
    for name, selector in (("open", open), ("high", high),
                           ("low", low), ("close", close)):
        columns[name] = get_pandas_object(df, selector)
    data = pd.DataFrame(columns)

    candlestick_ohlc(ax, data.values, width=0.6, colorup='g', colordown='r')
    return ax
コード例 #8
0
def extract_labels(df: pd.DataFrame, features_and_labels,
                   **kwargs) -> pd.DataFrame:
    """Extract the label frame from ``df`` with NaN rows removed.

    The label selection is resolved via ``get_pandas_object`` (``kwargs`` are
    forwarded). When ``features_and_labels.label_type`` is set, the labels
    are cast to that type before being returned.
    """
    raw_labels = get_pandas_object(df, features_and_labels.labels, **kwargs)
    labels = raw_labels.dropna()

    label_type = features_and_labels.label_type
    if label_type is None:
        return labels

    return labels.astype(label_type)
コード例 #9
0
def ta_bar(df, fields, figsize=None, ax=None, colors=None, color_map: str = 'afmhot', **kwargs):
    """Draw the selected fields as bars, optionally colored by a second selection.

    Args:
        df: source frame; its index is used for the x axis.
        fields: selector for the bar heights.
        figsize: passed to ``new_fig_ts_axis`` when no axis is supplied.
        ax: axis to draw on; a new one is created when ``None``.
        colors: selector for the values that drive the bar colors; bars keep
            their default color when this resolves to ``None``.
        color_map: name of the matplotlib color map used for the coloring.
        **kwargs: forwarded to ``ax.bar``.

    Returns:
        The axis that was drawn on.
    """
    heights = get_pandas_object(df, fields).values
    colors = get_pandas_object(df, colors)

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    bars = ax.bar(df.index, height=heights, **kwargs)
    if colors is None:
        return ax

    color_function = plt.get_cmap(color_map)
    if isinstance(colors, PandasObject):
        domain = (colors.values.min(), colors.values.max())
    else:
        domain = (colors.min(), colors.max())

    # map the observed value range onto the [0, 1] input range of the cmap
    rescale = ReScaler(domain, (0, 1))

    for position, value in enumerate(colors):
        color = color_function(rescale(value))
        # TODO if alpha is provided then color = (*color[:-1], alpha)
        bars[position].set_color(color)

    return ax
コード例 #10
0
    def with_trend_lines(self,
                         field="Close",
                         panel=0,
                         edge_periods=3,
                         rescale_digits=4,
                         degrees=(-90, 90),
                         angles=30,
                         rho_digits=2):
        """Add interactively filterable trend lines to one panel of the plot.

        Trend lines are computed once with ``ta_trend_lines`` for ``field``.
        Two range sliders ("Period:" and "Touches:") are then wired to a
        callback that redraws only the trend lines whose ``"distance"`` and
        ``"touch"`` values fall inside the selected ranges.

        Args:
            field: selector of the series the trend lines are computed for.
            panel: index into ``self.axis`` of the panel to draw on.
            edge_periods, rescale_digits, degrees, angles, rho_digits:
                forwarded unchanged to ``ta_trend_lines``.

        Returns:
            ``self``, so calls can be chained.
        """
        plt.close(self.fig)
        _, lookup =\
            ta_trend_lines(get_pandas_object(self.df, field), edge_periods, rescale_digits, degrees, angles, rho_digits)

        def plot_trend_line(time, touches):
            ax = self.axis[panel]
            td = timedelta(days=time[0]), timedelta(days=time[1])

            # first remove all previous trend lines; ``ax.lines`` cannot be
            # re-assigned on matplotlib >= 3.5, so each artist is removed
            # individually (iterate over a copy because remove() mutates it)
            for line in list(ax.lines):
                if line.get_label().startswith(".Trend"):
                    line.remove()

            # then select the lines from the lookup table
            filtered = lookup[(lookup["touch"] >= touches[0])
                              & (lookup["touch"] <= touches[1])]
            filtered = filtered[(filtered["distance"] >= td[0])
                                & (filtered["distance"] <= td[1])]

            for _, tl in filtered.iterrows():
                # only the first and last point are needed to draw the line
                points = tl["points"][0], tl["points"][-1]
                ax.plot([p[0] for p in points], [p[1] for p in points],
                        label=".Trend")

            return self.fig

        #  TODO later add a wg.IntSlider to extend the trend lines from its last point
        min_ts, max_ts = 2, len(self.df)
        time_slider = wg.IntRangeSlider(value=[max_ts, max_ts],
                                        min=min_ts,
                                        max=max_ts,
                                        step=1,
                                        continuous_update=False,
                                        description='Period:')

        min_to, max_to = 2, lookup["touch"].max()
        touch_slider = wg.IntRangeSlider(value=[min_to, max_to],
                                         min=min_to,
                                         max=max_to,
                                         step=1,
                                         continuous_update=False,
                                         description='Touches:')

        wg.interact(plot_trend_line, time=time_slider, touches=touch_slider)
        self.fig.show()
        return self
コード例 #11
0
def extract_feature_labels_weights(df: Typing.PatchedDataFrame,
                                   features_and_labels,
                                   **kwargs) -> FeaturesWithLabels:
    """Extract all training frames from ``df`` aligned on one shared index.

    Features, targets and latent data come from ``extract_features``, labels
    from ``extract_labels``; sample weights and gross loss are resolved via
    ``get_pandas_object``. Any frame whose maximum value is infinite is
    logged, has its +/-inf values replaced by NaN and the affected rows
    dropped in place. Everything is finally restricted to the index returned
    by ``intersection_of_index``.

    Returns:
        A ``FeaturesWithLabels`` holding the features (wrapped together with
        ``len(df) - len(features) + 1`` and the number of feature columns),
        labels, latent, targets, sample weights and gross loss.
    """
    features, targets, latent = extract_features(df, features_and_labels,
                                                 **kwargs)
    labels = extract_labels(df, features_and_labels, **kwargs)
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs),
        'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs),
        'dropna')

    # do some sanity check for any non numeric values in any of the data frames
    for frame in (features, labels, targets, sample_weights, gross_loss):
        if frame is None:
            continue

        # we could have nested arrays so we need to use the un-nested values
        flattened = flatten_nested_list(frame._.values, np.max)
        largest = max(v.max() for v in flattened)

        if not (np.isscalar(largest) and np.isinf(largest)):
            continue

        infinite_rows = frame[frame.apply(
            lambda r: np.isinf(r.values).any(), axis=1)]
        _log.warning(f"features containing infinit number\n{infinite_rows}")
        frame.replace([np.inf, -np.inf], np.nan, inplace=True)
        frame.dropna(inplace=True)

    # now get the common index and return the filtered data frames
    common_index = intersection_of_index(features, labels, targets,
                                         sample_weights, gross_loss)

    if isinstance(features, tuple):
        aligned_features = tuple([f.loc[common_index] for f in features])
    else:
        aligned_features = features.loc[common_index]

    # sample/column counts are taken from the features before alignment
    features_with_samples = FeaturesWithRequiredSamples(
        aligned_features,
        len(df) - len(features) + 1,
        len(features.columns))

    return FeaturesWithLabels(
        features_with_samples,
        labels.loc[common_index],
        loc_if_not_none(latent, common_index),
        loc_if_not_none(targets, common_index),
        loc_if_not_none(sample_weights, common_index),
        loc_if_not_none(gross_loss, common_index))
コード例 #12
0
def ta_matrix(df, fields, figsize=None, ax=None, **kwargs):
    """Render a 2D matrix with ``matshow`` and hide the tick labels.

    Args:
        df: source frame, only used when ``fields`` is a selector.
        fields: either a ready-made ``np.ndarray`` or a selector resolved via
            ``get_pandas_object``.
        figsize: passed to ``new_fig_ts_axis`` when no axis is supplied.
        ax: axis to draw on; a new one is created when ``None``.
        **kwargs: accepted but not used by this function.

    Returns:
        The axis that was drawn on.
    """
    if isinstance(fields, np.ndarray):
        matrix = fields
    else:
        matrix = get_pandas_object(df, fields).ml.values.squeeze()

    if ax is None:
        _, ax = new_fig_ts_axis(figsize)

    ax.matshow(matrix)

    # an empty label list suppresses the tick labels on both axes
    for set_labels in (ax.set_yticklabels, ax.set_xticklabels):
        set_labels([])

    return ax
コード例 #13
0
def extract_features(df: pd.DataFrame, features_and_labels,
                     **kwargs) -> Tuple[List, pd.DataFrame, pd.DataFrame]:
    """Extract one or several feature frames plus the optional target frame.

    When ``features_and_labels.features`` is a tuple, each element is
    resolved separately and the results are wrapped in a
    ``MultiFrameDecorator``; otherwise a single frame is resolved. NaN rows
    are dropped in either case and the results are restricted to the index
    returned by ``intersection_of_index``.

    Returns:
        ``(label_columns, features, targets)``; ``label_columns`` is taken
        unchanged from ``features_and_labels``.

    Raises:
        ValueError: if no feature rows remain after dropping NaN.
    """
    def _clean(selector):
        return get_pandas_object(df, selector, **kwargs).dropna()

    selectors = features_and_labels.features
    if not isinstance(selectors, tuple):
        features = _clean(selectors)
    else:
        # allow multiple feature sets i.e. for multi input layered networks
        frames = []
        for selector in selectors:
            frames.append(_clean(selector))
        features = MultiFrameDecorator(frames, True)

    targets = call_if_not_none(
        get_pandas_object(df, features_and_labels.targets, **kwargs), 'dropna')
    shared_index = intersection_of_index(features, targets)

    if len(features) < 1:
        raise ValueError("not enough data!")

    return (
        features_and_labels.label_columns,
        features.loc[shared_index],
        loc_if_not_none(targets, shared_index),
    )
コード例 #14
0
def extract_feature_labels_weights(
    df: Typing.PatchedDataFrame, features_and_labels, **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series,
           pd.Series]:
    """Extract features, labels and optional frames aligned on one index.

    Features and targets come from ``extract_features`` (its first return
    value is discarded); labels, sample weights and gross loss are resolved
    via ``get_pandas_object``. Labels are cast to
    ``features_and_labels.label_type`` when it is set. Any frame whose
    maximum is infinite is logged, has its +/-inf values replaced by NaN and
    the affected rows dropped in place. Everything is finally restricted to
    the index returned by ``intersection_of_index``.

    Returns:
        ``((features, min_required_samples), labels, targets,
        sample_weights, gross_loss)`` where ``min_required_samples`` is
        ``len(df) - len(features) + 1``.
    """
    _, features, targets = extract_features(df, features_and_labels, **kwargs)
    labels = get_pandas_object(df, features_and_labels.labels,
                               **kwargs).dropna()
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs),
        'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs),
        'dropna')

    if features_and_labels.label_type is not None:
        labels = labels.astype(features_and_labels.label_type)

    # do some sanity check for any non numeric values in any of the data frames
    for frame in [features, labels, targets, sample_weights, gross_loss]:
        if frame is None:
            continue

        # named max_value so the builtin ``max`` is not shadowed
        max_value = frame._.max()

        if np.isscalar(max_value) and np.isinf(max_value):
            # the offending rows are passed as a lazy %-argument; without the
            # ``%s`` placeholder logging cannot format the record
            _log.warning(
                "features containing infinit number\n%s",
                frame[frame.apply(lambda r: np.isinf(r.values).any(),
                                  axis=1)])
            frame.replace([np.inf, -np.inf], np.nan, inplace=True)
            frame.dropna(inplace=True)

    # now get the common index and return the filtered data frames
    common_index = intersection_of_index(features, labels, targets,
                                         sample_weights, gross_loss)

    return ((tuple([f.loc[common_index] for f in features])
             if isinstance(features, tuple) else features.loc[common_index],
             len(df) - len(features) + 1), labels.loc[common_index],
            loc_if_not_none(targets, common_index),
            loc_if_not_none(sample_weights, common_index),
            loc_if_not_none(gross_loss, common_index))
コード例 #15
0
def extract_feature_labels_weights(
    df: Typing.PatchedDataFrame, features_and_labels, **kwargs
) -> Tuple[Tuple[pd.DataFrame, int], pd.DataFrame, pd.DataFrame, pd.Series,
           pd.Series]:
    """Extract features, labels and optional frames aligned on one index.

    Every selector of ``features_and_labels`` is resolved against ``df`` via
    ``get_pandas_object`` (``kwargs`` are forwarded) and NaN rows are
    dropped. Labels are cast to ``features_and_labels.label_type`` when it is
    set. Any frame whose maximum is infinite is logged, has its +/-inf values
    replaced by NaN and the affected rows dropped in place. Everything is
    finally restricted to the index returned by ``intersection_of_index``.

    Returns:
        ``((features, min_required_samples), labels, targets,
        sample_weights, gross_loss)`` where ``min_required_samples`` is
        ``len(df) - len(features) + 1``.
    """
    features = get_pandas_object(df, features_and_labels.features,
                                 **kwargs).dropna()
    labels = get_pandas_object(df, features_and_labels.labels,
                               **kwargs).dropna()
    targets = call_if_not_none(
        get_pandas_object(df, features_and_labels.targets, **kwargs), 'dropna')
    sample_weights = call_if_not_none(
        get_pandas_object(df, features_and_labels.sample_weights, **kwargs),
        'dropna')
    gross_loss = call_if_not_none(
        get_pandas_object(df, features_and_labels.gross_loss, **kwargs),
        'dropna')

    if features_and_labels.label_type is not None:
        labels = labels.astype(features_and_labels.label_type)

    # sanity check: look for infinite values in any of the extracted frames
    for frame in [features, labels, targets, sample_weights, gross_loss]:
        if frame is None:
            continue

        # named max_value so the builtin ``max`` is not shadowed
        max_value = frame._.values.max()

        if np.isscalar(max_value) and np.isinf(max_value):
            # the offending rows are passed as a lazy %-argument; without the
            # ``%s`` placeholder logging cannot format the record
            _log.warning(
                "features containing infinit number\n%s",
                frame[frame.apply(lambda r: np.isinf(r.values).any(),
                                  axis=1)])
            frame.replace([np.inf, -np.inf], np.nan, inplace=True)
            frame.dropna(inplace=True)

    common_index = intersection_of_index(features, labels, targets,
                                         sample_weights, gross_loss)

    return ((features.loc[common_index], len(df) - len(features) + 1),
            labels.loc[common_index], loc_if_not_none(targets, common_index),
            loc_if_not_none(sample_weights, common_index),
            loc_if_not_none(gross_loss, common_index))
コード例 #16
0
 def extractor(df, **kwargs):
     """Resolve ``selectors`` on ``df``, then apply ``postprocessor`` to the result.

     Both steps go through ``get_pandas_object`` and receive the same
     ``kwargs``; ``selectors`` and ``postprocessor`` come from the enclosing
     scope.
     """
     selected = get_pandas_object(df, selectors, **kwargs)
     return get_pandas_object(selected, postprocessor, **kwargs)
コード例 #17
0
 def extractor(df, **kwargs):
     """Resolve ``list`` on ``df``, then apply ``postprocessor`` to the result.

     Both steps go through ``get_pandas_object`` and receive the same
     ``kwargs``.
     NOTE(review): ``list`` is presumably a name bound in the enclosing scope
     that shadows the builtin -- confirm against the outer function.
     """
     return get_pandas_object(
         get_pandas_object(df, list, **kwargs),
         postprocessor,
         **kwargs)