Example #1
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler

# Typing, for_each_top_level_column, unpack_nested_arrays and ecdf are
# utility helpers provided by the surrounding library.


def ta_normalize_row(df: Typing.PatchedDataFrame,
                     normalizer: str = "uniform",
                     level=None):
    # normalizer can be one of 'minmax01', 'minmax-11', 'uniform', 'standard'
    # or a callable
    if isinstance(df.columns, pd.MultiIndex) and level is not None:
        return for_each_top_level_column(ta_normalize_row,
                                         level=level)(df, normalizer)
    else:

        def scaler(row):
            # flatten any nested arrays in the row into one 1d array, then
            # reshape to a 2d column vector as expected by the sklearn scalers
            values = unpack_nested_arrays(row, split_multi_index_rows=False)
            values_2d = values.reshape(-1, 1)

            if normalizer == 'minmax01':
                return MinMaxScaler().fit_transform(values_2d).reshape(values.shape)
            elif normalizer == 'minmax-11':
                return MinMaxScaler(feature_range=(-1, 1)).fit_transform(values_2d).reshape(values.shape)
            elif normalizer == 'standard':
                # (value - mean) / std
                return (values - values.mean()) / np.std(values)
            elif normalizer == 'uniform':
                return ecdf(values_2d).reshape(values.shape)
            elif callable(normalizer):
                return normalizer(row)
            else:
                raise ValueError(
                    'unknown normalizer, needs to be one of: '
                    '[minmax01, minmax-11, uniform, standard, callable(r)]')

        return df.apply(scaler, axis=1, result_type='broadcast')
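
A minimal usage sketch, assuming the library that provides ta_normalize_row and its helpers (Typing, unpack_nested_arrays, ecdf, for_each_top_level_column) is importable; the frame below and its column names are purely illustrative:

import pandas as pd

prices = pd.DataFrame({"Open": [1.0, 2.0, 3.0],
                       "High": [2.0, 4.0, 6.0],
                       "Low": [0.5, 1.0, 1.5]})

# rank each value within its own row via the empirical CDF (default 'uniform')
uniform_rows = ta_normalize_row(prices, normalizer="uniform")

# scale each row to the [0, 1] range instead
minmax_rows = ta_normalize_row(prices, normalizer="minmax01")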
Example #2
    def __init__(self,
                 df: Typing.PatchedDataFrame,
                 clip_profit_at=0,
                 classes=None,
                 **kwargs):
        super().__init__(df)
        self.clip_profit_at = clip_profit_at
        self.targets = df[TARGET_COLUMN_NAME]

        # calculate confusion indices
        truth, prediction = self._fix_label_prediction_representation()
        distinct_values = len(set(truth.reshape(-1))) if classes is None else classes
        cm = empty_lists((distinct_values, distinct_values))

        for i, (t, p) in enumerate(zip(truth, prediction)):
            cm[int(t), int(p)].append(self.df.index[i])

        self.confusion_indices = cm

        # the gross loss is derived from the predicted band and the true price,
        # so the true price has to be passed in as the gross-loss column in order
        # to compute the real loss
        self.df_gross_loss = pd.DataFrame({
            "bucket": df[[TARGET_COLUMN_NAME]].apply(get_buckets, axis=1, raw=True),
            "pidx": df.apply(lambda r: int(r[PREDICTION_COLUMN_NAME]._.values.argmax()),
                             axis=1, raw=False),
            "price": df[GROSS_LOSS_COLUMN_NAME].values[:, 0]
        }, index=df.index)

        # find the boundary of the predicted target bucket and compute the loss
        # of the true price against it
        mid = self.targets.shape[1] / 2.0
        self.df_gross_loss["loss"] = self.df_gross_loss.apply(
            lambda r: (r["price"] - r["bucket"][r["pidx"]][0]) if r["pidx"] <= mid
                      else (r["bucket"][r["pidx"]][1] - r["price"]),
            axis=1, raw=False).fillna(0)
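
The per-row loss rule in the last apply call can be illustrated in isolation. A small self-contained sketch follows; the bucket boundaries, price and predicted indices are hypothetical illustration values, not taken from the library:

# hypothetical (lower, upper) boundaries of four target buckets
buckets = [(0.0, 1.0), (1.0, 2.0), (2.0, 3.0), (3.0, 4.0)]
mid = len(buckets) / 2.0
price = 1.4

def band_loss(pidx):
    # lower half: how far the price sits above the predicted bucket's lower bound,
    # upper half: how far the predicted bucket's upper bound sits above the price
    if pidx <= mid:
        return price - buckets[pidx][0]
    return buckets[pidx][1] - price

print(band_loss(1))  # 1.4 - 1.0 = 0.4 (lower-half bucket predicted)
print(band_loss(3))  # 4.0 - 1.4 = 2.6 (upper-half bucket predicted)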