def ta_normalize_row(df: Typing.PatchedDataFrame, normalizer: str = "uniform", level=None):
    """Normalize every row of ``df`` independently of all other rows.

    Each row is unpacked into an array via ``unpack_nested_arrays`` and then
    rescaled according to ``normalizer``. The rescaled rows are written back
    with ``df.apply(..., axis=1, result_type='broadcast')``.

    Args:
        df: the frame whose rows get normalized.
        normalizer: one of

            * ``'minmax01'``  - ``MinMaxScaler()`` fitted on the row
            * ``'minmax-11'`` - ``MinMaxScaler(feature_range=(-1, 1))`` fitted
              on the row
            * ``'standard'``  - ``(value - mean) / std`` of the row
            * ``'uniform'``   - result of ``ecdf`` on the row values
              (presumably the empirical CDF - confirm against ``ecdf``)
            * a callable      - called with the raw (not unpacked) row

        level: if not ``None`` and ``df`` has ``pd.MultiIndex`` columns, the
            normalization is delegated through ``for_each_top_level_column``
            with this level.

    Returns:
        Whatever ``df.apply`` (or the ``for_each_top_level_column`` wrapper)
        returns for the normalized rows.

    Raises:
        ValueError: if ``normalizer`` is neither a known name nor a callable.
    """
    if isinstance(df.columns, pd.MultiIndex) and level is not None:
        return for_each_top_level_column(ta_normalize_row, level=level)(df, normalizer)

    def scaler(row):
        values = unpack_nested_arrays(row, split_multi_index_rows=False)
        # the scalers are fitted on a single column holding all values of the row
        values_2d = values.reshape(-1, 1)

        if normalizer == 'minmax01':
            return MinMaxScaler().fit(values_2d).transform(values_2d).reshape(values.shape)
        elif normalizer == 'minmax-11':
            return MinMaxScaler(feature_range=(-1, 1)).fit(values_2d).transform(values_2d).reshape(values.shape)
        elif normalizer == 'standard':
            # (value - mean) / std
            # the parentheses matter: without them only the mean gets divided by the std
            return (values - values.mean()) / np.std(values)
        elif normalizer == 'uniform':
            return ecdf(values_2d).reshape(values.shape)
        elif callable(normalizer):
            # custom normalizers receive the original row, not the unpacked values
            return normalizer(row)
        else:
            raise ValueError(
                'unknown normalizer need to one of: [minmax01, minmax-11, uniform, standard, callable(r)]'
            )

    return df.apply(scaler, axis=1, result_type='broadcast')
def __init__(self, df: Typing.PatchedDataFrame, clip_profit_at=0, classes=None, **kwargs):
    """Build the confusion indices and the per-row gross loss frame.

    Args:
        df: frame passed on to the parent constructor; its target, prediction
            and gross-loss columns are read here.
        clip_profit_at: stored unchanged on the instance.
        classes: size of each confusion-matrix axis. When ``None`` the number
            of distinct values found in the truth labels is used instead.
        **kwargs: accepted but not used by this method.
    """
    super().__init__(df)
    self.clip_profit_at = clip_profit_at
    self.targets = df[TARGET_COLUMN_NAME]

    # confusion indices: cell [t, p] collects the index labels of all rows
    # whose true label is t and whose predicted label is p
    truth, prediction = self._fix_label_prediction_representation()
    if classes is None:
        nr_of_classes = len(set(truth.reshape((-1, ))))
    else:
        nr_of_classes = classes

    confusion = empty_lists((nr_of_classes, nr_of_classes))
    for position, (true_label, predicted_label) in enumerate(zip(truth, prediction)):
        confusion[int(true_label), int(predicted_label)].append(self.df.index[position])

    self.confusion_indices = confusion

    # we can calculate the gross loss from the predicted band and the true price,
    # therefore we need to pass the true price as gross loss such that we calculate the real loss
    def _predicted_index(row):
        # position of the largest prediction value of this row
        return int(row[PREDICTION_COLUMN_NAME]._.values.argmax())

    buckets = df[[TARGET_COLUMN_NAME]].apply(get_buckets, axis=1, raw=True)
    predicted_indices = df.apply(_predicted_index, axis=1, raw=False)
    prices = df[GROSS_LOSS_COLUMN_NAME].values[:, 0]

    self.df_gross_loss = pd.DataFrame(
        {"bucket": buckets, "pidx": predicted_indices, "price": prices},
        index=df.index,
    )

    # find target for predicted value
    mid = self.targets.shape[1] / 2.0

    def _loss(row):
        predicted = row["pidx"]
        if predicted <= mid:
            # at or below the middle: price minus the first bound of the predicted bucket
            return row["price"] - row["bucket"][predicted][0]
        # above the middle: second bound of the predicted bucket minus price
        return row["bucket"][predicted][1] - row["price"]

    losses = self.df_gross_loss.apply(_loss, axis=1, raw=False)
    self.df_gross_loss["loss"] = losses.fillna(0)