def ta_backtest(signal: Typing.PatchedDataFrame,
                prices: Typing.PatchedPandas,
                action: Callable[[pd.Series], Tuple[int, float]],
                slippage: Callable[[float], float] = lambda _: 0):
    """Run a technical-analysis backtest of `signal` against `prices`.

    For multi-column inputs the function recurses column by column and stitches
    the per-column results into one MultiIndex-column frame; for a single
    series it replays the signal through a transaction log and evaluates it
    against the price series.

    :param signal: trading signal(s); must have the same number of columns as
        `prices` when columnar
    :param prices: price series (or frame of series, matched to `signal` by
        column position)
    :param action: maps one signal row to either a scalar target weight or a
        ``(direction, amount)`` tuple
    :param slippage: maps a traded amount to a slippage cost; defaults to zero
    :return: evaluation DataFrame produced by the transaction log (nested under
        a per-column top level in the columnar case)
    """
    if has_indexed_columns(signal):
        # columnar case: recurse per column and join the results side by side
        assert len(signal.columns) == len(prices.columns), "Signal and Prices need the same shape!"
        res = pd.DataFrame({}, index=signal.index, columns=pd.MultiIndex.from_product([[], []]))

        for i in range(len(signal.columns)):
            df = ta_backtest(signal[signal.columns[i]], prices[prices.columns[i]], action, slippage)
            # tuple column labels (MultiIndex) are flattened to a comma joined string
            top_level_name = ",".join(prices.columns[i]) if isinstance(prices.columns[i], tuple) else prices.columns[i]
            df.columns = pd.MultiIndex.from_product([[top_level_name], df.columns.to_list()])
            res = res.join(df)

        return res

    assert isinstance(prices, pd.Series), "prices need to be a series!"
    trades = StreamingTransactionLog()

    def trade_log_action(row):
        # `action` may either return an explicit (direction, amount) tuple or a
        # plain target weight to rebalance to
        direction_amount = action(row)
        if isinstance(direction_amount, tuple):
            trades.perform_action(*direction_amount)
        else:
            trades.rebalance(float(direction_amount))

    # NOTE(review): raw=True hands a numpy row (not a pd.Series) to `action`,
    # despite the Callable[[pd.Series], ...] hint — confirm callers expect this
    signal.to_frame().apply(trade_log_action, axis=1, raw=True)
    return trades.evaluate(prices.rename("price"), slippage)
def ta_naive_edge_detect(df: _t.PatchedDataFrame, period=3):
    """Naively flag local extrema over a centered rolling window.

    Each window is classified by comparing its first, middle and last value:
    ``1`` for a local peak (middle strictly above both ends), ``-1`` for a
    local trough (middle strictly below both ends), ``0`` otherwise.

    :param df: a series-like object (its ``name`` is reused for the result)
    :param period: odd window length; the compared middle element sits at
        ``(period - 1) // 2``
    :return: series of -1/0/1 edge flags named ``<name>_edge_naive_<period>``
    """
    assert period % 2 > 0, "only odd periods are allowed"
    middle = (period - 1) // 2

    def classify(window):
        first, mid_val, last = window[0], window[middle], window[-1]
        if first < mid_val > last:
            return 1
        if first > mid_val < last:
            return -1
        return 0

    flagged = df.rolling(period, center=True).apply(classify, raw=True)
    return flagged.rename(f'{df.name}_edge_naive_{period}')
def __init__(self, df: Typing.PatchedDataFrame, model: Model, rebalancing_lag: int = 1, rebalance_after_distance: float = 0, rebalance_after_draw_down: float = None, rebalance_fee: Callable[[float], float] = lambda _: 0, price_column: Any = 'Close', plot_log_base=None, **kwargs):
    """Summary of a portfolio-weights model: builds the portfolio and the
    per-step weight distances used to decide on rebalancing.

    :param df: frame holding predictions and target prices (sorted by index)
    :param model: the fitted model being summarized
    :param rebalancing_lag: lag (in rows) before a new weight takes effect
    :param rebalance_after_distance: weight-vector distance threshold that triggers a rebalance
    :param rebalance_after_draw_down: optional draw-down threshold triggering a rebalance
    :param rebalance_fee: fee charged per rebalance as a function of the traded amount
    :param price_column: column within the target frame holding the price
    :param plot_log_base: optional log base for performance plots
    """
    super().__init__(df.sort_index(), model, self._plot_portfolio_and_weights, self._show_risk_metrics, layout=[[0, 0, 1]], **kwargs)
    assert TARGET_COLUMN_NAME in df, "Target Prices need to be provided via FeaturesAndLabels.target"

    self.rebalancing_lag = rebalancing_lag
    self.rebalance_after_distance = rebalance_after_distance
    self.rebalance_after_draw_down = rebalance_after_draw_down
    self.rebalance_fee = rebalance_fee
    self.price_column = price_column
    self.plot_log_base = plot_log_base
    self.portfolio = self.construct_portfolio()

    # euclidean distance between each predicted weight vector and its predecessor
    predicted_weights = df[PREDICTION_COLUMN_NAME].values
    self.weight_distances = np.linalg.norm(predicted_weights - np.roll(predicted_weights, 1), axis=1)
def ta_normalize_row(df: Typing.PatchedDataFrame, normalizer: str = "uniform", level=None):
    """Normalize each row of `df` independently.

    :param df: frame whose rows are normalized one at a time
    :param normalizer: one of ``minmax01`` (scale to [0, 1]), ``minmax-11``
        (scale to [-1, 1]), ``standard`` (z-score), ``uniform`` (empirical CDF)
        or a callable applied to the raw row
    :param level: when `df` has MultiIndex columns, apply the normalization per
        top-level column group at this level
    :return: frame of the same shape with row-wise normalized values
    :raise ValueError: for an unknown `normalizer`
    """
    if isinstance(df.columns, pd.MultiIndex) and level is not None:
        # fan out over the top level column groups and recurse per group
        return for_each_top_level_column(ta_normalize_row, level=level)(df, normalizer)
    else:
        def scaler(row):
            values = unpack_nested_arrays(row, split_multi_index_rows=False)
            values_2d = values.reshape(-1, 1)

            if normalizer == 'minmax01':
                return MinMaxScaler().fit(values_2d).transform(values_2d).reshape(values.shape)
            elif normalizer == 'minmax-11':
                return MinMaxScaler(feature_range=(-1, 1)).fit(values_2d).transform(values_2d).reshape(values.shape)
            elif normalizer == 'standard':
                # (value - mean) / std
                # FIX: the previous code evaluated `values - (mean / std)` due to
                # operator precedence; parenthesize to get the intended z-score
                return (values - values.mean()) / np.std(values)
            elif normalizer == 'uniform':
                return ecdf(values_2d).reshape(values.shape)
            elif callable(normalizer):
                return normalizer(row)
            else:
                raise ValueError(
                    'unknown normalizer need to one of: [minmax01, minmax-11, uniform, standard, callable(r)]'
                )

        return df.apply(scaler, axis=1, result_type='broadcast')
def fit(self, df: Typing.PatchedDataFrame, **kwargs):
    """Fit the wrapped sub-model on `df`, store the fitted model and return
    the in-sample prediction for the same frame.

    :param df: data frame providing the model context via ``df.model()``
    :return: result of :meth:`predict` on `df`
    """
    _log.info(f"fitting submodel: {self.name}")

    with df.model() as ctx:
        fit = ctx.fit(self.model, **kwargs)
        # keep the freshly fitted model for subsequent predictions
        self.model = fit.model
        _log.info(f"fitted submodel: {fit}")

    return self.predict(df, **kwargs)
def __init__(self, weights: Typing.PatchedDataFrame, prices: Typing.PatchedDataFrame, rebalance_threshold=0.01):
    """Build a weights-driven portfolio evaluation.

    Derives, in order: the weight deltas versus the previous step, the
    rebalancing decisions, the (optionally threshold-smoothed) effective
    weights, the asset fractions, the per-step portfolio return and the
    cumulative performance.

    :param weights: target portfolio weights per time step
    :param prices: asset prices aligned with `weights`
    :param rebalance_threshold: minimum weight change that triggers a
        rebalance; ``None`` disables smoothing and uses the raw weights
    """
    super().__init__()
    self.rebalance_threshold = rebalance_threshold
    self.prices = prices
    self.raw_weights = weights
    # change versus the previous row; the first row's delta is the weight itself
    self.delta_weights = weights - weights.shift(1).fillna(0)
    self.rebalancing = self._rebalancing(rebalance_threshold)

    if rebalance_threshold is not None:
        self.weights = self._smooth_weights()
    else:
        self.weights = weights

    self.fractions = self._calculate_fractions()
    self.portfolio_return = self._evaluate_return()
    # compound the simple returns into a cumulative performance curve
    self.performance = (self.portfolio_return + 1).cumprod()
def __init__(self, df: Typing.PatchedDataFrame, clip_profit_at=0, classes=None, **kwargs):
    """Classification summary that maps each (truth, prediction) pair to the
    row indices falling into that confusion-matrix cell and computes the
    per-row gross loss of the predicted price bucket versus the true price.

    :param df: frame holding targets, predictions and the gross-loss price column
    :param clip_profit_at: profit clipping level used by downstream metrics
    :param classes: number of distinct classes; inferred from the truth values
        when ``None``
    """
    super().__init__(df)
    self.clip_profit_at = clip_profit_at
    self.targets = df[TARGET_COLUMN_NAME]

    # calculate confusion indices: a (classes x classes) grid of lists where
    # cell (t, p) collects the df index values with truth t predicted as p
    truth, prediction = self._fix_label_prediction_representation()
    distinct_values = len({*truth.reshape((-1, ))}) if classes is None else classes
    cm = empty_lists((distinct_values, distinct_values))

    for i, (t, p) in enumerate(zip(truth, prediction)):
        cm[int(t), int(p)].append(self.df.index[i])

    self.confusion_indices = cm

    # we can calculate the gross loss from the predicted band and the true price,
    # therefore we need to pass the true price as gross loss such that we calculate the real loss
    self.df_gross_loss = pd.DataFrame(
        {
            # the target buckets (price bands) per row
            "bucket": df[[TARGET_COLUMN_NAME]].apply(get_buckets, axis=1, raw=True),
            # index of the bucket with the highest predicted probability
            # NOTE(review): `._` is a project accessor unpacking nested values — confirm
            "pidx": df.apply(lambda r: int(r[PREDICTION_COLUMN_NAME]._.values.argmax()), axis=1, raw=False),
            "price": df[GROSS_LOSS_COLUMN_NAME].values[:, 0]
        },
        index=df.index)

    # find target for predicted value: below the middle bucket the loss is
    # measured against the lower band edge, above it against the upper edge
    mid = self.targets.shape[1] / 2.0
    self.df_gross_loss["loss"] = self.df_gross_loss.apply(
        lambda r: (r["price"] - r["bucket"][r["pidx"]][0]) if r["pidx"] <= mid else (r["bucket"][r["pidx"]][1] - r["price"]),
        axis=1,
        raw=False).fillna(0)
def ta_delta_hedged_price(df: Typing.PatchedDataFrame, benchmark):
    """Compute delta-hedged prices: the price evolution of `df` with the
    benchmark's log returns subtracted out.

    :param df: prices to hedge (series or frame)
    :param benchmark: benchmark selector resolvable via ``get_pandas_object``;
        when it names a column of `df`, that column is dropped from the result
    :return: re-compounded (``exp(cumsum)``) delta-hedged price paths
    """
    bench = get_pandas_object(df, benchmark)

    # align both objects on their common index before taking returns
    common_index = intersection_of_index(df, bench)
    df = df.loc[common_index]
    bench = bench.loc[common_index]

    # the benchmark itself must not appear in the hedged result
    if hasattr(df, "columns") and not isinstance(benchmark, Typing.AnyPandasObject) and benchmark in df.columns:
        df = df.drop(benchmark, axis=1)

    bench_returns = ta_log_returns(bench)
    if df.ndim > 1:
        # broadcast the benchmark return column across all asset columns
        bench_returns = np.repeat(bench_returns.values.reshape(-1, 1), df.shape[1], axis=1)

    hedged_log_returns = ta_log_returns(df) - bench_returns
    return np.exp(hedged_log_returns.cumsum())
def __init__(
        self,
        df: Typing.PatchedDataFrame,
        model: Model,
        label_returns: Callable[[pd.DataFrame], pd.DataFrame],
        label_reconstruction: Callable[[pd.DataFrame], pd.DataFrame],
        sampler: Callable[[pd.Series], float] = lambda params, samples: np.random.normal(*params.values, samples),
        confidence: Union[float, Tuple[float, float]] = 0.8,
        forecast_period: int = 1,
        samples: int = 1000,
        bins='sqrt',
        figsize=(16, 16),
        **kwargs):
    """Summary of a sampled-distribution forecast with empirical confidence bands.

    :param df: frame holding labels, predictions and optionally target prices
    :param model: the fitted model being summarized
    :param label_returns: maps the label frame to returns
    :param label_reconstruction: reconstructs prices from the label returns
    :param sampler: draws `samples` values from predicted distribution parameters
    :param confidence: either the total confidence (e.g. 0.8 -> band 0.1..0.9)
        or an explicit (left, right) quantile pair
    :param forecast_period: forecast horizon in rows
    :param samples: number of Monte Carlo samples per row
    :param bins: histogram binning rule used in plots
    :param figsize: matplotlib figure size
    """
    super().__init__(
        df.sort_index(), model,
        self.plot_prediction,
        self.calc_scores,
        self.plot_tail_events,
        layout=[[0, 0], [1, 2]],
        **kwargs
    )

    self.label_returns = call_callable_dynamic_args(label_returns, y=df[LABEL_COLUMN_NAME], df=df)
    self.label_reconstruction = call_callable_dynamic_args(label_reconstruction, y=self.label_returns, df=df)

    if TARGET_COLUMN_NAME in df.columns:
        self.price_at_estimation = df[TARGET_COLUMN_NAME]
    else:
        self.price_at_estimation = None

    self.sampler = sampler
    self.figsize = figsize
    self.forecast_period = forecast_period
    self.nr_samples = samples
    self.bins = bins

    # a scalar confidence is split into symmetric tails, e.g. 0.8 => (0.1, 0.9)
    if isinstance(confidence, Iterable):
        self.left_confidence, self.right_confidence = confidence
    else:
        tail = (1. - confidence) / 2
        self.left_confidence, self.right_confidence = tail, tail + confidence

    self.expected_confidence = self.right_confidence - self.left_confidence
    self.cdf = self._estimate_ecdf()
def __init__(
        self,
        df: Typing.PatchedDataFrame,
        model: Model,
        label_returns: Callable[[pd.DataFrame], pd.DataFrame],
        label_reconstruction: Callable[[pd.DataFrame], pd.DataFrame],
        predicted_returns: Callable[[pd.DataFrame], pd.DataFrame],
        predicted_reconstruction: Callable[[pd.DataFrame], pd.DataFrame],
        predicted_std: Callable[[pd.DataFrame], pd.DataFrame],
        confidence: Union[float, Tuple[float, float]] = 0.95,
        figsize=(16, 16),
        **kwargs):
    """Summary of a forecast with normally-distributed confidence bands.

    :param df: frame holding labels and predictions
    :param model: the fitted model being summarized
    :param label_returns: maps the label frame to returns
    :param label_reconstruction: reconstructs prices from the label returns
    :param predicted_returns: maps the prediction frame to returns
    :param predicted_reconstruction: reconstructs prices from predicted returns
    :param predicted_std: provides the predicted standard deviation (frame or scalar)
    :param confidence: total confidence level (or a pair summed into one)
    :param figsize: matplotlib figure size
    """
    super().__init__(
        df.sort_index(), model,
        self.plot_prediction,
        self.calc_scores,
        layout=[[0], [1]],
        **kwargs
    )
    self.figsize = figsize

    self.label_returns = call_callable_dynamic_args(label_returns, y=df[LABEL_COLUMN_NAME], df=df)
    self.label_reconstruction = call_callable_dynamic_args(label_reconstruction, y=self.label_returns, df=df)
    self.predicted_returns = call_callable_dynamic_args(predicted_returns, y_hat=df[PREDICTION_COLUMN_NAME], df=df)
    self.prediction_reconstruction = call_callable_dynamic_args(predicted_reconstruction, y_hat=self.predicted_returns, df=df, y=self.label_reconstruction)

    # confidence intervals
    self.expected_confidence = np.sum(confidence)
    self.normal_confidence = NormalConfidence(confidence)
    self.predicted_std = call_callable_dynamic_args(predicted_std, y=self.label_returns, y_hat=self.predicted_returns, df=df)

    # a scalar std is broadcast to a constant series aligned with the predictions
    if isinstance(self.predicted_std, float):
        self.predicted_std = pd.Series(np.ones(len(self.predicted_returns)) * self.predicted_std, index=self.predicted_returns.index)

    band_inputs = pd.concat([self.predicted_returns, self.predicted_std], join='inner', axis=1)
    self.lower = band_inputs.apply(self.normal_confidence.lower, axis=1)
    self.upper = band_inputs.apply(self.normal_confidence.upper, axis=1)