def test_nested_postprocessors(self):
    """Outer label post processors must run after the inner ones."""
    frame = pd.DataFrame({
        "a": np.arange(20),
        "b": np.arange(20),
        "c": np.arange(20),
    })

    def keep_only_b(df):
        # outer label post processor: drop every label column except "b"
        return df[["b"]]

    extracted: FeaturesWithLabels = frame._.extract(
        PostProcessedFeaturesAndLabels.from_features_and_labels(
            PostProcessedFeaturesAndLabels(
                features=["a"],
                feature_post_processor=[lambda df: lag_columns(df, [1, 2])],
                labels=["b", "c"],
                labels_post_processor=[
                    lambda df: df + 0.5,
                    lambda df: df + 0.001,
                ],
            ),
            labels_post_processor=[keep_only_b],
        ))

    # lagging by 2 drops two rows; the outer processor keeps a single column
    self.assertEqual((18, 1), extracted.labels.shape)
    # last label: 19 + 0.5 + 0.001
    self.assertEqual(19.501, extracted.labels.iloc[-1, -1])
def test_post_row_standardisation(self):
    """Row-wise min/max normalisation must map every feature row into [0, 1]."""
    data = DF_TEST.copy()

    extracted: FeaturesWithLabels = data._.extract(
        PostProcessedFeaturesAndLabels(
            features=[
                lambda df: df["Close"].ta.log_returns(),
                lambda df: df["Close"].ta.trix(),
                lambda df: df["Close"].ta.rsi(),
            ],
            feature_post_processor=[
                lambda df: df.ta.rnn(20),
                lambda df: df.ta.normalize_row('minmax01', level=1),
            ],
            labels=[Constant(0)],
        ))

    features = extracted.features_with_required_samples.features

    # after min/max scaling each row spans the full [0, 1] range
    self.assertAlmostEqual(1, features.max(axis=1).values.max())
    self.assertAlmostEqual(0, features.min(axis=1).values.max())
    # the row maximum occurs in every one of the 60 columns at least once
    self.assertEqual(
        set(range(60)),
        set(features.apply(np.argmax, axis=1).values))
def test_post_processing(self):
    """A label post processor may filter rows; features stay aligned."""
    frame = pd.DataFrame({"a": np.arange(20), "b": np.arange(20)})

    extracted: FeaturesWithLabels = frame._.extract(
        PostProcessedFeaturesAndLabels(
            features=["a"],
            feature_post_processor=lambda df: df * 2,
            labels=["b"],
            labels_post_processor=lambda df: df.loc[df["b"] % 2 == 0]))

    # keeping only even "b" values halves the 20 rows
    self.assertEqual(len(extracted.features), 10)
    self.assertEqual(len(extracted.labels), 10)
    # 0 + 2 + 4 + ... + 18 == 90
    self.assertEqual(extracted.labels.values.sum().item(), 90)
def test_post_processing_multiindex_row(self):
    """Post processors must preserve a MultiIndex on the row axis."""
    frame = pd.DataFrame({"a": np.arange(20), "b": np.arange(20)})
    frame.index = pd.MultiIndex.from_product([["A", "B"], range(10)])

    extracted: FeaturesWithLabels = frame._.extract(
        PostProcessedFeaturesAndLabels(
            features=["a"],
            feature_post_processor=lambda df: lag_columns(df, [1, 2]),
            labels=["b"],
            labels_post_processor=lambda df: df * 2,
        ))

    # both frames keep the two-level row index after processing
    self.assertIsInstance(extracted.features.index, pd.MultiIndex)
    self.assertIsInstance(extracted.labels.index, pd.MultiIndex)
def test_repr_post_processed_features_labels(self):
    # Pins the repr() of PostProcessedFeaturesAndLabels.
    #
    # NOTE(review): the expected string embeds the raw source lines of the
    # lambdas below (including their leading whitespace and trailing ",\n"),
    # so the exact formatting/indentation of the lambda lines is part of the
    # asserted value — confirm against the expected string before reformatting
    # this test.
    fnl = PostProcessedFeaturesAndLabels(
        features=[
            "Close",
            lambda df: df["Close"].ta.trix(),
        ],
        feature_post_processor=[lambda df: df.ta.rnn(5)],
        labels=[Constant(0)],
        targets=[lambda df: df["Close"]],
    )

    # debug aid: print the escaped repr so a changed expectation is easy to copy
    print(repr(repr(fnl).replace('"', '\"')))
    self.assertEqual(
        'PostProcessedFeaturesAndLabels(\t[\'Close\', \' lambda df: df["Close"].ta.trix(),\\n\'], \t[\' lambda df: df.ta.rnn(5)\\n\'], \t[\'Constant(0)\'], \tNone, \tNone, \tNone, \tNone, \tNone, \tNone, \tNone, \t[\' lambda df: df["Close"]\\n\'], \tNone, \tNone, \t{})',
        repr(fnl))
def test_post_processor_with_multi_frame_decorator(self):
    """Per-frame post processors apply independently to each feature set."""
    frame = pd.DataFrame({
        "a": np.arange(20),
        "b": np.arange(20),
        "c": np.arange(20),
    })

    extracted: FeaturesWithLabels = frame._.extract(
        PostProcessedFeaturesAndLabels(
            features=(["a"], ["b"]),
            feature_post_processor=(
                [lambda df: df - 1],
                [lambda df: df + 2],
            ),
            labels=["c"]))

    # a tuple of feature sets yields a multi-frame wrapper
    self.assertIsInstance(extracted.features, MultiFrameDecorator)
    # last row: a = 19 - 1 = 18, b = 19 + 2 = 21
    self.assertListEqual(
        [18, 21],
        extracted.features.as_joined_frame().iloc[-1].to_list())
def test_feature_post_processing(self):
    """An rnn(5) post processor windows the two raw features into 10 columns."""
    data = DF_TEST.copy()

    extracted: FeaturesWithLabels = data._.extract(
        PostProcessedFeaturesAndLabels(
            features=[
                "Close",
                lambda df: df["Close"].ta.trix(),
            ],
            feature_post_processor=[lambda df: df.ta.rnn(5)],
            labels=[Constant(0)],
            targets=[lambda df: df["Close"]],
        ))

    required = extracted.features_with_required_samples

    # 2 features x 5 time steps -> 10 flat columns
    self.assertEqual((6674, 10), required.features.shape)
    self.assertEqual((6674, 1), extracted.labels.shape)
    # embedded value tensor keeps the (samples, time steps, features) layout
    self.assertEqual((6674, 5, 2), required.features._.values.shape)
def test_feature_post_processing_pipeline(self):
    """Post processors chain in order: flatten -> rnn(2) -> uniform row norm."""
    data = DF_TEST.copy()

    extracted: FeaturesWithLabels = data._.extract(
        PostProcessedFeaturesAndLabels(
            features=[lambda df: df._["Close"].ta.log_returns()],
            feature_post_processor=[
                lambda df: df.flatten_columns(),
                lambda df: df.ta.rnn(2),
                lambda df: df.ta.normalize_row(normalizer='uniform'),
            ],
            labels=[lambda df: df._["Close"].ta.log_returns().shift(-1)],
        )
    )

    features = extracted.features_with_required_samples.features

    self.assertEqual((6760, 2, 1), features._.values.shape)
    # uniform row normalisation ranks the last window as [1.5, 1.0]
    np.testing.assert_array_almost_equal(
        features[-1:].values, np.array([[1.5, 1.0]]))
def test_soft_dtw_loss(self):
    # Train a tiny RNN auto encoder on the last 21 closing prices with the
    # Soft-DTW loss, then run the fitted encoder over the frame.
    # NOTE(review): this test reads TEST_DF while sibling tests use DF_TEST —
    # presumably both are module-level fixtures; confirm they exist.
    df = TEST_DF[["Close"]][-21:].copy()

    class LstmAutoEncoder(PytorchNN):
        """Sequence auto encoder: an RNN encoder compresses a length-10,
        1-feature sequence into hidden_size=2; an RNN decoder reconstructs
        the sequence from that code."""

        def __init__(self):
            super().__init__()
            self.input_size = 1       # one feature per time step ("Close")
            self.seq_size = 10        # window length fed to the encoder
            self.hidden_size = 2      # size of the latent code
            self.num_layers = 1
            self.num_directions = 1

            self._encoder = \
                nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size,
                       num_layers=self.num_layers, batch_first=True)
            self._decoder = \
                nn.RNN(input_size=self.hidden_size, hidden_size=self.input_size,
                       num_layers=self.num_layers, batch_first=True)

        def forward_training(self, x):
            """Encode then decode a batch, returning the reconstruction."""
            # make sure to treat single elements as batches
            x = x.view(-1, self.seq_size, self.input_size)
            batch_size = len(x)

            # fresh zero-initialized hidden states per forward pass
            hidden_encoder = nn.Parameter(
                t.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))
            hidden_decoder = nn.Parameter(
                t.zeros(self.num_layers * self.num_directions, batch_size, self.input_size))

            x, _ = self._encoder(x, hidden_encoder)
            # repeat the last encoder output across the whole sequence length
            # so the decoder reconstructs from a single latent code
            x = t.repeat_interleave(x[:, -2:-1], x.shape[1], dim=1)
            x, hidden = self._decoder(x, hidden_decoder)
            return x.squeeze()

        def encode(self, x):
            """Return the latent code (last encoder output) without gradients."""
            x = x.reshape(-1, self.seq_size, self.input_size)
            batch_size = len(x)

            with t.no_grad():
                hidden = nn.Parameter(
                    t.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))

                # return last element of sequence
                return self._encoder(x, hidden)[0][:, -1]

        def decode(self, x):
            """Reconstruct a sequence from latent codes without gradients."""
            x = x.reshape(-1, self.seq_size, self.hidden_size)
            batch_size = len(x)

            with t.no_grad():
                hidden = nn.Parameter(
                    t.zeros(self.num_layers * self.num_directions, batch_size, self.input_size))
                return self._decoder(x.float(), hidden)[0]

    # features and labels are both the lagged "Close" window (auto encoding),
    # with two named columns for the condensed latent representation
    model = PytorchAutoEncoderModel(
        LstmAutoEncoder,
        PostProcessedFeaturesAndLabels(
            df.columns.to_list(),
            [lambda df: lag_columns(df, 10).dropna()],
            df.columns.to_list(),
            [lambda df: lag_columns(df, 10).dropna()],
            ["condensed-a", "condensed-b"]),
        SoftDTW,
        Adam)

    with df.model() as m:
        fit = m.fit(model, FittingParameter(epochs=100))

    print(fit.test_summary.df)

    # run only the encoder half of the fitted model over the frame
    encoded = df.model.predict(fit.model.as_encoder())
    print(encoded)
def test_probabilistic_model_with_callback(self):
    """Fit a two-component gaussian-mixture density network and verify its
    confidence interval via a fitting callback.

    Skips (returns early) when the optional ``pandas_ml_quant`` package is
    not importable.
    """
    try:
        pandas_ml_quant_data_provider = importlib.import_module("pandas_ml_quant")
        from pandas_ml_quant import PricePredictionSummary
        from pandas_ml_quant.model.summary.price_prediction_summary import PriceSampledSummary
    except Exception:
        # was a bare `except:`, which would also swallow KeyboardInterrupt and
        # SystemExit; `Exception` still skips on any import failure
        print("pandas_ml_quant not found, skipping!")
        return

    # synthetic returns drawn from the sum of two normal distributions
    df = pd.DataFrame({
        "Returns": np.random.normal(-0.02, 0.03, 500) + np.random.normal(0.03, 0.02, 500)
    })

    fl = PostProcessedFeaturesAndLabels(
        features=["Returns"],
        feature_post_processor=lambda df: df.ta.rnn(20),
        labels=[lambda df: df["Returns"].shift(-1).rename("Future_Returns")],
        targets=lambda df: (1 + df["Returns"]).cumprod().rename("Close"))

    # network emits 6 values split into mixture weights, scales and locations
    model_factory = PytorchNNFactory.create(
        nn.Sequential(
            nn.Linear(20, 10),
            nn.Tanh(),
            nn.Linear(10, 6),
            LambdaSplitter(
                lambda x: T.softmax(x[..., :2], dim=1),
                lambda x: T.exp(x[..., 2:4]),
                # enforce one mean positive and the other negative
                lambda x: T.cat([T.exp(x[..., 4:5]), -T.exp(x[..., 5:6])], dim=1),
            )),
        predictor=lambda n, i: T.cat(n(i), dim=1),
        trainer=lambda n, i: n(i))

    def dist(probs, scales, locs):
        # two-component gaussian mixture
        return MixtureSameFamily(Categorical(probs=probs), Normal(loc=locs, scale=scales))

    def loss(y_pred):
        probs, scales, locs = y_pred
        return dist(probs, scales, locs)

    def cdf_cb(arg):
        # flat prediction layout: [p0, p1, scale0, scale1, loc0, loc1]
        probs, scales, locs = arg[..., :2], arg[..., 2:4], arg[..., 4:6]
        return dist(probs, scales, locs)

    summary_provider = PriceSampledSummary.with_reconstructor(
        sampler=wrap_applyable(
            lambda params, samples: cdf_cb(params).sample([int(samples.item())]),
            nr_args=2),
        samples=100,
        confidence=0.8)

    model = PytorchModel(
        module_provider=model_factory,
        features_and_labels=fl,
        criterion_provider=lambda: DistributionNLL(loss, penalize_toal_variance_lambda=1.1),
        optimizer_provider=Adam,
        summary_provider=summary_provider)

    fit = df.model.fit(
        model,
        FittingParameter(epochs=10, batch_size=6, splitter=naive_splitter(0.25)),
        callbacks=[
            TestConfidenceInterval(
                TestConfidenceInterval.CdfConfidenceInterval(
                    wrap_applyable(lambda params, val: cdf_cb(params).cdf(val), nr_args=2),
                    interval=0.8),
                wrap_applyable(lambda params: cdf_cb(params).variance),
                early_stopping=True)
        ])

    print(fit.test_summary.calc_scores())