def test_dummy_generator(self):
    """A DummySplitter(k) generator must yield exactly k complete samples."""
    frame = pd.DataFrame({"a": np.arange(10)})
    target_frame = pd.DataFrame({"a": np.arange(10)})

    gen_one = DataGenerator(DummySplitter(1), frame, target_frame).complete_samples()
    gen_two = DataGenerator(DummySplitter(2), frame, target_frame).complete_samples()

    # materialize the lazy sample generators so we can count them
    drawn_one = list(gen_one.sample())
    drawn_two = list(gen_two.sample())

    self.assertEqual(1, len(drawn_one))
    self.assertEqual(2, len(drawn_two))
def test_wrapping_cross_validation(self):
    """ExtractMultiMultiModelSampler must split a 4-column label frame into
    two 2-column sub-samplers whose rows still cover the full data set.

    NOTE(review): removed a leftover debug ``for``/``print`` loop; besides
    polluting test output it shadowed the sampler variable ``s``.
    """
    features = pd.DataFrame({"a": np.arange(10)})
    labels = pd.DataFrame({
        "a": np.arange(10),
        "b": np.arange(10) + 10,
        "c": np.arange(10) + 20,
        "d": np.arange(10) + 30,
    })

    s = DataGenerator(DummySplitter(2), features, labels, None, None, None).train_test_sampler()
    s1 = ExtractMultiMultiModelSampler(0, 2, s)
    s2 = ExtractMultiMultiModelSampler(1, 2, s)

    # full sampler: all 4 label columns, sum over arange blocks = 780
    train, test = next(s.sample())
    y = train[1]
    self.assertEqual(4, y.shape[1])
    self.assertEqual(780, y.sum())

    # first extracted model: columns a, b -> sum 45 + 145 = 190
    train, test = next(s1.sample())
    y = train[1]
    self.assertEqual(2, y.shape[1])
    self.assertEqual(190, y.sum())

    # second extracted model: columns c, d -> sum 245 + 345 = 590
    train, test = next(s2.sample())
    y = train[1]
    self.assertEqual(2, y.shape[1])
    self.assertEqual(590, y.sum())

    # both extracted samplers iterate the full 2 * 10 training rows
    self.assertEqual(20, sum(len(sample[0][0]) for sample in s1.sample()))
    self.assertEqual(20, sum(len(sample[0][0]) for sample in s2.sample()))
def predict(df: pd.DataFrame, model: Model, tail: int = None, samples: int = 1, **kwargs) -> pd.DataFrame:
    """Run a fitted model on *df* and return predictions assembled into a frame.

    :param df: source data frame the features are engineered from
    :param model: fitted model providing ``features_and_labels`` and ``predict``
    :param tail: if given, only the last ``tail`` rows are predicted; extra
        leading rows are kept so feature engineering has enough history
        (requires the model to expose ``min_required_samples``)
    :param samples: number of samples to draw per row (``> 1`` e.g. for
        probabilistic models)
    :param kwargs: merged with the model's and its features' kwargs and
        forwarded to extraction and prediction
    :return: result frame combining targets, predictions and features
    """
    min_required_samples = model.features_and_labels.min_required_samples

    if tail is not None:
        if min_required_samples is not None:
            # just use the tail for feature engineering
            df = df[-(abs(tail) + (min_required_samples - 1)):]
        else:
            _log.warning("could not determine the minimum required data from the model")

    kwargs = merge_kwargs(model.features_and_labels.kwargs, model.kwargs, kwargs)
    columns, features, targets = extract(model.features_and_labels, df, extract_features, **kwargs)

    if samples > 1:
        # use the module logger instead of print so callers can control verbosity
        _log.info("draw %s samples", samples)

    sampler = DataGenerator(DummySplitter(samples), features, None, targets, None).complete_samples()
    predictions = model.predict(sampler, **kwargs)

    y_hat = to_pandas(predictions, index=features.index, columns=columns)
    return _assemble_result_frame(targets, y_hat, None, None, None, features)
def backtest(df: pd.DataFrame, model: Model, summary_provider: Callable[[pd.DataFrame], Summary] = None, **kwargs) -> Summary:
    """Replay a fitted model over *df* and summarize its predictions.

    Features, labels, targets, weights and gross loss are extracted from the
    frame, the model predicts over a single complete sample, and the combined
    result frame is handed to *summary_provider* (falling back to the model's
    own summary provider).
    """
    kwargs = merge_kwargs(model.features_and_labels.kwargs, model.kwargs, kwargs)

    (features, _), labels, targets, weights, gross_loss = extract(
        model.features_and_labels, df, extract_feature_labels_weights, **kwargs)

    # a single dummy split yields one complete pass over all rows
    sampler = DataGenerator(DummySplitter(1), features, labels, targets, None).complete_samples()
    predictions = model.predict(sampler, **kwargs)
    y_hat = to_pandas(predictions, index=features.index, columns=labels.columns)

    df_backtest = _assemble_result_frame(targets, y_hat, labels, gross_loss, weights, features)
    summarize = summary_provider if summary_provider is not None else model.summary_provider
    return summarize(df_backtest, model, **kwargs)