Example #1
    def test_dummy_generator(self):
        features = pd.DataFrame({"a": np.arange(10)})
        targets = pd.DataFrame({"a": np.arange(10)})

        one = DataGenerator(DummySplitter(1), features,
                            targets).complete_samples()
        two = DataGenerator(DummySplitter(2), features,
                            targets).complete_samples()

        sampled1 = list(one.sample())
        sampled2 = list(two.sample())

        self.assertEqual(1, len(sampled1))
        self.assertEqual(2, len(sampled2))
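
DummySplitter itself is not shown on this page. Judging only from the assertions above (n splits yield n complete samples over the full frame), a minimal sketch of such a splitter might look like the following; the name DummySplitterSketch and its split signature are assumptions for illustration, not the library's actual API.

import numpy as np

class DummySplitterSketch:
    """Hypothetical stand-in: yields one (train, test) pair per requested
    split, each covering the complete index."""

    def __init__(self, n: int):
        self.n = n

    def split(self, index):
        idx = np.asarray(index)
        for _ in range(self.n):
            yield idx, idx  # train == test == everything
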
Example #2
    def test_wrapping_cross_validation(self):
        features = pd.DataFrame({"a": np.arange(10)})
        labels = pd.DataFrame({
            "a": np.arange(10),
            "b": np.arange(10) + 10,
            "c": np.arange(10) + 20,
            "d": np.arange(10) + 30,
        })

        s = DataGenerator(DummySplitter(2), features, labels, None, None,
                          None).train_test_sampler()
        s1 = ExtractMultiMultiModelSampler(0, 2, s)
        s2 = ExtractMultiMultiModelSampler(1, 2, s)

        train, test = next(s.sample())
        y = train[1]
        self.assertEqual(4, y.shape[1])
        self.assertEqual(780, y.sum())

        train, test = next(s1.sample())
        y = train[1]
        self.assertEqual(2, y.shape[1])
        self.assertEqual(190, y.sum())

        train, test = next(s2.sample())
        y = train[1]
        self.assertEqual(2, y.shape[1])
        self.assertEqual(590, y.sum())

        # each extracted partition still iterates over both cross-validation
        # folds, with 10 training rows per fold
        self.assertEqual(20, sum(len(smp[0][0]) for smp in s1.sample()))
        self.assertEqual(20, sum(len(smp[0][0]) for smp in s2.sample()))
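
The asserted sums follow directly from how the labels are built: columns a, b, c and d sum to 45, 145, 245 and 345 respectively (arange(10) sums to 45, and each offset adds 10 rows times the offset). The wrapped sampler sees all four columns (780), while ExtractMultiMultiModelSampler partitions them two at a time: partition 0 gets a and b (190), partition 1 gets c and d (590). A quick standalone check:

import numpy as np

sums = [(np.arange(10) + off).sum() for off in (0, 10, 20, 30)]  # a, b, c, d
assert sums == [45, 145, 245, 345]
assert sum(sums) == 780          # full sampler: all four label columns
assert sums[0] + sums[1] == 190  # partition 0: columns a and b
assert sums[2] + sums[3] == 590  # partition 1: columns c and d
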
Example #3
def predict(df: pd.DataFrame,
            model: Model,
            tail: int = None,
            samples: int = 1,
            **kwargs) -> pd.DataFrame:
    min_required_samples = model.features_and_labels.min_required_samples

    if tail is not None:
        if min_required_samples is not None:
            # keep only the tail plus the extra history feature engineering
            # needs, so that exactly `tail` prediction rows remain
            df = df[-(abs(tail) + (min_required_samples - 1)):]
        else:
            _log.warning(
                "could not determine the minimum required data from the model")

    kwargs = merge_kwargs(model.features_and_labels.kwargs, model.kwargs,
                          kwargs)
    columns, features, targets = extract(model.features_and_labels, df,
                                         extract_features, **kwargs)

    if samples > 1:
        _log.info(f"drawing {samples} samples")

    sampler = DataGenerator(DummySplitter(samples), features, None, targets,
                            None).complete_samples()
    predictions = model.predict(sampler, **kwargs)

    y_hat = to_pandas(predictions, index=features.index, columns=columns)
    return _assemble_result_frame(targets, y_hat, None, None, None, features)
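
A hypothetical call site for predict; the names price_df and fitted_model are assumed to exist, and samples > 1 only makes sense for models that predict stochastically (e.g. dropout kept on at inference):

# predict just the most recent row, slicing off only the history
# that feature engineering still needs
latest = predict(price_df, fitted_model, tail=1)

# draw 100 stochastic predictions from the same features
distribution = predict(price_df, fitted_model, samples=100)
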
Example #4
def backtest(df: pd.DataFrame,
             model: Model,
             summary_provider: Callable[[pd.DataFrame], Summary] = None,
             **kwargs) -> Summary:
    kwargs = merge_kwargs(model.features_and_labels.kwargs, model.kwargs, kwargs)
    (features, _), labels, targets, weights, gross_loss =\
        extract(model.features_and_labels, df, extract_feature_labels_weights, **kwargs)

    sampler = DataGenerator(DummySplitter(1), features, labels, targets, None).complete_samples()
    predictions = model.predict(sampler, **kwargs)

    y_hat = to_pandas(predictions, index=features.index, columns=labels.columns)
    df_backtest = _assemble_result_frame(targets, y_hat, labels, gross_loss, weights, features)
    return (summary_provider or model.summary_provider)(df_backtest, model, **kwargs)
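
And a matching sketch for backtest, again assuming price_df and fitted_model exist; unless a summary_provider is passed explicitly, the Summary comes from the model's own provider:

summary = backtest(price_df, fitted_model)  # uses model.summary_provider
print(summary)                              # the Summary object renders the back-test result
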