def rise_benchmarking():
    """Benchmark RISE against a pipeline-composed equivalent on each dataset."""
    for i, dataset in enumerate(benchmark_datasets):
        print(f"{i} problem = {dataset}")
        rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(overwrite=True,
                           problem_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonRISE",
                           classifier=rise,
                           dataset=dataset,
                           train_file=False)
        steps = [('segment',
                  RandomIntervalSegmenter(n_intervals=1, min_length=5)),
                 ('transform',
                  FeatureUnion([('acf',
                                 RowTransformer(
                                     FunctionTransformer(func=acf_coefs,
                                                         validate=False))),
                                ('ps',
                                 RowTransformer(
                                     FunctionTransformer(func=powerspectrum,
                                                         validate=False)))])),
                 ('tabularise', Tabularizer()),
                 ('clf', DecisionTreeClassifier())]
        base_estimator = Pipeline(steps)
        rise = TimeSeriesForestClassifier(estimator=base_estimator,
                                          n_estimators=100)
        exp.run_experiment(overwrite=True,
                           problem_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonRISEComposite",
                           classifier=rise,
                           dataset=dataset,
                           train_file=False)
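# rise_benchmarking() above applies two row-wise feature functions, acf_coefs and
# powerspectrum, that are defined elsewhere in the original script. A minimal
# numpy-only sketch of what such functions could look like (hypothetical
# implementations, not the originals):
import numpy as np


def acf_coefs(x, max_lag=100):
    # Autocorrelation coefficients of a 1-D series up to max_lag (illustrative).
    x = np.asarray(x, dtype=float) - np.mean(x)
    n = len(x)
    lags = range(1, min(max_lag, n - 1) + 1)
    denom = np.dot(x, x)
    return np.array([np.dot(x[:-lag], x[lag:]) / denom for lag in lags])


def powerspectrum(x):
    # Power spectrum of a 1-D series via the real-valued FFT (illustrative).
    fft = np.fft.rfft(np.asarray(x, dtype=float))
    return (fft * np.conj(fft)).real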
def test_different_pipelines():
    """Check the pipeline matches RandomIntervalFeatureExtractor exactly."""
    random_state = 1233
    X_train, y_train = make_classification_problem()
    steps = [
        ('segment',
         RandomIntervalSegmenter(n_intervals='sqrt',
                                 random_state=random_state)),
        ('transform',
         FeatureUnion([
             ('mean',
              RowTransformer(FunctionTransformer(func=np.mean,
                                                 validate=False))),
             ('std',
              RowTransformer(FunctionTransformer(func=np.std,
                                                 validate=False))),
             ('slope',
              RowTransformer(
                  FunctionTransformer(func=time_series_slope,
                                      validate=False))),
         ])),
    ]
    pipe = Pipeline(steps)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=random_state)
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
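# A minimal, sktime-free sketch of the equivalence that test_different_pipelines
# checks: a FeatureUnion of FunctionTransformers computing mean/std/slope yields
# exactly the same feature matrix as computing those summaries directly. All
# names and data here are illustrative, not part of the original test.
import numpy as np
from sklearn.pipeline import FeatureUnion
from sklearn.preprocessing import FunctionTransformer


def _slope(x):
    # Least-squares slope of each row against its (centred) time index.
    t = np.arange(x.shape[1]) - (x.shape[1] - 1) / 2.0
    return ((x - x.mean(axis=1, keepdims=True)) @ t / (t @ t)).reshape(-1, 1)


rng = np.random.default_rng(1233)
X = rng.normal(size=(10, 50))  # 10 "series" of length 50, already tabular

union = FeatureUnion([
    ("mean", FunctionTransformer(lambda x: x.mean(axis=1, keepdims=True))),
    ("std", FunctionTransformer(lambda x: x.std(axis=1, keepdims=True))),
    ("slope", FunctionTransformer(_slope)),
])
a = union.fit_transform(X)
b = np.column_stack([X.mean(axis=1), X.std(axis=1), _slope(X).ravel()])
np.testing.assert_array_equal(a, b)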
def tsf_benchmarking():
    """Benchmark TSF against a pipeline-composed equivalent on each dataset."""
    for i, dataset in enumerate(benchmark_datasets):
        print(f"{i} problem = {dataset}")
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals="sqrt")),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "mean",
                            make_row_transformer(
                                FunctionTransformer(func=np.mean, validate=False)
                            ),
                        ),
                        (
                            "std",
                            make_row_transformer(
                                FunctionTransformer(func=np.std, validate=False)
                            ),
                        ),
                        (
                            "slope",
                            make_row_transformer(
                                FunctionTransformer(
                                    func=time_series_slope, validate=False
                                )
                            ),
                        ),
                    ]
                ),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        tsf = TimeSeriesForestClassifier(estimator=base_estimator, n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
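# The benchmarking functions above rely on module-level names that the original
# script sets up elsewhere. An illustrative sketch of that setup (the import
# locations, paths and dataset list are assumptions, not the original values):
#
#     import sktime.contrib.experiments as exp   # assumed home of run_experiment
#     from sktime.classifiers import frequency_based as fb  # assumed alias
#     from sktime.classifiers import interval_based as ib   # assumed alias
#
#     data_dir = "/path/to/datasets/"             # hypothetical location
#     results_dir = "/path/to/results/"           # hypothetical location
#     benchmark_datasets = ["GunPoint", "ItalyPowerDemand"]  # example subset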
def test_different_pipelines():
    """Check the pipeline matches RandomIntervalFeatureExtractor exactly."""
    random_state = 1233
    X_train, y_train = make_classification_problem()
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=1, random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([
                (
                    "mean",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=np.mean, validate=False),
                        check_transformer=False,
                    ),
                ),
                (
                    "std",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=np.std, validate=False),
                        check_transformer=False,
                    ),
                ),
                (
                    "slope",
                    SeriesToPrimitivesRowTransformer(
                        FunctionTransformer(func=time_series_slope,
                                            validate=False),
                        check_transformer=False,
                    ),
                ),
            ]),
        ),
    ]
    pipe = Pipeline(steps)
    a = pipe.fit_transform(X_train)
    tran = RandomIntervalFeatureExtractor(
        n_intervals=1,
        features=[np.mean, np.std, time_series_slope],
        random_state=random_state,
    )
    b = tran.fit_transform(X_train)
    np.testing.assert_array_equal(a, b)
    np.testing.assert_array_equal(pipe.steps[0][1].intervals_, tran.intervals_)
def test_equivalent_model_specifications(n_intervals, n_estimators):
    """Test composable TSF vs an equivalent model."""
    random_state = 1234
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")

    # Due to tie-breaking/floating-point rounding in the final decision tree
    # classifier, the results depend on the exact column order of the input data.

    # Compare pipeline predictions outside of ensemble.
    steps = [
        (
            "segment",
            RandomIntervalSegmenter(n_intervals=n_intervals,
                                    random_state=random_state),
        ),
        (
            "transform",
            FeatureUnion([("mean", mean_transformer),
                          ("std", std_transformer)]),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf1 = Pipeline(steps)
    clf1.fit(X_train, y_train)
    a = clf1.predict(X_test)

    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                n_intervals=n_intervals,
                features=[np.mean, np.std],
                random_state=random_state,
            ),
        ),
        ("clf", DecisionTreeClassifier(random_state=random_state)),
    ]
    clf2 = Pipeline(steps)
    clf2.fit(X_train, y_train)
    b = clf2.predict(X_test)
    np.testing.assert_array_equal(a, b)
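# test_equivalent_model_specifications uses mean_transformer and std_transformer,
# which are defined elsewhere in the original test module. A plausible
# definition, mirroring the SeriesToPrimitivesRowTransformer usage earlier in
# this file (hypothetical, not the original fixtures):
mean_transformer = SeriesToPrimitivesRowTransformer(
    FunctionTransformer(func=np.mean, validate=False), check_transformer=False
)
std_transformer = SeriesToPrimitivesRowTransformer(
    FunctionTransformer(func=np.std, validate=False), check_transformer=False
)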
def tsf_benchmarking():
    """Benchmark TSF against a pipeline-composed equivalent on each dataset."""
    for i, dataset in enumerate(benchmark_datasets):
        print(f"{i} problem = {dataset}")
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(overwrite=False,
                           problem_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonTSF",
                           classifier=tsf,
                           dataset=dataset,
                           train_file=False)
        steps = [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),
            ('transform',
             FeatureUnion([('mean',
                            RowTransformer(
                                FunctionTransformer(func=np.mean,
                                                    validate=False))),
                           ('std',
                            RowTransformer(
                                FunctionTransformer(func=np.std,
                                                    validate=False))),
                           ('slope',
                            RowTransformer(
                                FunctionTransformer(func=time_series_slope,
                                                    validate=False)))])),
            ('clf', DecisionTreeClassifier())
        ]
        base_estimator = Pipeline(steps)
        tsf = TimeSeriesForestClassifier(estimator=base_estimator,
                                         n_estimators=100)
        exp.run_experiment(overwrite=False,
                           problem_path=data_dir,
                           results_path=results_dir,
                           cls_name="PythonTSFComposite",
                           classifier=tsf,
                           dataset=dataset,
                           train_file=False)
def main():
    #1. Load the predefined train/test splits of the dataset
    X_train, y_train = load_italy_power_demand(split='train', return_X_y=True)
    X_test, y_test = load_italy_power_demand(split='test', return_X_y=True)
    print('Shapes of the X/y train and test sets:', X_train.shape, y_train.shape,
          X_test.shape, y_test.shape, '\n')
    print('X_train:', X_train.head(), '\n')
    print('\nX_train info:')
    X_train.info()  # info() prints its summary and returns None, so call it on its own

    labels, counts = np.unique(y_train, return_counts=True)
    print(
        '\nThere are', labels,
        'labels in this dataset; one corresponds to winter and the other to summer. The count of each is',
        counts, '\n')

    #2. Create, fit, and predict with an sklearn classifier
    #Tabularize the nested sktime data so sklearn can consume it
    X_train_tab = tabularize(X_train)
    X_test_tab = tabularize(X_test)
    print('\n X_train tabularized\n', X_train_tab.head(), '\n')

    #2.1 sklearn RandomForestClassifier on the tabularized data
    classifier = RandomForestClassifier(n_estimators=100)
    classifier.fit(X_train_tab, y_train)
    y_pred = classifier.predict(X_test_tab)
    print('Accuracy of sklearn RandomForestClassifier',
          round(accuracy_score(y_test, y_pred), 4), '\n')

    #2.2 Same as above, but using make_pipeline with the sktime Tabularizer
    classifier = make_pipeline(Tabularizer(),
                               RandomForestClassifier(n_estimators=100),
                               verbose=True)
    classifier.fit(X_train, y_train)
    print(
        'Accuracy of sklearn RandomForestClassifier in a make_pipeline whose first step is the sktime Tabularizer()',
        round(classifier.score(X_test, y_test), 4), '\n')

    #3. sklearn make_pipeline with the sktime TSFreshFeatureExtractor
    classifier = make_pipeline(TSFreshFeatureExtractor(show_warnings=False),
                               RandomForestClassifier(n_estimators=100))
    classifier.fit(X_train, y_train)
    print(
        'Accuracy of sklearn RandomForestClassifier in a make_pipeline whose first step is the sktime TSFreshFeatureExtractor, which automatically extracts and filters statistical features from the nested X_train time series',
        round(classifier.score(X_test, y_test), 4), '\n')

    #4. Time series pipeline combining sktime segmentation/transformation with an sklearn classifier
    steps = [
        ('segment', RandomIntervalSegmenter(n_intervals='sqrt')),  #Sktime
        (
            'transform',
            FeatureUnion([  #Sklearn
                ('mean',
                 RowTransformer(
                     FunctionTransformer(func=np.mean,
                                         validate=False))),  #sktime
                ('std',
                 RowTransformer(
                     FunctionTransformer(func=np.std,
                                         validate=False))),  #sktime
                ('slope',
                 RowTransformer(
                     FunctionTransformer(func=time_series_slope,
                                         validate=False)))  #sktime
            ])),
        ('clf', DecisionTreeClassifier())  #From Sklearn
    ]
    time_series_tree = Pipeline(steps, verbose=True)  #sklearn
    time_series_tree.fit(X_train, y_train)
    print(
        'Accuracy of sklearn DecisionTreeClassifier in an sklearn Pipeline() built from sktime segmentation and transformation steps',
        round(time_series_tree.score(X_test, y_test), 4))

    #5. Using the sktime TimeSeriesForestClassifier
    tsf = TimeSeriesForestClassifier(n_estimators=100, verbose=True)
    tsf.fit(X_train, y_train)
    print('Accuracy of sktime TimeSeriesForestClassifier',
          round(tsf.score(X_test, y_test), 4))
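# Illustrative sketch (pandas/numpy only; data and names are hypothetical) of
# what the tabularization step in main() does: each cell of a nested sktime
# DataFrame holds a whole pd.Series, and tabularizing expands those series into
# one flat column per time point so plain sklearn estimators can consume them.
import numpy as np
import pandas as pd

nested = pd.DataFrame({
    "dim_0": [pd.Series(np.sin(np.arange(24))),   # one short series per instance
              pd.Series(np.cos(np.arange(24)))]
})
flat = pd.DataFrame(np.vstack(nested["dim_0"].tolist()))
print(flat.shape)  # (2, 24): rows are instances, columns are time points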