예제 #1
0
def test_make_reduction_infer_scitype_raises_error():
    """Test make_reduction.

    The scitype of pipeline cannot be inferred here, as it may be used together
    with a tabular or time series regressor.
    """
    estimator = make_pipeline(Tabularizer(), LinearRegression())
    with pytest.raises(ValueError):
        make_reduction(estimator, scitype="infer")
예제 #2
0
def rise_benchmarking():
    for i in range(0, len(benchmark_datasets)):
        dataset = benchmark_datasets[i]
        print(str(i) + " problem = " + dataset)
        rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISE",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "acf",
                            make_row_transformer(
                                FunctionTransformer(func=acf_coefs, validate=False)
                            ),
                        ),
                        (
                            "ps",
                            make_row_transformer(
                                FunctionTransformer(func=powerspectrum, validate=False)
                            ),
                        ),
                    ]
                ),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        rise = TimeSeriesForestClassifier(estimator=base_estimator, n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISEComposite",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
예제 #3
0
def test_factory_method_ts_direct():
    y = load_airline()
    y_train, y_test = temporal_train_test_split(y, test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    ts_regressor = Pipeline([("tabularize", Tabularizer()),
                             ("model", LinearRegression())])
    f1 = ReducedForecaster(ts_regressor,
                           scitype="ts_regressor",
                           strategy="direct")
    f2 = DirectTimeSeriesRegressionForecaster(ts_regressor)

    actual = f1.fit(y_train, fh=fh).predict(fh)
    expected = f2.fit(y_train, fh=fh).predict(fh)

    np.testing.assert_array_equal(actual, expected)
예제 #4
0
def test_ColumnTransformer_pipeline():
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    # using Identity function transformations (transform series to series)
    def id_func(X):
        return X

    column_transformer = ColumnTransformer([
        ("id0", FunctionTransformer(func=id_func, validate=False), ["dim_0"]),
        ("id1", FunctionTransformer(func=id_func, validate=False), ["dim_1"]),
    ])
    steps = [
        ("extract", column_transformer),
        ("tabularise", Tabularizer()),
        ("classify", RandomForestClassifier(n_estimators=2, random_state=1)),
    ]
    model = Pipeline(steps=steps)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))
예제 #5
0
     "forecasters": FORECASTERS
 },
 FeatureUnion: {
     "transformer_list": TRANSFORMERS
 },
 DirectRegressionForecaster: {
     "regressor": REGRESSOR
 },
 MultioutputRegressionForecaster: {
     "regressor": REGRESSOR
 },
 RecursiveRegressionForecaster: {
     "regressor": REGRESSOR
 },
 DirectTimeSeriesRegressionForecaster: {
     "regressor": make_pipeline(Tabularizer(), REGRESSOR)
 },
 RecursiveTimeSeriesRegressionForecaster: {
     "regressor": make_pipeline(Tabularizer(), REGRESSOR)
 },
 TransformedTargetForecaster: {
     "steps": STEPS
 },
 EnsembleForecaster: {
     "forecasters": FORECASTERS
 },
 StackingForecaster: {
     "forecasters": FORECASTERS,
     "final_regressor": REGRESSOR
 },
 Detrender: {
예제 #6
0
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build. To differentiate settings just and another elif. So, for example, if
    you wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in the elif.
    This may well get superceded, it is just how e have always done it
    :param cls: String indicating which classifier you want
    :return: A classifier.

    """
    if cls.lower() == "pf":
        return pf.ProximityForest(random_state=resampleId)
    elif cls.lower() == "pt":
        return pf.ProximityTree(random_state=resampleId)
    elif cls.lower() == "ps":
        return pf.ProximityStump(random_state=resampleId)
    elif cls.lower() == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif cls.lower() == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    elif cls.lower() == "boss":
        return db.BOSSEnsemble()
    elif cls.lower() == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif cls.lower() == "dtw":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtw")
    elif cls.lower() == "ee" or cls.lower() == "elasticensemble":
        return dist.ElasticEnsemble()
    elif cls.lower() == "shapedtw_raw":
        return ShapeDTW(subsequence_length=30,
                        shape_descriptor_function="raw",
                        metric_params=None)
    elif cls.lower() == "shapedtw_dwt":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="dwt",
            metric_params={"num_levels_dwt": 3},
        )
    elif cls.lower() == "shapedtw_paa":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="paa",
            metric_params={"num_intervals_paa": 5},
        )
    elif cls.lower() == "shapedtw_slope":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="slope",
            metric_params={"num_intervals_slope": 5},
        )
    elif cls.lower() == "shapedtw_hog1d":
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function="hog1d",
            metric_params={
                "num_bins_hog1d": 8,
                "num_intervals_hog1d": 2,
                "scaling_factor_hog1d": 0.1,
            },
        )
    elif cls.lower() == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif cls.lower() == "risecomposite":
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs,
                                                validate=False)),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum,
                                                validate=False)),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
예제 #7
0
     "transformer_list": TRANSFORMERS
 },
 DirectTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 MultioutputTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 RecursiveTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 DirRecTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 DirectTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 RecursiveTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 MultioutputTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 DirRecTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 TransformedTargetForecaster: {
     "steps": STEPS
 },
 ForecastingPipeline: {
     "steps": STEPS
    def StartTrain(self): 
        train_files = glob.glob(self.lineEdit.text() + "\\*.csv")
        test_files = glob.glob(self.lineEdit_2.text() + "\\*.csv")
        train_li = []
        for filename in train_files:                
            df = pd.read_csv(filename, index_col=None, header=None,usecols=[2])
            train_li.append(df)
        X_df = pd.concat(train_li, axis=1, ignore_index=True)
        X_df = X_df.T  

        test_li = []        
        for filename in test_files:
            df = pd.read_csv(filename, index_col=None, header=None,usecols=[2])
            test_li.append(df)
        X_df_ng = pd.concat(test_li, axis=1, ignore_index=True)
        X_df_ng = X_df_ng.T

        X_df = X_df.append(X_df_ng)           
        X_df_tab = from_2d_array_to_nested(X_df)

        Y_df_ok = np.zeros(len(test_li), dtype="int32")
        Y_df_ng = np.ones(len(train_li), dtype="int32")
        Y_df = np.concatenate([Y_df_ok, Y_df_ng], 0)
        
        X_train, X_test, y_train, y_test = train_test_split(X_df_tab, Y_df, test_size= (100 - self.horizontalSlider.value()) / 100)
        self.tableWidget.setRowCount(0)
        selectedModel = self.comboBox.currentText()
        if(selectedModel == "RandomForestClassifier"):
            classifier = make_pipeline(Tabularizer(), RandomForestClassifier())
            classifier.fit(X_train, y_train)
            self.lineEdit_5.setText(str(classifier.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(classifier.score(X_test, y_test)))          
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                classifier_preds = classifier.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(classifier_preds)])   
            
        elif(selectedModel == "RocketClassifier"):
            rocket = RocketClassifier()
            rocket.fit(X_train, y_train)
            self.lineEdit_5.setText(str(rocket.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(rocket.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                rocket_preds = rocket.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(rocket_preds)]) 
        
        elif(selectedModel == "TimeSeriesForestClassifier"):
            tsf = TimeSeriesForestClassifier(n_estimators=50, random_state=47)
            tsf.fit(X_train, y_train)
            self.lineEdit_5.setText(str(tsf.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(tsf.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                tsf_preds = tsf.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(tsf_preds)]) 

        elif(selectedModel == "RandomIntervalSpectralEnsemble"):
            rise = RandomIntervalSpectralEnsemble(n_estimators=50, random_state=47)
            rise.fit(X_train, y_train)
            self.lineEdit_5.setText(str(rise.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(rise.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                rise_preds = rise.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(rise_preds)]) 

        elif(selectedModel == "SupervisedTimeSeriesForest"):
            stsf = SupervisedTimeSeriesForest(n_estimators=50, random_state=47)
            stsf.fit(X_train, y_train)
            self.lineEdit_5.setText(str(stsf.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(stsf.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                stsf_preds = rise.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(stsf_preds)]) 
        else:
            print("None")
예제 #9
0
def make_reduction_pipeline(estimator):
    """Use tabular estimators in time series setting."""
    pipeline = Pipeline([("transform", Tabularizer()), ("clf", estimator)])
    return pipeline
예제 #10
0
    # generate test data
    if method == "linear-trend":
        y = np.arange(start, end) * slope
    else:
        raise ValueError("`method` not understood")
    return y


@pytest.mark.parametrize("fh", TEST_OOS_FHS)
@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS_INT)
@pytest.mark.parametrize("strategy", STRATEGIES)
@pytest.mark.parametrize(
    "regressor, scitype",
    [
        (LinearRegression(), "tabular-regressor"),
        (make_pipeline(Tabularizer(), LinearRegression()), "time-series-regressor"),
    ],
)
@pytest.mark.parametrize(
    "method, slope",
    [
        ("linear-trend", 1),
        ("linear-trend", -3),
        ("linear-trend", 0),  # constant
    ],
)
def test_linear_extrapolation_endogenous_only(
    fh, window_length, strategy, method, slope, regressor, scitype
):
    """Test linear extrapolation endogenous only."""
    n_timepoints = 13
예제 #11
0
    # generate test data
    if method == "linear-trend":
        y = np.arange(start, end) * slope
    else:
        raise ValueError("`method` not understood")
    return y


@pytest.mark.parametrize("fh", TEST_OOS_FHS)
@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS)
@pytest.mark.parametrize("strategy", ["recursive", "direct", "multioutput"])
@pytest.mark.parametrize(
    "regressor, scitype",
    [
        (LinearRegression(), "tabular-regressor"),
        (make_pipeline(Tabularizer(),
                       LinearRegression()), "time-series-regressor"),
    ],
)
@pytest.mark.parametrize(
    "method, slope",
    [
        ("linear-trend", 1),
        ("linear-trend", -3),
        ("linear-trend", 0),  # constant
    ],
)
def test_linear_extrapolation(fh, window_length, strategy, method, slope,
                              regressor, scitype):
    n_timepoints = 13
    y = _make_y(0, n_timepoints, method=method, slope=slope)
예제 #12
0
def set_classifier(cls, resampleId):
    """
    Basic way of determining the classifier to build. To differentiate settings just and another elif. So, for example, if
    you wanted tuned TSF, you just pass TuneTSF and set up the tuning mechanism in the elif.
    This may well get superceded, it is just how e have always done it
    :param cls: String indicating which classifier you want
    :return: A classifier.

    """
    if cls.lower() == "pf":
        return pf.ProximityForest(random_state=resampleId)
    elif cls.lower() == "pt":
        return pf.ProximityTree(random_state=resampleId)
    elif cls.lower() == "ps":
        return pf.ProximityStump(random_state=resampleId)
    elif cls.lower() == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    elif cls.lower() == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    elif cls.lower() == "cif":
        return CanonicalIntervalForest(random_state=resampleId)
    elif cls.lower() == "boss":
        return BOSSEnsemble(random_state=resampleId)
    elif cls.lower() == "cboss":
        return ContractableBOSS(random_state=resampleId)
    elif cls.lower() == "tde":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif cls.lower() == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    elif cls.lower() == "dtwcv":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    elif cls.lower() == "ee" or cls.lower() == "elasticensemble":
        return dist.ElasticEnsemble()
    elif cls.lower() == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    elif cls.lower() == "risecomposite":
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion([
                    (
                        "acf",
                        make_row_transformer(
                            FunctionTransformer(func=acf_coefs,
                                                validate=False)),
                    ),
                    (
                        "ps",
                        make_row_transformer(
                            FunctionTransformer(func=powerspectrum,
                                                validate=False)),
                    ),
                ]),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        return ensemble.TimeSeriesForestClassifier(estimator=base_estimator,
                                                   n_estimators=100)
    elif cls.lower() == "rocket":
        rocket_pipeline = make_pipeline(
            Rocket(random_state=resampleId),
            RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True),
        )
        return rocket_pipeline
    else:
        raise Exception("UNKNOWN CLASSIFIER")