Python Tabularizer 예제들, sktime.transformations.panel.reduce.Tabularizer Python 예제들

예제 #1

0

파일 보기

def test_make_reduction_infer_scitype_raises_error():
    """Check that ``make_reduction`` refuses to infer an ambiguous scitype.

    A pipeline may be combined with either a tabular or a time series
    regressor, so its scitype cannot be inferred; ``scitype="infer"`` is
    therefore expected to raise ``ValueError``.
    """
    ambiguous_pipeline = make_pipeline(Tabularizer(), LinearRegression())

    with pytest.raises(ValueError):
        make_reduction(ambiguous_pipeline, scitype="infer")

예제 #2

0

파일 보기

파일: basic_benchmarking.py 프로젝트: xcon2/sktime

def rise_benchmarking():
    """Run the RISE benchmark experiments for every benchmark dataset.

    For each entry of ``benchmark_datasets`` two experiments are launched via
    ``exp.run_experiment``: one with ``RandomIntervalSpectralForest`` (results
    stored under "PythonRISE") and one with a composite pipeline wrapped in
    ``TimeSeriesForestClassifier`` (results stored under
    "PythonRISEComposite").
    """
    for position, dataset in enumerate(benchmark_datasets):
        print(str(position) + " problem = " + dataset)

        # Arguments that are the same for both experiments on this dataset.
        common_args = {
            "overwrite": True,
            "problem_path": data_dir,
            "results_path": results_dir,
            "dataset": dataset,
            "train_file": False,
        }

        builtin_rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(
            cls_name="PythonRISE", classifier=builtin_rise, **common_args
        )

        # Composite variant: random interval -> ACF and power-spectrum
        # features -> tabular form -> decision tree.
        segmenter = RandomIntervalSegmenter(n_intervals=1, min_length=5)
        acf_features = make_row_transformer(
            FunctionTransformer(func=acf_coefs, validate=False)
        )
        ps_features = make_row_transformer(
            FunctionTransformer(func=powerspectrum, validate=False)
        )
        feature_union = FeatureUnion([("acf", acf_features), ("ps", ps_features)])
        base_estimator = Pipeline(
            [
                ("segment", segmenter),
                ("transform", feature_union),
                ("tabularise", Tabularizer()),
                ("clf", DecisionTreeClassifier()),
            ]
        )
        composite_rise = TimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )
        exp.run_experiment(
            cls_name="PythonRISEComposite", classifier=composite_rise, **common_args
        )

예제 #3

0

파일 보기

파일: test_reduce.py 프로젝트: preeti13456/sktime

def test_factory_method_ts_direct():
    """Compare ``ReducedForecaster`` with the direct forecaster class.

    Both forecasters wrap the same time series regression pipeline and are
    fitted on the airline training split; their predictions over the test
    horizon must be equal.
    """
    airline = load_airline()
    y_train, y_test = temporal_train_test_split(airline, test_size=24)
    fh = ForecastingHorizon(y_test.index, is_relative=False)

    pipeline_steps = [
        ("tabularize", Tabularizer()),
        ("model", LinearRegression()),
    ]
    ts_regressor = Pipeline(pipeline_steps)

    via_factory = ReducedForecaster(
        ts_regressor, scitype="ts_regressor", strategy="direct"
    )
    via_class = DirectTimeSeriesRegressionForecaster(ts_regressor)

    actual = via_factory.fit(y_train, fh=fh).predict(fh)
    expected = via_class.fit(y_train, fh=fh).predict(fh)

    np.testing.assert_array_equal(actual, expected)

예제 #4

0

파일 보기

파일: test_compose.py 프로젝트: zhaoyun0071/sktime

def test_ColumnTransformer_pipeline():
    """Fit and predict with a ``ColumnTransformer`` inside a pipeline.

    Two columns of the basic motions data are passed through identity
    transformers, tabularised and classified. The test checks that one
    prediction is produced per test instance and that the predicted labels
    are the same set as the test labels.
    """
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)

    def id_func(X):
        # Identity transformation: series in, the same series out.
        return X

    passthrough_dim_0 = FunctionTransformer(func=id_func, validate=False)
    passthrough_dim_1 = FunctionTransformer(func=id_func, validate=False)
    column_transformer = ColumnTransformer(
        [
            ("id0", passthrough_dim_0, ["dim_0"]),
            ("id1", passthrough_dim_1, ["dim_1"]),
        ]
    )

    model = Pipeline(
        steps=[
            ("extract", column_transformer),
            ("tabularise", Tabularizer()),
            ("classify", RandomForestClassifier(n_estimators=2, random_state=1)),
        ]
    )
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    assert y_pred.shape[0] == y_test.shape[0]
    np.testing.assert_array_equal(np.unique(y_pred), np.unique(y_test))

예제 #5

0

파일 보기

파일: _config.py 프로젝트: sparkingdark/sktime

     "forecasters": FORECASTERS
 },
 FeatureUnion: {
     "transformer_list": TRANSFORMERS
 },
 DirectRegressionForecaster: {
     "regressor": REGRESSOR
 },
 MultioutputRegressionForecaster: {
     "regressor": REGRESSOR
 },
 RecursiveRegressionForecaster: {
     "regressor": REGRESSOR
 },
 DirectTimeSeriesRegressionForecaster: {
     "regressor": make_pipeline(Tabularizer(), REGRESSOR)
 },
 RecursiveTimeSeriesRegressionForecaster: {
     "regressor": make_pipeline(Tabularizer(), REGRESSOR)
 },
 TransformedTargetForecaster: {
     "steps": STEPS
 },
 EnsembleForecaster: {
     "forecasters": FORECASTERS
 },
 StackingForecaster: {
     "forecasters": FORECASTERS,
     "final_regressor": REGRESSOR
 },
 Detrender: {

예제 #6

0

파일 보기

파일: experiments_shape_dtw.py 프로젝트: xcon2/sktime

def set_classifier(cls, resampleId):
    """Build the classifier identified by a short name.

    The name is compared case-insensitively against a fixed set of keys. To
    support a different configuration (for example a tuned TSF), add another
    branch that sets up that configuration.

    :param cls: String indicating which classifier is wanted.
    :param resampleId: Passed as ``random_state`` by the branches that
        forward it; the other branches ignore it.
    :return: A classifier.
    :raises Exception: If ``cls`` does not match any known key.
    """
    key = cls.lower()

    if key == "pf":
        return pf.ProximityForest(random_state=resampleId)
    if key == "pt":
        return pf.ProximityTree(random_state=resampleId)
    if key == "ps":
        return pf.ProximityStump(random_state=resampleId)
    if key == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    if key == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    if key == "boss":
        return db.BOSSEnsemble()
    if key == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    if key == "dtw":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtw")
    if key in ("ee", "elasticensemble"):
        return dist.ElasticEnsemble()

    # ShapeDTW variants differ only in the shape descriptor and its params.
    shapedtw_variants = {
        "shapedtw_raw": ("raw", None),
        "shapedtw_dwt": ("dwt", {"num_levels_dwt": 3}),
        "shapedtw_paa": ("paa", {"num_intervals_paa": 5}),
        "shapedtw_slope": ("slope", {"num_intervals_slope": 5}),
        "shapedtw_hog1d": (
            "hog1d",
            {
                "num_bins_hog1d": 8,
                "num_intervals_hog1d": 2,
                "scaling_factor_hog1d": 0.1,
            },
        ),
    }
    if key in shapedtw_variants:
        descriptor, descriptor_params = shapedtw_variants[key]
        return ShapeDTW(
            subsequence_length=30,
            shape_descriptor_function=descriptor,
            metric_params=descriptor_params,
        )

    if key == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    if key == "risecomposite":
        segmenter = RandomIntervalSegmenter(n_intervals=1, min_length=5)
        acf_features = make_row_transformer(
            FunctionTransformer(func=acf_coefs, validate=False)
        )
        ps_features = make_row_transformer(
            FunctionTransformer(func=powerspectrum, validate=False)
        )
        base_estimator = Pipeline(
            [
                ("segment", segmenter),
                (
                    "transform",
                    FeatureUnion([("acf", acf_features), ("ps", ps_features)]),
                ),
                ("tabularise", Tabularizer()),
                ("clf", DecisionTreeClassifier()),
            ]
        )
        return ensemble.TimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )

    raise Exception("UNKNOWN CLASSIFIER")

예제 #7

0

파일 보기

     "transformer_list": TRANSFORMERS
 },
 DirectTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 MultioutputTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 RecursiveTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 DirRecTabularRegressionForecaster: {
     "estimator": REGRESSOR
 },
 DirectTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 RecursiveTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 MultioutputTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 DirRecTimeSeriesRegressionForecaster: {
     "estimator": make_pipeline(Tabularizer(), REGRESSOR)
 },
 TransformedTargetForecaster: {
     "steps": STEPS
 },
 ForecastingPipeline: {
     "steps": STEPS

예제 #8

0

파일 보기

파일: sktime_classificationTrain.py 프로젝트: jeremyjong/sktime-classification

    def StartTrain(self):
        """Train the model chosen in the combo box and show its results.

        CSV files are read from the two folders entered in ``lineEdit`` and
        ``lineEdit_2``; column index 2 of each file becomes one sample row.
        Rows from the first folder are labelled 0 and rows from the second
        folder are labelled 1. The samples are split into train and test
        parts according to the slider value, the selected classifier is
        fitted, its train and test scores are written to ``lineEdit_5`` and
        ``lineEdit_6``, and one table row per test sample is added.

        If the selected model name is not recognised, "None" is printed and
        nothing is trained.
        """

        def load_samples(folder):
            # One DataFrame per CSV file, keeping only column index 2; after
            # concatenating side by side and transposing, each file is one row.
            frames = [
                pd.read_csv(path, index_col=None, header=None, usecols=[2])
                for path in glob.glob(folder + "\\*.csv")
            ]
            return pd.concat(frames, axis=1, ignore_index=True).T, len(frames)

        # NOTE(review): the first folder appears to hold the "ok" samples and
        # the second the "ng" samples (per the original variable names) —
        # confirm against the UI labels.
        X_df_ok, n_ok = load_samples(self.lineEdit.text())
        X_df_ng, n_ng = load_samples(self.lineEdit_2.text())

        # DataFrame.append was removed in pandas 2.0; pd.concat stacks the
        # rows in the same order (first folder, then second folder).
        X_df = pd.concat([X_df_ok, X_df_ng])
        X_df_tab = from_2d_array_to_nested(X_df)

        # Label lengths must follow the row order above. The original sized
        # the zeros by the second folder and the ones by the first, which
        # misaligned the labels whenever the folders held different counts.
        Y_df_ok = np.zeros(n_ok, dtype="int32")
        Y_df_ng = np.ones(n_ng, dtype="int32")
        Y_df = np.concatenate([Y_df_ok, Y_df_ng], 0)

        # The slider value is the training percentage; the rest is held out.
        test_fraction = (100 - self.horizontalSlider.value()) / 100
        X_train, X_test, y_train, y_test = train_test_split(
            X_df_tab, Y_df, test_size=test_fraction
        )
        self.tableWidget.setRowCount(0)

        selectedModel = self.comboBox.currentText()
        if selectedModel == "RandomForestClassifier":
            classifier = make_pipeline(Tabularizer(), RandomForestClassifier())
        elif selectedModel == "RocketClassifier":
            classifier = RocketClassifier()
        elif selectedModel == "TimeSeriesForestClassifier":
            classifier = TimeSeriesForestClassifier(n_estimators=50, random_state=47)
        elif selectedModel == "RandomIntervalSpectralEnsemble":
            classifier = RandomIntervalSpectralEnsemble(
                n_estimators=50, random_state=47
            )
        elif selectedModel == "SupervisedTimeSeriesForest":
            classifier = SupervisedTimeSeriesForest(n_estimators=50, random_state=47)
        else:
            print("None")
            return

        classifier.fit(X_train, y_train)
        self.lineEdit_5.setText(str(classifier.score(X_train, y_train)))
        self.lineEdit_6.setText(str(classifier.score(X_test, y_test)))

        # Every model is reported the same way. The original
        # SupervisedTimeSeriesForest branch predicted with the unbound name
        # `rise` and raised NameError; it now uses the fitted model.
        for i, actual_label in enumerate(y_test):
            # Kept from the original: re-sets the row count to its current
            # value. Presumably addTableRow inserts the row — TODO confirm.
            row = self.tableWidget.rowCount()
            self.tableWidget.setRowCount(row)
            prediction = classifier.predict(X_test.iloc[i].to_frame())
            self.addTableRow(
                self.tableWidget, [str(i), str(actual_label), str(prediction)]
            )

예제 #9

0

파일 보기

def make_reduction_pipeline(estimator):
    """Wrap a tabular estimator for use in a time series setting.

    Returns a two-step ``Pipeline``: a ``Tabularizer`` named "transform"
    followed by ``estimator`` under the name "clf".
    """
    reduction_steps = [
        ("transform", Tabularizer()),
        ("clf", estimator),
    ]
    return Pipeline(reduction_steps)

예제 #10

0

파일 보기

    # generate test data
    if method == "linear-trend":
        y = np.arange(start, end) * slope
    else:
        raise ValueError("`method` not understood")
    return y


@pytest.mark.parametrize("fh", TEST_OOS_FHS)
@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS_INT)
@pytest.mark.parametrize("strategy", STRATEGIES)
@pytest.mark.parametrize(
    "regressor, scitype",
    [
        (LinearRegression(), "tabular-regressor"),
        (make_pipeline(Tabularizer(), LinearRegression()), "time-series-regressor"),
    ],
)
@pytest.mark.parametrize(
    "method, slope",
    [
        ("linear-trend", 1),
        ("linear-trend", -3),
        ("linear-trend", 0),  # constant
    ],
)
def test_linear_extrapolation_endogenous_only(
    fh, window_length, strategy, method, slope, regressor, scitype
):
    """Test linear extrapolation endogenous only."""
    n_timepoints = 13

예제 #11

0

파일 보기

    # generate test data
    if method == "linear-trend":
        y = np.arange(start, end) * slope
    else:
        raise ValueError("`method` not understood")
    return y


@pytest.mark.parametrize("fh", TEST_OOS_FHS)
@pytest.mark.parametrize("window_length", TEST_WINDOW_LENGTHS)
@pytest.mark.parametrize("strategy", ["recursive", "direct", "multioutput"])
@pytest.mark.parametrize(
    "regressor, scitype",
    [
        (LinearRegression(), "tabular-regressor"),
        (make_pipeline(Tabularizer(),
                       LinearRegression()), "time-series-regressor"),
    ],
)
@pytest.mark.parametrize(
    "method, slope",
    [
        ("linear-trend", 1),
        ("linear-trend", -3),
        ("linear-trend", 0),  # constant
    ],
)
def test_linear_extrapolation(fh, window_length, strategy, method, slope,
                              regressor, scitype):
    n_timepoints = 13
    y = _make_y(0, n_timepoints, method=method, slope=slope)

예제 #12

0

파일 보기

파일: experiments.py 프로젝트: zeta1999/sktime

def set_classifier(cls, resampleId):
    """Build the classifier identified by a short name.

    The name is compared case-insensitively against a fixed set of keys. To
    support a different configuration (for example a tuned TSF), add another
    branch that sets up that configuration.

    :param cls: String indicating which classifier is wanted.
    :param resampleId: Passed as ``random_state`` by the branches that
        forward it; the other branches ignore it.
    :return: A classifier.
    :raises Exception: If ``cls`` does not match any known key.
    """
    key = cls.lower()

    if key == "pf":
        return pf.ProximityForest(random_state=resampleId)
    if key == "pt":
        return pf.ProximityTree(random_state=resampleId)
    if key == "ps":
        return pf.ProximityStump(random_state=resampleId)
    if key == "rise":
        return fb.RandomIntervalSpectralForest(random_state=resampleId)
    if key == "tsf":
        return ib.TimeSeriesForest(random_state=resampleId)
    if key == "cif":
        return CanonicalIntervalForest(random_state=resampleId)
    if key == "boss":
        return BOSSEnsemble(random_state=resampleId)
    if key == "cboss":
        return ContractableBOSS(random_state=resampleId)
    if key == "tde":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    if key == "st":
        return st.ShapeletTransformClassifier(time_contract_in_mins=1500)
    if key == "dtwcv":
        return nn.KNeighborsTimeSeriesClassifier(metric="dtwcv")
    if key in ("ee", "elasticensemble"):
        return dist.ElasticEnsemble()
    if key == "tsfcomposite":
        # It defaults to TSF
        return ensemble.TimeSeriesForestClassifier()
    if key == "risecomposite":
        segmenter = RandomIntervalSegmenter(n_intervals=1, min_length=5)
        acf_features = make_row_transformer(
            FunctionTransformer(func=acf_coefs, validate=False)
        )
        ps_features = make_row_transformer(
            FunctionTransformer(func=powerspectrum, validate=False)
        )
        base_estimator = Pipeline(
            [
                ("segment", segmenter),
                (
                    "transform",
                    FeatureUnion([("acf", acf_features), ("ps", ps_features)]),
                ),
                ("tabularise", Tabularizer()),
                ("clf", DecisionTreeClassifier()),
            ]
        )
        return ensemble.TimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )
    if key == "rocket":
        ridge = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10), normalize=True)
        return make_pipeline(Rocket(random_state=resampleId), ridge)

    raise Exception("UNKNOWN CLASSIFIER")