def fit(self, data):
    """Fit a time series forest; with k-fold CV, keep only the best fold's model."""
    if data.kfold > 1:
        cv_eval = {}
        for cv_fold in data.Xy_train:
            (X_train, y_train), (X_val, y_val) = data.Xy_train[cv_fold]
            X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
            tsf = (ComposableTimeSeriesForestRegressor(n_jobs=-1)
                   if data.tasktype == 'regression'
                   else ComposableTimeSeriesForestClassifier(n_jobs=-1))
            tsf.fit(X_train, y_train)
            eval_metrics = weareval.eval_output(tsf.predict(X_val), y_val,
                                                tasktype=data.tasktype)
            cv_eval[cv_fold] = {
                'model': tsf,
                # 'data': [(X_train, y_train), (X_val, y_val)],  # store just IDs?
                'metric': (eval_metrics['mae'] if data.tasktype == 'regression'
                           else eval_metrics['balanced_acc_adj']),
                'metrics': eval_metrics,
            }
        # retain only the best model: lowest MAE for regression,
        # highest adjusted balanced accuracy for classification
        tmp = {cv_fold: cv_eval[cv_fold]['metric'] for cv_fold in cv_eval}
        bst_fold = (min(tmp, key=tmp.get) if data.tasktype == 'regression'
                    else max(tmp, key=tmp.get))
        self.tsf = cv_eval[bst_fold]['model']
        return {'model': self.tsf, 'metrics': cv_eval[bst_fold]['metrics']}
    else:
        X_train, y_train = data.Xy_train
        X_val, y_val = data.Xy_val
        X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
        self.tsf = (ComposableTimeSeriesForestRegressor(n_jobs=-1)
                    if data.tasktype == 'regression'
                    else ComposableTimeSeriesForestClassifier(n_jobs=-1))
        self.tsf.fit(X_train, y_train)
        eval_metrics = weareval.eval_output(self.tsf.predict(X_val), y_val,
                                            tasktype=data.tasktype)
        return {'model': self.tsf, 'metrics': eval_metrics}

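# --- Usage sketch (not from the original source) --------------------------
# A minimal driver for the single-split branch of fit() above. The real data
# container and the `weareval` module are not shown in this snippet, so the
# SimpleNamespace below (with kfold, Xy_train, Xy_val, and tasktype
# attributes) and the `model` instance are assumptions for illustration only.
from types import SimpleNamespace

import numpy as np

rng = np.random.default_rng(42)
X = rng.normal(size=(60, 100))        # 60 series, 100 time points each
y = rng.integers(0, 2, size=60)       # binary labels

data = SimpleNamespace(
    kfold=1,                          # takes the else-branch: one train/val split
    Xy_train=(X[:40], y[:40]),
    Xy_val=(X[40:], y[40:]),
    tasktype='classification',
)
# `model` stands for an instance of the class that defines fit() above
result = model.fit(data)
print(result['metrics'])
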
# parametrization is required for the fixture arguments below;
# the value grids are examples, assumed for illustration
@pytest.mark.parametrize("n_intervals", ["sqrt", 1, 3])
@pytest.mark.parametrize("n_estimators", [1, 3])
def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    random_state = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)

    features = [np.mean, np.std, _slope]
    steps = [
        (
            "transform",
            RandomIntervalFeatureExtractor(
                random_state=random_state, features=features
            ),
        ),
        ("clf", DecisionTreeClassifier()),
    ]
    estimator = Pipeline(steps)

    clf1 = ComposableTimeSeriesForestClassifier(
        estimator=estimator, random_state=random_state, n_estimators=n_estimators
    )
    clf1.fit(X_train, y_train)
    a = clf1.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    clf2 = ComposableTimeSeriesForestClassifier(
        random_state=random_state, n_estimators=n_estimators
    )
    clf2.fit(X_train, y_train)
    b = clf2.predict_proba(X_test)

    np.testing.assert_array_equal(a, b)

def tsf_benchmarking():
    """Benchmark the built-in TSF against the composable pipeline variant."""
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)
        # reference implementation: interval-based TimeSeriesForest
        tsf = ib.TimeSeriesForest(n_estimators=100)
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSF",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )
        # composite variant: same features (mean, std, slope) via an explicit pipeline
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals="sqrt")),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "mean",
                            make_row_transformer(
                                FunctionTransformer(func=np.mean, validate=False)
                            ),
                        ),
                        (
                            "std",
                            make_row_transformer(
                                FunctionTransformer(func=np.std, validate=False)
                            ),
                        ),
                        (
                            "slope",
                            make_row_transformer(
                                FunctionTransformer(func=_slope, validate=False)
                            ),
                        ),
                    ]
                ),
            ),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        tsf = ComposableTimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )
        exp.run_experiment(
            overwrite=False,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonTSFComposite",
            classifier=tsf,
            dataset=dataset,
            train_file=False,
        )

def rise_benchmarking():
    """Benchmark the built-in RISE against the composable pipeline variant."""
    for i, dataset in enumerate(benchmark_datasets):
        print(str(i) + " problem = " + dataset)
        # reference implementation: Random Interval Spectral Ensemble (RISE)
        rise = fb.RandomIntervalSpectralForest(n_estimators=100)
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISE",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )
        # composite variant: one interval per series with ACF and power-spectrum features
        steps = [
            ("segment", RandomIntervalSegmenter(n_intervals=1, min_length=5)),
            (
                "transform",
                FeatureUnion(
                    [
                        (
                            "acf",
                            make_row_transformer(
                                FunctionTransformer(func=acf_coefs, validate=False)
                            ),
                        ),
                        (
                            "ps",
                            make_row_transformer(
                                FunctionTransformer(func=powerspectrum, validate=False)
                            ),
                        ),
                    ]
                ),
            ),
            ("tabularise", Tabularizer()),
            ("clf", DecisionTreeClassifier()),
        ]
        base_estimator = Pipeline(steps)
        rise = ComposableTimeSeriesForestClassifier(
            estimator=base_estimator, n_estimators=100
        )
        exp.run_experiment(
            overwrite=True,
            problem_path=data_dir,
            results_path=results_dir,
            cls_name="PythonRISEComposite",
            classifier=rise,
            dataset=dataset,
            train_file=False,
        )

def test_predict_proba():
    clf = ComposableTimeSeriesForestClassifier(n_estimators=2)
    clf.fit(X, y)
    proba = clf.predict_proba(X)

    assert proba.shape == (X.shape[0], n_classes)
    np.testing.assert_array_equal(np.ones(X.shape[0]), np.sum(proba, axis=1))

    # test single row input
    y_proba = clf.predict_proba(X.iloc[[0], :])
    assert y_proba.shape == (1, n_classes)

    y_pred = clf.predict(X.iloc[[0], :])
    assert y_pred.shape == (1,)

def test_stat():
    """Test sign ranks."""
    data = load_gunpoint(split="train", return_X_y=False)
    dataset = RAMDataset(dataset=data, name="gunpoint")
    task = TSCTask(target="class_val")

    fc = ComposableTimeSeriesForestClassifier(n_estimators=1, random_state=1)
    strategy_fc = TSCStrategy(fc, name="tsf")
    pf = KNeighborsTimeSeriesClassifier()
    strategy_pf = TSCStrategy(pf, name="pf")

    # result backend
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[strategy_pf, strategy_fc],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    analyse = Evaluator(results)
    metric = PairwiseMetric(func=accuracy_score, name="accuracy")
    _ = analyse.evaluate(metric=metric)

    ranks = analyse.rank(ascending=True)
    pf_rank = ranks.loc[ranks.strategy == "pf", "accuracy_mean_rank"].item()  # 1
    fc_rank = ranks.loc[ranks.strategy == "tsf", "accuracy_mean_rank"].item()  # 2
    rank_array = [pf_rank, fc_rank]
    rank_array_test = [1, 2]

    _, sign_test_df = analyse.sign_test()
    sign_array = [
        [sign_test_df["pf"][0], sign_test_df["pf"][1]],
        [sign_test_df["tsf"][0], sign_test_df["tsf"][1]],
    ]
    sign_array_test = [[1, 1], [1, 1]]
    np.testing.assert_equal(
        [rank_array, sign_array], [rank_array_test, sign_array_test]
    )

# -*- coding: utf-8 -*-
import pytest

from sktime.benchmarking.strategies import TSCStrategy
from sktime.benchmarking.tasks import TSCTask
from sktime.classification.compose import ComposableTimeSeriesForestClassifier
from sktime.datasets import load_gunpoint
from sktime.datasets import load_italy_power_demand

classifier = ComposableTimeSeriesForestClassifier(n_estimators=2)

DATASET_LOADERS = (load_gunpoint, load_italy_power_demand)


# Test output of time-series classification strategies
@pytest.mark.parametrize("dataset", DATASET_LOADERS)
def test_TSCStrategy(dataset):
    train = dataset(split="train")
    test = dataset(split="test")
    s = TSCStrategy(classifier)
    task = TSCTask(target="class_val")
    s.fit(task, train)
    y_pred = s.predict(test)
    assert y_pred.shape == test[task.target].shape

# min-max normalise each series (row-wise)
norm_data = norm_data.apply(lambda x: (x - x.min()) / (x.max() - x.min()), axis=1)
X_norm = norm_data.values

# binary label
lb = LabelBinarizer()
y = lb.fit_transform(label)
y = y.reshape(-1)[:]

# the tabular data must be converted to nested format
# before sktime algorithms can be applied
X_nested = from_2d_array_to_nested(X_norm)[:]

# model and parameter definitions
model_params = {
    'ComposableTSF': {
        'model': ComposableTimeSeriesForestClassifier(),
        'params': {
            'n_estimators': [200, 300, 350, 400, 500]
        }
    }
}

# metric and parameter definitions
scoring = {
    'acc': 'accuracy',
    'prec': make_scorer(precision_score, pos_label=pos_label),
    'avg_prec': make_scorer(average_precision_score, pos_label=pos_label),
    'recall': make_scorer(recall_score, pos_label=pos_label),
    'f1': make_scorer(f1_score, pos_label=pos_label),
    'bal_acc': 'balanced_accuracy'
}
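
# --- Usage sketch (not from the original source) --------------------------
# The model_params and scoring dictionaries above appear intended to drive a
# cross-validated grid search. A minimal sketch with scikit-learn's
# GridSearchCV follows; refit='bal_acc' and cv=5 are illustrative choices,
# not taken from the original.
from sklearn.model_selection import GridSearchCV

search_results = {}
for name, spec in model_params.items():
    search = GridSearchCV(
        estimator=spec['model'],
        param_grid=spec['params'],
        scoring=scoring,
        refit='bal_acc',  # select the model with the best balanced accuracy
        cv=5,
        n_jobs=-1,
    )
    search.fit(X_nested, y)
    search_results[name] = {
        'best_params': search.best_params_,
        'best_score': search.best_score_,
    }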