def test_single_dataset_single_strategy_against_sklearn(
        dataset, cv, metric_func, estimator, results_cls, tmpdir):
    """Test against sklearn."""
    # set up orchestration
    task = TSCTask(target="class_val")

    # create strategies
    clf = make_reduction_pipeline(estimator)
    strategy = TSCStrategy(clf)

    # result backend
    if results_cls in [HDDResults]:
        # for hard drive results, create temporary directory using pytest's
        # tmpdir fixture
        tempdir = tmpdir.mkdir("results/")
        path = tempdir.dirpath()
        results = results_cls(path=path)
    elif results_cls in [RAMResults]:
        results = results_cls()
    else:
        raise ValueError()

    orchestrator = Orchestrator(datasets=[dataset],
                                tasks=[task],
                                strategies=[strategy],
                                cv=cv,
                                results=results)
    orchestrator.fit_predict(save_fitted_strategies=False)

    evaluator = Evaluator(results)

    # create metric classes for evaluation and set metric kwargs
    if metric_func in [accuracy_score]:
        kwargs = {}  # empty kwargs for simple pairwise metrics
        metric = PairwiseMetric(func=metric_func, name="metric")
    elif metric_func in [f1_score]:
        kwargs = {"average": "macro"}  # set kwargs for composite metrics
        metric = AggregateMetric(func=metric_func, name="metric", **kwargs)
    else:
        raise ValueError()

    metrics = evaluator.evaluate(metric=metric)
    actual = metrics["metric_mean"].iloc[0]

    # compare against sklearn cross_val_score
    data = dataset.load()  # load data
    X = data.loc[:, task.features]
    y = data.loc[:, task.target]
    expected = cross_val_score(clf, X, y,
                               scoring=make_scorer(metric_func, **kwargs),
                               cv=cv).mean()

    # compare results
    np.testing.assert_array_equal(actual, expected)
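# The test above expects its arguments (dataset, cv, metric_func, estimator,
# results_cls) to come from pytest parametrization that is not part of this
# excerpt. The helper test below is only an illustrative sketch of one such
# configuration, not the original parametrization: it reuses names already
# used in these snippets, and KFold is an added assumption because the same
# splitter is also passed to sklearn's cross_val_score.
def test_single_config_against_sklearn_smoke(tmpdir):
    """Smoke-test one fixed dataset/strategy/metric configuration."""
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from sklearn.model_selection import KFold
    from sktime.datasets import load_gunpoint

    dataset = RAMDataset(dataset=load_gunpoint(return_X_y=False),
                         name="gunpoint")
    test_single_dataset_single_strategy_against_sklearn(
        dataset=dataset,
        cv=KFold(n_splits=2, shuffle=True, random_state=1),
        metric_func=accuracy_score,
        estimator=RandomForestClassifier(n_estimators=2, random_state=1),
        results_cls=RAMResults,  # the RAMResults branch does not use tmpdir
        tmpdir=tmpdir,
    )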
def test_stat():
    """Test sign ranks."""
    data = load_gunpoint(split="train", return_X_y=False)
    dataset = RAMDataset(dataset=data, name="gunpoint")
    task = TSCTask(target="class_val")

    fc = ComposableTimeSeriesForestClassifier(n_estimators=1, random_state=1)
    strategy_fc = TSCStrategy(fc, name="tsf")
    pf = KNeighborsTimeSeriesClassifier()
    strategy_pf = TSCStrategy(pf, name="pf")

    # result backend
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[strategy_pf, strategy_fc],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    analyse = Evaluator(results)
    metric = PairwiseMetric(func=accuracy_score, name="accuracy")
    _ = analyse.evaluate(metric=metric)

    ranks = analyse.rank(ascending=True)
    pf_rank = ranks.loc[ranks.strategy == "pf", "accuracy_mean_rank"].item()  # 1
    fc_rank = ranks.loc[ranks.strategy == "tsf", "accuracy_mean_rank"].item()  # 2
    rank_array = [pf_rank, fc_rank]
    rank_array_test = [1, 2]

    _, sign_test_df = analyse.sign_test()
    sign_array = [
        [sign_test_df["pf"][0], sign_test_df["pf"][1]],
        [sign_test_df["tsf"][0], sign_test_df["tsf"][1]],
    ]
    sign_array_test = [[1, 1], [1, 1]]

    np.testing.assert_equal(
        [rank_array, sign_array], [rank_array_test, sign_array_test]
    )
def test_automated_orchestration_vs_manual(data_loader):
    """Test orchestration."""
    data = data_loader(return_X_y=False)

    dataset = RAMDataset(dataset=data, name="data")
    task = TSCTask(target="class_val")

    # create strategies
    # clf = TimeSeriesForestClassifier(n_estimators=1, random_state=1)
    clf = make_reduction_pipeline(
        RandomForestClassifier(n_estimators=2, random_state=1))
    strategy = TSCStrategy(clf)

    # result backend
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[strategy],
        cv=SingleSplit(random_state=1),
        results=results,
    )
    orchestrator.fit_predict(save_fitted_strategies=False)

    # get only the first item of the predictions iterator
    result = next(results.load_predictions(cv_fold=0, train_or_test="test"))
    actual = result.y_pred

    # expected output
    task = TSCTask(target="class_val")
    cv = SingleSplit(random_state=1)
    train_idx, test_idx = next(cv.split(data))
    train = data.iloc[train_idx, :]
    test = data.iloc[test_idx, :]
    strategy.fit(task, train)
    expected = strategy.predict(test)

    # compare results
    np.testing.assert_array_equal(actual, expected)
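# ``data_loader`` in the test above is likewise a pytest-parametrized argument
# whose grid is not shown in this excerpt. The wrapper below is an illustrative
# sketch of such a parametrization (not the original one), using only a data
# loader already referenced in these snippets; ``pytest`` and ``load_gunpoint``
# are assumed to be imported at module level.
@pytest.mark.parametrize("data_loader", [load_gunpoint])
def test_automated_orchestration_vs_manual_gunpoint(data_loader):
    """Run the manual-vs-orchestrated comparison on the gunpoint data."""
    test_automated_orchestration_vs_manual(data_loader)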
HOME = os.path.expanduser("~")
DATA_PATH = os.path.join(HOME, "Documents/Research/data/Univariate_ts")
RESULTS_PATH = "results"

# Alternatively, we can use a helper function to create the datasets and
# tasks automatically
datasets = make_datasets(path=DATA_PATH,
                         dataset_cls=UEADataset,
                         names=UNIVARIATE_DATASETS)
tasks = [TSCTask(target="target") for _ in range(len(datasets))]

results = HDDResults(path=RESULTS_PATH)

orchestrator = Orchestrator(
    datasets=datasets,
    tasks=tasks,
    strategies=STRATEGIES,
    cv=PresplitFilesCV(cv=UEAStratifiedCV(n_splits=30)),
    results=results)
orchestrator.fit_predict(save_fitted_strategies=False,
                         verbose=True,
                         overwrite_predictions=True,
                         save_timings=True)

evaluator = Evaluator(results=results)
metric = PairwiseMetric(func=accuracy_score, name="accuracy")
evaluator.evaluate(metric)
evaluator.metrics_by_strategy_dataset.to_csv(
    os.path.join(RESULTS_PATH, "accuracy.csv"), header=True)
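# STRATEGIES and UNIVARIATE_DATASETS are used above but defined elsewhere in
# the original script. The definitions below are only a minimal sketch of what
# they could look like, built from classifiers and a dataset name already
# appearing in these snippets; in the actual script such definitions would
# precede the block above.
UNIVARIATE_DATASETS = ["GunPoint"]  # extend with further UCR/UEA dataset names
STRATEGIES = [
    TSCStrategy(ComposableTimeSeriesForestClassifier(n_estimators=100),
                name="tsf"),
    TSCStrategy(KNeighborsTimeSeriesClassifier(), name="knn-dtw"),
]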
# define results output
results = HDDResults(path=RESULTS_PATH)
# results = RAMResults()

# run orchestrator
orchestrator = Orchestrator(datasets=datasets,
                            tasks=tasks,
                            strategies=strategies,
                            cv=PresplitFilesCV(),
                            results=results)
start = time.time()
orchestrator.fit_predict(save_fitted_strategies=False,
                         overwrite_fitted_strategies=False,
                         overwrite_predictions=True,
                         predict_on_train=False,
                         verbose=True)
elapsed = time.time() - start
print(elapsed)

# evaluate predictions
evaluator = Evaluator(results=results)
metric = PairwiseMetric(func=accuracy_score, name="accuracy")
metrics_by_strategy = evaluator.evaluate(metric=metric)

# save scores
evaluator.metrics_by_strategy_dataset.to_csv(
    os.path.join(RESULTS_PATH, "accuracy.csv"), header=True)
print(evaluator.metrics_by_strategy_dataset)
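# Beyond per-dataset accuracy scores, the Evaluator also supports the
# comparative analysis used in the tests above. A short follow-up sketch,
# assuming the orchestrator was run with at least two strategies, using only
# methods already shown in these snippets:
ranks = evaluator.rank(ascending=True)  # rank strategies by mean accuracy
print(ranks)
_, sign_test_df = evaluator.sign_test()  # pairwise sign test between strategies
print(sign_test_df)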