def test_highLevelsktime(network=CNNClassifier(nb_epochs=SMALL_NB_EPOCHS)):
    """
    End-to-end run of a classifier through the sktime task/strategy API.

    Loads the Italy power demand train and test splits, builds a ``TSCTask``
    on the ``class_val`` column, wraps ``network`` in a ``TSCStrategy``,
    fits it on the first ten training rows and prints the accuracy of the
    predictions made for the first ten test rows.

    Nothing is asserted; the function only has to run to completion.
    """
    print("start test_highLevelsktime()")

    from sktime.benchmarking.tasks import TSCTask
    from sktime.benchmarking.strategies import TSCStrategy
    from sklearn.metrics import accuracy_score

    # Only a small prefix of each split is used.
    first_ten = slice(10)

    train_df = load_italy_power_demand(split="train")
    test_df = load_italy_power_demand(split="test")

    tsc_task = TSCTask(target="class_val", metadata=train_df)
    tsc_strategy = TSCStrategy(network)
    tsc_strategy.fit(tsc_task, train_df.iloc[first_ten])

    predicted = tsc_strategy.predict(test_df.iloc[first_ten])
    observed = test_df.iloc[first_ten][tsc_task.target]

    print(accuracy_score(observed, predicted))
    print("End test_highLevelsktime()")
def test_highLevelsktime(network=ShapeletForestClassifier()):
    """
    Truly generalised test with sktime tasks/strategies.

    Steps: load the gunpoint train/test splits, build a ``TSCTask`` on the
    ``class_val`` column, wrap ``network`` in a ``TSCStrategy``, fit on the
    first ten training rows and print the accuracy obtained on the first
    ten test rows. Nothing is asserted; the function only has to run.

    Parameters
    ----------
    network : estimator, optional
        Classifier handed to ``TSCStrategy``. The default instance is
        created once, when this function is defined, so it is shared by
        every call that does not pass its own estimator.
    """
    print("start test_highLevelsktime()")

    from sktime.benchmarking.tasks import TSCTask
    from sktime.benchmarking.strategies import TSCStrategy
    from sklearn.metrics import accuracy_score

    train = load_gunpoint(split='train')
    test = load_gunpoint(split='test')
    task = TSCTask(target='class_val', metadata=train)

    strategy = TSCStrategy(network)
    strategy.fit(task, train.iloc[:10])

    # Both sides are cast to float before they are compared. The builtin
    # ``float`` is used because the ``np.float`` alias (which simply
    # pointed at it) was deprecated in NumPy 1.20 and removed in 1.24.
    y_pred = strategy.predict(test.iloc[:10]).astype(float)
    y_test = test.iloc[:10][task.target].values.astype(float)

    print(accuracy_score(y_test, y_pred))
    print("End test_highLevelsktime()")
def test_TSCStrategy(dataset):
    """Check that TSCStrategy predictions match the shape of the test target.

    ``dataset`` is a loader called with ``split="train"`` and
    ``split="test"``. The strategy wraps the module-level ``classifier``,
    is fitted on the training split for a task targeting ``class_val``,
    and its predictions on the test split must have the same shape as the
    test split's target column.
    """
    train_data = dataset(split="train")
    test_data = dataset(split="test")

    strategy = TSCStrategy(classifier)
    tsc_task = TSCTask(target="class_val")
    strategy.fit(tsc_task, train_data)

    predictions = strategy.predict(test_data)
    expected_shape = test_data[tsc_task.target].shape
    assert predictions.shape == expected_shape
def test_TSCStrategy(dataset):
    """Test strategy.

    Loads the train and test splits from ``dataset`` with
    ``return_X_y=False``, fits a ``TSCStrategy`` built around the
    module-level ``classifier`` on the training split, and asserts that
    the predictions for the test split have the same shape as the test
    split's ``class_val`` column.
    """
    # Load train first, then test, each with return_X_y=False.
    splits = {
        split_name: dataset(split=split_name, return_X_y=False)
        for split_name in ("train", "test")
    }

    fitted = TSCStrategy(classifier)
    tsc_task = TSCTask(target="class_val")
    fitted.fit(tsc_task, splits["train"])

    predictions = fitted.predict(splits["test"])
    target_values = splits["test"][tsc_task.target]
    assert predictions.shape == target_values.shape
def test_single_dataset_single_strategy_against_sklearn(
        dataset, cv, metric_func, estimator, results_cls, tmpdir):
    """Test against sklearn.

    Runs one dataset / one task / one strategy through the ``Orchestrator``,
    evaluates the stored predictions with ``Evaluator`` and checks that the
    resulting mean metric is equal to the mean of sklearn's
    ``cross_val_score`` for the same pipeline, metric and ``cv``.

    Raises
    ------
    ValueError
        If ``results_cls`` is neither ``HDDResults`` nor ``RAMResults``, or
        if ``metric_func`` is neither ``accuracy_score`` nor ``f1_score``.
    """
    # --- orchestration set-up -------------------------------------------
    tsc_task = TSCTask(target="class_val")
    pipeline = make_reduction_pipeline(estimator)
    tsc_strategy = TSCStrategy(pipeline)

    # --- result backend ---------------------------------------------------
    if results_cls in (HDDResults,):
        # Hard-drive results are written below pytest's tmpdir fixture; the
        # backend receives dirpath() of the newly created directory.
        results_dir = tmpdir.mkdir("results/")
        backend = results_cls(path=results_dir.dirpath())
    elif results_cls in (RAMResults,):
        backend = results_cls()
    else:
        raise ValueError()

    Orchestrator(
        datasets=[dataset],
        tasks=[tsc_task],
        strategies=[tsc_strategy],
        cv=cv,
        results=backend,
    ).fit_predict(save_fitted_strategies=False)

    evaluator = Evaluator(backend)

    # --- metric wrapper and the kwargs later handed to make_scorer -------
    if metric_func in (accuracy_score,):
        scorer_kwargs = {}  # simple pairwise metric: nothing extra
        metric = PairwiseMetric(func=metric_func, name="metric")
    elif metric_func in (f1_score,):
        scorer_kwargs = {"average": "macro"}  # composite metric settings
        metric = AggregateMetric(
            func=metric_func, name="metric", **scorer_kwargs)
    else:
        raise ValueError()

    scores = evaluator.evaluate(metric=metric)
    actual = scores["metric_mean"].iloc[0]

    # --- reference value from sklearn's cross_val_score -------------------
    frame = dataset.load()
    features = frame.loc[:, tsc_task.features]
    target = frame.loc[:, tsc_task.target]
    fold_scores = cross_val_score(
        pipeline,
        features,
        target,
        scoring=make_scorer(metric_func, **scorer_kwargs),
        cv=cv,
    )
    expected = fold_scores.mean()

    np.testing.assert_array_equal(actual, expected)
def test_automated_orchestration_vs_manual(data_loader):
    """Test orchestration.

    Predictions stored by the ``Orchestrator`` for fold 0 of a
    ``SingleSplit(random_state=1)`` must equal the predictions obtained by
    splitting the same data by hand with an identically configured
    ``SingleSplit`` and calling ``fit`` / ``predict`` on the strategy
    directly.
    """
    frame = data_loader(return_X_y=False)
    ram_dataset = RAMDataset(dataset=frame, name="data")
    tsc_task = TSCTask(target="class_val")

    # Strategy: a reduction pipeline around a small seeded random forest.
    forest = RandomForestClassifier(n_estimators=2, random_state=1)
    tsc_strategy = TSCStrategy(make_reduction_pipeline(forest))

    # Automated path: the orchestrator writes into an in-memory backend.
    backend = RAMResults()
    Orchestrator(
        datasets=[ram_dataset],
        tasks=[tsc_task],
        strategies=[tsc_strategy],
        cv=SingleSplit(random_state=1),
        results=backend,
    ).fit_predict(save_fitted_strategies=False)

    # Only the first item yielded by the predictions iterator is needed.
    stored = next(backend.load_predictions(cv_fold=0, train_or_test="test"))
    actual = stored.y_pred

    # Manual path: same split settings, direct fit/predict.
    tsc_task = TSCTask(target="class_val")
    splitter = SingleSplit(random_state=1)
    train_idx, test_idx = next(splitter.split(frame))
    train_rows, test_rows = frame.iloc[train_idx, :], frame.iloc[test_idx, :]
    tsc_strategy.fit(tsc_task, train_rows)
    expected = tsc_strategy.predict(test_rows)

    np.testing.assert_array_equal(actual, expected)
def test_stat():
    """Test sign ranks.

    Benchmarks two strategies ("pf" and "tsf") on the gunpoint training
    split, then checks the mean accuracy ranks reported by
    ``Evaluator.rank`` and the first two entries of each strategy's column
    in the table returned by ``Evaluator.sign_test``.
    """
    strategy_names = ("pf", "tsf")

    gunpoint = load_gunpoint(split="train", return_X_y=False)
    ram_dataset = RAMDataset(dataset=gunpoint, name="gunpoint")
    tsc_task = TSCTask(target="class_val")

    forest = ComposableTimeSeriesForestClassifier(
        n_estimators=1, random_state=1)
    forest_strategy = TSCStrategy(forest, name="tsf")
    neighbours = KNeighborsTimeSeriesClassifier()
    neighbours_strategy = TSCStrategy(neighbours, name="pf")

    # In-memory result backend shared by orchestrator and evaluator.
    backend = RAMResults()
    Orchestrator(
        datasets=[ram_dataset],
        tasks=[tsc_task],
        strategies=[neighbours_strategy, forest_strategy],
        cv=SingleSplit(random_state=1),
        results=backend,
    ).fit_predict(save_fitted_strategies=False)

    evaluator = Evaluator(backend)
    accuracy = PairwiseMetric(func=accuracy_score, name="accuracy")
    evaluator.evaluate(metric=accuracy)  # return value is not needed here

    ranks = evaluator.rank(ascending=True)
    # Expected below: "pf" ranks 1, "tsf" ranks 2.
    observed_ranks = [
        ranks.loc[ranks.strategy == name, "accuracy_mean_rank"].item()
        for name in strategy_names
    ]

    _, sign_table = evaluator.sign_test()
    observed_signs = [
        [sign_table[name][row] for row in (0, 1)]
        for name in strategy_names
    ]

    expected_ranks = [1, 2]
    expected_signs = [[1, 1], [1, 1]]
    np.testing.assert_equal(
        [observed_ranks, observed_signs],
        [expected_ranks, expected_signs],
    )
def main(args):
    """Train, score and persist a time-series forest classifier.

    Reads the ``rawdata`` input dataset from the module-level ``run``
    object, prepares it with ``prepare_dataframe``, splits it into train
    and test parts, fits a ``TSCStrategy`` around a
    ``TimeSeriesForestClassifier``, logs split sizes, the estimator count
    and the test accuracy through ``run.log``, and finally writes the
    fitted strategy to ``outputs/<args.model_filename>`` with ``dump``.

    Attributes read from ``args``: ``timeserieslength``, ``threshold``,
    ``train_data_split``, ``n_estimators`` and ``model_filename``.
    """
    # Load and wrangle data
    raw_df = run.input_datasets["rawdata"].to_pandas_dataframe()
    prepared_df = prepare_dataframe(
        raw_df,
        time_series_length=args.timeserieslength,
        threshold=args.threshold,
    )

    # Split data: sample the training rows with a fixed seed; the test set
    # is every remaining row.
    train_df = prepared_df.sample(frac=args.train_data_split, random_state=42)
    test_df = prepared_df.drop(train_df.index)

    # Log the split description, in the same order as the entries below.
    split_log_entries = (
        ("data_split_fraction", args.train_data_split,
         "Fraction of samples used for training"),
        ("train_samples", train_df.shape[0],
         "Number of samples used for training"),
        ("test_samples", test_df.shape[0],
         "Number of samples used for testing"),
    )
    for entry_name, entry_value, entry_description in split_log_entries:
        run.log(entry_name, entry_value, entry_description)

    # Train
    tsc_task = TSCTask(target="label", metadata=train_df)
    forest = TimeSeriesForestClassifier(n_estimators=args.n_estimators)
    fitted_strategy = TSCStrategy(forest)
    fitted_strategy.fit(tsc_task, train_df)
    run.log("n_estimators", args.n_estimators,
            "Number of tree estimators used in the model")

    # Metrics
    predictions = fitted_strategy.predict(test_df)
    accuracy = accuracy_score(test_df[tsc_task.target], predictions)
    run.log("Accuracy", f"{accuracy:1.3f}", "Accuracy of model")

    # Persist model
    os.makedirs("outputs", exist_ok=True)
    dump(fitted_strategy, os.path.join("outputs", args.model_filename))
X_train, y_train, X_test, y_test, i) yield X_train.index.to_numpy(), X_test.index.to_numpy() def get_n_splits(self): return self.n_splits HOME = os.path.expanduser("~") DATA_PATH = os.path.join(HOME, "Documents/Research/data/Univariate_ts") RESULTS_PATH = "results" # Alternatively, we can use a helper function to create them automatically datasets = make_datasets(path=DATA_PATH, dataset_cls=UEADataset, names=UNIVARIATE_DATASETS) tasks = [TSCTask(target="target") for _ in range(len(datasets))] results = HDDResults(path=RESULTS_PATH) orchestrator = Orchestrator( datasets=datasets, tasks=tasks, strategies=STRATEGIES, cv=PresplitFilesCV(cv=UEAStratifiedCV(n_splits=30)), results=results) orchestrator.fit_predict(save_fitted_strategies=False, verbose=True, overwrite_predictions=True, save_timings=True) evaluator = Evaluator(results=results)