def test_TimeSeriesForest_predictions(n_estimators, n_intervals):
    """Check that an explicit pipeline and the default forest agree.

    A ``ComposableTimeSeriesForestClassifier`` wrapping a hand-built
    ``RandomIntervalFeatureExtractor`` + ``DecisionTreeClassifier`` pipeline
    must produce exactly the same class probabilities on the GunPoint test
    split as one created without an explicit ``estimator``, given the same
    seed and number of estimators.

    NOTE(review): ``n_intervals`` is accepted but never used in the body --
    confirm whether it was meant to be forwarded somewhere.
    """
    seed = 1234
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)

    # Explicitly assembled base estimator.
    extractor = RandomIntervalFeatureExtractor(
        random_state=seed, features=[np.mean, np.std, _slope]
    )
    base_pipeline = Pipeline(
        [("transform", extractor), ("clf", DecisionTreeClassifier())]
    )
    explicit_forest = ComposableTimeSeriesForestClassifier(
        estimator=base_pipeline, random_state=seed, n_estimators=n_estimators
    )
    explicit_forest.fit(X_train, y_train)
    explicit_probas = explicit_forest.predict_proba(X_test)

    # default, semi-modular implementation using
    # RandomIntervalFeatureExtractor internally
    default_forest = ComposableTimeSeriesForestClassifier(
        random_state=seed, n_estimators=n_estimators
    )
    default_forest.fit(X_train, y_train)
    default_probas = default_forest.predict_proba(X_test)

    np.testing.assert_array_equal(explicit_probas, default_probas)
def test_pipeline(network=None):
    """Slightly more generalised test with sktime pipelines.

    Loads GunPoint, wraps ``network`` in a single-step sktime ``Pipeline``,
    fits it on the first 10 training cases and prints the score obtained on
    the first 10 test cases.

    Parameters
    ----------
    network : estimator or None, default=None
        Classifier placed in the pipeline. When ``None`` a fresh
        ``catch22ForestClassifier()`` is created for this call, so no
        estimator instance is built at import time or shared between calls.
    """
    if network is None:
        network = catch22ForestClassifier()

    print("Start test_pipeline()")

    from sktime.pipeline import Pipeline

    # just a simple (useless) pipeline
    steps = [('clf', network)]
    clf = Pipeline(steps)

    X_train, y_train = load_gunpoint(split='TRAIN', return_X_y=True)
    X_test, y_test = load_gunpoint(split='TEST', return_X_y=True)

    clf.fit(X_train[:10], y_train[:10])

    print(clf.score(X_test[:10], y_test[:10]))
    print("End test_pipeline()")
def test_highLevelsktime(network=None):
    """Truly generalised test with sktime tasks/strategies.

    Loads GunPoint, builds a ``TSCTask`` on the ``class_val`` target, wraps
    ``network`` in a ``TSCStrategy``, fits it on the first 10 training rows
    and prints the accuracy on the first 10 test rows.

    Parameters
    ----------
    network : estimator or None, default=None
        Classifier handed to ``TSCStrategy``. When ``None`` a fresh
        ``catch22ForestClassifier()`` is created for this call, so no
        estimator instance is built at import time or shared between calls.
    """
    if network is None:
        network = catch22ForestClassifier()

    print("start test_highLevelsktime()")

    from sktime.highlevel.tasks import TSCTask
    from sktime.highlevel.strategies import TSCStrategy
    from sklearn.metrics import accuracy_score

    train = load_gunpoint(split='TRAIN')
    test = load_gunpoint(split='TEST')
    task = TSCTask(target='class_val', metadata=train)

    strategy = TSCStrategy(network)
    strategy.fit(task, train.iloc[:10])

    y_pred = strategy.predict(test.iloc[:10])
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    y_test = test.iloc[:10][task.target].values.astype(float)
    print(accuracy_score(y_test, y_pred))

    print("End test_highLevelsktime()")
def test_mrseql_on_gunpoint():
    """Fit SAX, SFA and combined SAX+SFA MrSEQL classifiers on GunPoint.

    Checks the feature-space width of the combined model and the number of
    correct test predictions of each model.
    """
    # load training data
    X_train, y_train = load_gunpoint(split='train', return_X_y=True)
    X_test, y_test = load_gunpoint(split='test', return_X_y=True)

    sax_clf = MrSEQLClassifier(seql_mode='fs', symrep=['sax'])
    sfa_clf = MrSEQLClassifier(seql_mode='fs', symrep=['sfa'])
    ss_clf = MrSEQLClassifier(seql_mode='fs', symrep=['sax', 'sfa'])
    models = (sax_clf, sfa_clf, ss_clf)

    # fit training data
    for model in models:
        model.fit(X_train, y_train)

    # prediction
    sax_predicted, sfa_predicted, ss_predicted = [
        model.predict(X_test) for model in models
    ]

    def n_features(model):
        return model.ots_clf.coef_.shape[1]

    # test feature space dimension
    # the multi-domain classifier (ss_clf) should produce as many features
    # as the others (sax_clf and sfa_clf) combined
    np.testing.assert_equal(
        n_features(ss_clf), n_features(sfa_clf) + n_features(sax_clf)
    )

    # test number of correct predictions
    np.testing.assert_equal((sax_predicted == y_test).sum(), 148)
    np.testing.assert_equal((sfa_predicted == y_test).sum(), 150)
    np.testing.assert_equal((ss_predicted == y_test).sum(), 150)
def test_weasel_on_gunpoint():
    """WEASEL with equi-depth binning must score at least 0.99 on GunPoint.

    NOTE(review): another function with this exact name appears further
    down in this source; if both live in one module the later definition
    replaces this one -- confirm.
    """
    # load gunpoint data
    train_X, train_y = load_gunpoint(split="train", return_X_y=True)
    test_X, test_y = load_gunpoint(split="test", return_X_y=True)

    # train WEASEL
    clf = WEASEL(random_state=1, binning_strategy="equi-depth")
    clf.fit(train_X, train_y)

    accuracy = clf.score(test_X, test_y)
    assert accuracy >= 0.99
def test_drcif_on_gunpoint():
    """Compare DrCIF probabilities on a GunPoint subsample to stored values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    subset = np.random.RandomState(0).permutation(10)

    # train DrCIF
    model = DrCIF(n_estimators=20, random_state=0)
    model.fit(X_train.iloc[subset], y_train[subset])

    # assert probabilities are the same
    predicted = model.predict_proba(X_test.iloc[subset])
    testing.assert_array_equal(predicted, drcif_gunpoint_probas)
def test_cif_on_gunpoint():
    """Compare CanonicalIntervalForest probabilities to stored values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    rows = np.random.RandomState(0).permutation(10)

    # train CIF
    forest = CanonicalIntervalForest(n_estimators=100, random_state=0)
    forest.fit(X_train.iloc[rows], y_train[rows])

    # assert probabilities are the same
    testing.assert_array_equal(
        forest.predict_proba(X_test.iloc[rows]), cif_gunpoint_probas
    )
def test_arsenal_on_gunpoint():
    """Compare Arsenal probabilities on a GunPoint subsample to stored values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    picked = np.random.RandomState(0).permutation(10)
    train_cases = X_train.iloc[picked]
    train_labels = y_train[picked]

    # train Arsenal
    ensemble = Arsenal(num_kernels=1000, n_estimators=10, random_state=0)
    ensemble.fit(train_cases, train_labels)

    # assert probabilities are the same
    observed = ensemble.predict_proba(X_test.iloc[picked])
    testing.assert_array_equal(observed, arsenal_gunpoint_probas)
def test_individual_tde_on_gunpoint():
    """Compare IndividualTDE probabilities to stored GunPoint values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    order = np.random.RandomState(0).permutation(10)

    # train IndividualTDE
    single_tde = IndividualTDE(random_state=0)
    single_tde.fit(X_train.iloc[order], y_train[order])

    # assert probabilities are the same
    testing.assert_array_equal(
        single_tde.predict_proba(X_test.iloc[order]),
        individual_tde_gunpoint_probas,
    )
def test_rocket_on_gunpoint():
    """Compare ROCKETClassifier probabilities to stored GunPoint values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    sample = np.random.RandomState(0).permutation(10)

    # train ROCKET
    classifier = ROCKETClassifier(num_kernels=1000, random_state=0)
    classifier.fit(X_train.iloc[sample], y_train[sample])

    # assert probabilities are the same
    result = classifier.predict_proba(X_test.iloc[sample])
    testing.assert_array_equal(result, rocket_gunpoint_probas)
def test_weasel_on_gunpoint():
    """WEASEL seeded with 1379 must score at least 0.99 on GunPoint.

    NOTE(review): another function with this exact name appears earlier in
    this source; if both live in one module this definition replaces the
    earlier one -- confirm.
    """
    # load gunpoint data
    train_cases, train_labels = load_gunpoint(split="train", return_X_y=True)
    test_cases, test_labels = load_gunpoint(split="test", return_X_y=True)

    # train WEASEL
    model = WEASEL(random_state=1379)
    model.fit(train_cases, train_labels)

    achieved = model.score(test_cases, test_labels)
    assert achieved >= 0.99
def test_signatures_on_gunpoint():
    """SignatureClassifier seeded with 0 must reach 0.96 accuracy on GunPoint."""
    # Load data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)

    # Fit a simple sig classifier
    sig_clf = SignatureClassifier(random_state=0)
    sig_clf.fit(X_train, y_train)

    # Test and check accuracy
    X_test, y_test = load_gunpoint(split="test", return_X_y=True)
    predictions = sig_clf.predict(X_test)
    assert accuracy_score(predictions, y_test) == 0.96
def test_stsf_on_gunpoint():
    """Test of STSF on gun point.

    Fits ``SupervisedTimeSeriesForest`` on a seeded ten-case subsample and
    requires its probabilities to equal the stored reference array.
    """
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    chosen = np.random.RandomState(0).permutation(10)

    forest = SupervisedTimeSeriesForest(n_estimators=20, random_state=0)
    forest.fit(X_train.iloc[chosen], y_train[chosen])

    # assert probabilities are the same
    produced = forest.predict_proba(X_test.iloc[chosen])
    testing.assert_array_equal(produced, stsf_gunpoint_probas)
def test_tde_on_gunpoint():
    """Compare TemporalDictionaryEnsemble probabilities to stored values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split='train', return_X_y=True)
    X_test, _ = load_gunpoint(split='test', return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    idx = np.random.RandomState(0).permutation(10)

    # train tde
    ensemble = TemporalDictionaryEnsemble(random_state=0)
    ensemble.fit(X_train.iloc[idx], y_train[idx])

    # assert probabilities are the same
    testing.assert_array_equal(
        ensemble.predict_proba(X_test.iloc[idx]), tde_gunpoint_probas
    )
def test_catch22_forest_classifier_on_gunpoint():
    """Compare Catch22ForestClassifier probabilities to stored values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    positions = np.random.RandomState(0).permutation(10)

    # train c22f
    forest = Catch22ForestClassifier(random_state=0)
    forest.fit(X_train.iloc[positions], y_train[positions])

    # assert probabilities are the same
    got = forest.predict_proba(X_test.iloc[positions])
    testing.assert_array_equal(got, catch22_forest_classifier_gunpoint_probas)
def test_boss_on_gunpoint():
    """Compare BOSSEnsemble probabilities on a GunPoint subsample to stored values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split='train', return_X_y=True)
    X_test, _ = load_gunpoint(split='test', return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    selection = np.random.RandomState(0).permutation(10)
    fit_X = X_train.iloc[selection]
    fit_y = y_train[selection]

    # train boss
    ensemble = BOSSEnsemble(random_state=0)
    ensemble.fit(fit_X, fit_y)

    # assert probabilities are the same
    testing.assert_array_equal(
        ensemble.predict_proba(X_test.iloc[selection]), boss_gunpoint_probas
    )
def test_matrix_profile_classifier_on_gunpoint():
    """Test of MatrixProfileClassifier on gun point.

    Fits the classifier on a seeded ten-case subsample and requires its
    probabilities to equal the stored reference array.
    """
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    cases = np.random.RandomState(0).permutation(10)

    # train matrix profile classifier
    classifier = MatrixProfileClassifier(random_state=0)
    classifier.fit(X_train.iloc[cases], y_train[cases])

    # assert probabilities are the same
    estimated = classifier.predict_proba(X_test.iloc[cases])
    testing.assert_array_equal(estimated, matrix_profile_classifier_gunpoint_probas)
def test_cboss_on_gunpoint():
    """Compare ContractableBOSS probabilities to stored GunPoint values."""
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    keep = np.random.RandomState(0).permutation(10)

    # train cBOSS
    contractable = ContractableBOSS(
        n_parameter_samples=50, max_ensemble_size=10, random_state=0
    )
    contractable.fit(X_train.iloc[keep], y_train[keep])

    # assert probabilities are the same
    testing.assert_array_equal(
        contractable.predict_proba(X_test.iloc[keep]), cboss_gunpoint_probas
    )
def test_catch22_classifier_on_gunpoint():
    """Test of Catch22Classifier on gun point.

    Fits the classifier, backed by a 20-tree random forest, on a seeded
    ten-case subsample and requires its probabilities to equal the stored
    reference array.

    NOTE(review): the ``RandomForestClassifier`` is created without its own
    ``random_state``; the exact-equality check presumably relies on
    ``Catch22Classifier(random_state=0)`` seeding it -- confirm.
    """
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    subset = np.random.RandomState(0).permutation(10)

    # train catch22 classifier
    forest = RandomForestClassifier(n_estimators=20)
    classifier = Catch22Classifier(random_state=0, estimator=forest)
    classifier.fit(X_train.iloc[subset], y_train[subset])

    # assert probabilities are the same
    testing.assert_array_equal(
        classifier.predict_proba(X_test.iloc[subset]),
        catch22_classifier_gunpoint_probas,
    )
def test_row_transformer_function_transformer_series_to_primitives():
    """Apply ``np.mean`` through ``RowTransformer`` on GunPoint.

    The output must keep the input's shape and hold a ``float`` in its
    first cell.
    """
    X, y = load_gunpoint(return_X_y=True)
    row_mean = RowTransformer(FunctionTransformer(func=np.mean, validate=False))
    Xt = row_mean.fit_transform(X, y)

    assert Xt.shape == X.shape
    # check series-to-primitive transforms
    first_cell = Xt.iloc[0, 0]
    assert isinstance(first_cell, float)
def test_transformer():
    """Fit SFA with each binning method and check its breakpoints.

    For equi-depth, equi-width and information-gain binning the fitted
    breakpoints must have shape ``(word_length, alphabet_size)`` and row 1
    (all but the last column) must be zero.
    """
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4

    binning_cases = (
        ("Equi Depth", "equi-depth"),
        ("Equi Width", "equi-width"),
        ("Information Gain", "information-gain"),
    )
    for label, method in binning_cases:
        p = SFA(
            word_length=word_length,
            alphabet_size=alphabet_size,
            binning_method=method,
        ).fit(X, Y)
        print(label)
        print(p.breakpoints)
        assert p.breakpoints.shape == (word_length, alphabet_size)

        # Only the information-gain case also prints the checked row.
        if method == "information-gain":
            print(p.breakpoints[1, :-1])
        assert np.equal(0, p.breakpoints[1, :-1]).all()  # imaginary component is 0
def test_sfa_anova():
    """Contrast SFA fitted with and without ANOVA for two binning methods.

    For information-gain and equi-depth binning, the ANOVA variant must
    yield breakpoints of shape ``(word_length, alphabet_size)`` and differ
    from the non-ANOVA variant in both ``dft_length`` and breakpoints.

    NOTE(review): another function named ``test_sfa_anova`` appears further
    down in this source; if both live in one module the later definition
    replaces this one -- confirm.
    """
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4
    window_size = 32

    def report(model):
        print(model.breakpoints)
        print(model.support)
        print(model.dft_length)

    for binning in ["information-gain", "equi-depth"]:
        common = dict(
            word_length=word_length,
            alphabet_size=alphabet_size,
            window_size=window_size,
            binning_method=binning,
        )

        print("SFA with ANOVA one-sided test")
        p = SFA(anova=True, **common).fit(X, Y)
        report(p)
        assert p.breakpoints.shape == (word_length, alphabet_size)

        print("SFA with first feq coefficients")
        p2 = SFA(anova=False, **common).fit(X, Y)
        report(p2)

        assert p.dft_length != p2.dft_length
        assert (p.breakpoints != p2.breakpoints).any()
def test_typed_dict():
    """SFA must produce the same words with ``typed_dict`` on and off."""
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    def first_bag_words(use_typed_dict):
        # Fit a two-level SFA and render the first transformed bag as text.
        sfa = SFA(
            word_length=6,
            alphabet_size=4,
            levels=2,
            typed_dict=use_typed_dict,
        )
        sfa.fit(X, y)
        return sfa.bag_to_string(sfa.transform(X, y)[0][0])

    word_list = first_bag_words(True)
    word_list2 = first_bag_words(False)

    assert word_list == word_list2
def test_sfa_anova(binning_method):
    """Contrast SFA fitted with and without ANOVA for ``binning_method``.

    The ANOVA variant must yield breakpoints of shape
    ``(word_length, alphabet_size)`` and differ from the non-ANOVA variant
    in both ``dft_length`` and breakpoints. Both fitted models are also run
    through ``transform``.

    NOTE(review): another function named ``test_sfa_anova`` appears earlier
    in this source; if both live in one module this definition replaces the
    earlier one -- confirm.
    """
    # load training data
    X, y = load_gunpoint(split="train", return_X_y=True)

    word_length = 6
    alphabet_size = 4
    shared_kwargs = dict(
        word_length=word_length,
        alphabet_size=alphabet_size,
        window_size=32,
        binning_method=binning_method,
    )

    # SFA with ANOVA one-sided test
    p = SFA(anova=True, **shared_kwargs).fit(X, y)
    assert p.breakpoints.shape == (word_length, alphabet_size)
    p.transform(X, y)

    # SFA with first feq coefficients
    p2 = SFA(anova=False, **shared_kwargs).fit(X, y)

    assert p.dft_length != p2.dft_length
    assert (p.breakpoints != p2.breakpoints).any()
    p2.transform(X, y)
def test_different_implementations():
    """Check RandomIntervalFeatureExtractor against equivalent compositions.

    The extractor's output is compared first with a segmenter followed by a
    row-wise mean, then with a ``TSPipeline`` combining a segmenter and a
    ``TSFeatureUnion`` of row-wise mean and std.
    """
    random_seed = 1233
    X_train, _ = load_gunpoint(return_X_y=True)

    # Compare with chained transformations.
    segmenter = RandomIntervalSegmenter(n_intervals='sqrt',
                                        random_state=random_seed)
    row_mean = RowwiseTransformer(
        FunctionTransformer(func=np.mean, validate=False))
    segments = segmenter.fit_transform(X_train)
    A = row_mean.fit_transform(segments)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt', features=[np.mean], random_state=random_seed)
    B = extractor.fit_transform(X_train)
    np.testing.assert_array_equal(A, B)

    # Compare with transformer pipeline using TSFeatureUnion.
    union = TSFeatureUnion([
        ('mean', RowwiseTransformer(
            FunctionTransformer(func=np.mean, validate=False))),
        ('std', RowwiseTransformer(
            FunctionTransformer(func=np.std, validate=False))),
    ])
    pipe = TSPipeline(
        [
            ('segment', RandomIntervalSegmenter(n_intervals='sqrt',
                                                check_input=False)),
            ('transform', union),
        ],
        random_state=random_seed,
    )
    a = pipe.fit_transform(X_train)
    n_ints = a.shape[1] // 2

    # Rename columns for comparing re-ordered arrays.
    mean_names = a.columns[:n_ints] + '_mean'
    std_names = a.columns[n_ints:n_ints * 2] + '_std'
    a.columns = [*mean_names, *std_names]
    a = a.reindex(np.sort(a.columns), axis=1)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt', features=[np.mean, np.std],
        random_state=random_seed)
    b = extractor.fit_transform(X_train)
    b = b.reindex(np.sort(b.columns), axis=1)
    np.testing.assert_array_equal(a, b)
def test_different_pipelines():
    """Check a segment + FeatureUnion pipeline against the feature extractor.

    Both the transformed output and the fitted intervals of the pipeline's
    segmenter must equal those of ``RandomIntervalFeatureExtractor`` built
    with the same seed and the same mean/std/slope features.
    """
    random_seed = 1233
    X_train, _ = load_gunpoint(return_X_y=True)

    def rowwise(func):
        return RowwiseTransformer(FunctionTransformer(func=func, validate=False))

    segmenter = RandomIntervalSegmenter(n_intervals='sqrt')
    union = FeatureUnion([
        ('mean', rowwise(np.mean)),
        ('std', rowwise(np.std)),
        ('slope', rowwise(time_series_slope)),
    ])
    pipe = Pipeline(
        [('segment', segmenter), ('transform', union)],
        random_state=random_seed,
    )
    a = pipe.fit_transform(X_train)

    extractor = RandomIntervalFeatureExtractor(
        n_intervals='sqrt',
        features=[np.mean, np.std, time_series_slope],
        random_state=random_seed,
    )
    b = extractor.fit_transform(X_train)

    np.testing.assert_array_equal(a, b)
    fitted_segmenter = pipe.steps[0][1]
    np.testing.assert_array_equal(fitted_segmenter.intervals_,
                                  extractor.intervals_)
def test_FeatureUnion():
    """FeatureUnion output must be as wide as input columns x transformers."""
    X, y = load_gunpoint(return_X_y=True)
    union = FeatureUnion(
        [("mean", mean_transformer), ("std", std_transformer)]
    )
    Xt = union.fit_transform(X, y)

    n_transformers = len(union.transformer_list)
    expected_shape = (X.shape[0], X.shape[1] * n_transformers)
    assert Xt.shape == expected_shape
def test_dft_mft():
    """Check that SFA's ``_mft`` agrees with its ``_discrete_fourier_transform``.

    First a single window spanning the whole series is compared; then, for
    ``norm`` both on and off, every sliding window of length 140 of the
    first GunPoint training series is compared with the direct transform
    of that window. The number of windows and the length of each
    transformed window are checked as well.

    The comparisons use the absolute difference so that deviations in
    either direction fail the test (a plain ``mft - dft < eps`` check would
    let arbitrarily large negative errors pass).
    """
    # load training data
    X, Y = load_gunpoint(split="train", return_X_y=True)
    X_tab = tabularize(X, return_array=True)

    word_length = 6
    alphabet_size = 4

    print("Single DFT transformation")
    window_size = np.shape(X_tab)[1]
    p = SFA(word_length=word_length, alphabet_size=alphabet_size,
            window_size=window_size, binning_method="equi-depth").fit(X, Y)
    dft = p._discrete_fourier_transform(X_tab[0])
    mft = p._mft(X_tab[0])

    assert (np.abs(mft - dft) < 0.0001).all()

    print("Windowed DFT transformation")
    for norm in [True, False]:
        for window_size in [140]:
            p = SFA(word_length=word_length, norm=norm,
                    alphabet_size=alphabet_size, window_size=window_size,
                    binning_method="equi-depth").fit(X, Y)
            mft = p._mft(X_tab[0])
            n_windows = len(X_tab[0]) - window_size + 1
            for i in range(n_windows):
                dft_transformed = p._discrete_fourier_transform(
                    X_tab[0, i:window_size + i])
                assert (np.abs(mft[i] - dft_transformed) < 0.001).all()

            assert len(mft) == n_windows
            assert len(mft[0]) == word_length
def test_orchestration():
    """Check orchestrated predictions against a manual fit/predict run.

    Runs an ``Orchestrator`` with one dataset, task and strategy under a
    seeded ``SingleSplit`` and compares the stored predictions with those
    from fitting the same strategy by hand on the same split.
    """
    data = load_gunpoint()

    dataset = DatasetRAM(dataset=data, dataset_name='gunpoint')
    task = TSCTask(target='class_val')

    # create strategies
    strategy = TSCStrategy(
        TimeSeriesForestClassifier(n_estimators=1, random_state=1))

    # result backend
    results_store = ResultRAM()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[strategy],
        cv=SingleSplit(random_state=1),
        result=results_store,
    )
    orchestrator.run(save_strategies=False)
    actual = results_store.load()[0].y_pred

    # expected output
    task = TSCTask(target='class_val')
    cv = SingleSplit(random_state=1)
    for train_idx, test_idx in cv.split(data):
        train = data.iloc[train_idx, :]
        test = data.iloc[test_idx, :]
        strategy = TSCStrategy(
            TimeSeriesForestClassifier(n_estimators=1, random_state=1))
        strategy.fit(task, train)
        expected = strategy.predict(test)

    np.testing.assert_array_equal(actual, expected)
def test_tsfresh_classifier_on_gunpoint():
    """Test of TSFreshClassifier on gun point.

    Fits the classifier with the "minimal" feature set, backed by a 20-tree
    random forest, on a seeded ten-case subsample and requires its
    probabilities to equal the stored reference array.

    NOTE(review): the ``RandomForestClassifier`` is created without its own
    ``random_state``; the exact-equality check presumably relies on
    ``TSFreshClassifier(random_state=0)`` seeding it -- confirm.
    """
    # load gunpoint data
    X_train, y_train = load_gunpoint(split="train", return_X_y=True)
    X_test, _ = load_gunpoint(split="test", return_X_y=True)
    # Seeded permutation of the positions 0..9, used for both splits.
    subset = np.random.RandomState(0).permutation(10)

    # train TSFresh classifier
    forest = RandomForestClassifier(n_estimators=20)
    classifier = TSFreshClassifier(
        random_state=0, default_fc_parameters="minimal", estimator=forest
    )
    classifier.fit(X_train.iloc[subset], y_train[subset])

    # assert probabilities are the same
    testing.assert_array_equal(
        classifier.predict_proba(X_test.iloc[subset]),
        tsfresh_classifier_gunpoint_probas,
    )