y_func=last, forecast=200)), ('features', FeatureRep()), ('lin', LinearRegression())]) # fit and score clf.fit(X_train, y_train) score = clf.score(X_test, y_test) print("N series in train: ", len(X_train)) print("N series in test: ", len(X_test)) print("N segments in train: ", clf.N_train) print("N segments in test: ", clf.N_test) print("Score: ", score) # generate some predictions y, y_p = clf.transform_predict(X, y) # all predictions ytr, ytr_p = clf.transform_predict(X_train, y_train) # training predictions yte, yte_p = clf.transform_predict(X_test, y_test) # test predictions # note - the first few segments in the test set won't have predictions (gap) # we plot the 'gap' for the visualization to hopefully make the situation clear Ns = len(y) ts = np.arange(Ns) # segment number ttr = ts[0:len(ytr)] tte = ts[(Ns - len(yte)):Ns] tga = ts[len(ytr):(Ns - len(yte))] yga = y[len(ytr):(Ns - len(yte))] # plot the results plt.plot(ttr, ytr, '.', label="training") plt.plot(tga, yga, '.', label="gap")
# Assemble the pipeline: segmentation, feature representation, then a
# linear regression as the final estimator.
pipe = Pype([
    ('seg', Segment(width=200, overlap=0.5, y_func=last)),
    ('features', FeatureRep()),
    ('lin', LinearRegression()),
])

# Fit on the training series and score on the held-out series.
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

# Report the number of series passed in and the segment counts exposed by
# the fitted pipeline.
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# Per-segment targets and predictions for both splits.
ytr, ytr_p = pipe.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.transform_predict(X_test, y_test)  # test predictions

# x-axis is the segment number; test segments are numbered directly after
# the training segments.
xtr = np.arange(len(ytr))
xte = np.arange(len(xtr), len(xtr) + len(yte))

# Plot training targets, test targets, and test predictions.
plt.plot(xtr, ytr, '.', label="training")
plt.plot(xte, yte, '.', label="actual")
plt.plot(xte, yte_p, label="predicted")
plt.xlabel("Segment Number")
plt.ylabel("Target")
plt.legend()
plt.show()

#
# now try a cross validation
#
X = [np.arange(4000) / 100.]
verbose=10))) ]) #scorer=scorer1 X_train, X_test, y_train, y_test, matlab_train, matlab_test = train_test_split( new_features_seg_included, new_labels_seg_included, new_matlab_seg_included, test_size=0.10, random_state=10) clf.fit(X_train, y_train) #clf1.fit(X_train, y_train) score = clf.score(X_test, y_test) predict = clf.predict(X_test) test1, test2 = clf.transform_predict(X_test, y_test) train1, train2 = clf.transform_predict(X_train, y_train) test21 = inverse_transform(test2, y_test, chosenwidth, 1, order='F') test11 = inverse_transform(test1, y_test, chosenwidth, 1, order='F') train11 = inverse_transform(train1, y_train, chosenwidth, 1, order='F') train21 = inverse_transform(train2, y_train, chosenwidth, 1, order='F') # and do a cross validation scoring = make_scorer(f1_score, average='macro') if change_labels == 1 and conservative == 1: freeze2 = [np.argmax(test21[i]) for i in range(len(test21))] freeze2_conserve = [ test21[i].shape[0] - np.argmax(np.flip(np.int_(np.logical_not(test21[i])))) for i in range(len(test21))
test_size=0.25, random_state=42) ############################################## # OPTION 1: Use the score SegPipe score method ############################################## pipe.fit(X_train, y_train) score = pipe.score(X_test, y_test) print("Accuracy score: ", score) ###################################################################### # OPTION 2: generate true and predicted target values for the segments ###################################################################### y_true, y_pred = pipe.transform_predict(X_test, y_test) # use any of the sklearn scorers f1_macro = f1_score(y_true, y_pred, average='macro') print("F1 score: ", f1_macro) cm = confusion_matrix(y_true, y_pred) plot_confusion_matrix(cm, data['y_labels']) ########################################## # OPTION 3: scoring during model selection ########################################## # model selection using the built-in score method for the final estimator cv_scores = cross_validate(pipe, X, y, cv=4, return_train_score=True) print("CV Scores: ", pd.DataFrame(cv_scores))
def test_pipe_regression():
    """Smoke-test a SegmentXY -> FeatureRep -> Ridge pipeline.

    Exercises fit / transform_predict / predict / score on several input
    layouts, runs a 3-fold cross validation, and finally checks a pipeline
    that ends in a transformer instead of an estimator. Nothing is asserted:
    the test passes as long as no call raises.
    """

    def exercise(model, data, target):
        # The same four calls are made for every input layout.
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    regressor = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('ridge', Ridge()),
    ])

    # no context data, single time series
    exercise(regressor, [np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    exercise(regressor, TS_Data(series, context), [np.random.rand(1000)])

    # multiple time series of differing lengths
    lengths = (1000, 100, 500)
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(n) for n in lengths]
    exercise(regressor, data, target)

    # cross validation: one random series / target repeated five times
    one_series = np.random.rand(1000, 10)
    series = np.array([one_series] * 5)
    context = np.random.rand(5, 3)
    data = TS_Data(series, context)
    one_target = np.random.rand(1000)
    target = np.array([one_target] * 5)
    cross_validate(regressor, data, target, cv=3)

    # pipeline whose last step is a transformer rather than an estimator
    transformer = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('scaler', StandardScaler()),
    ])
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(n) for n in lengths]
    transformer.fit(data, target)
    transformer.transform(data, target)
    transformer.fit_transform(data, target)
def test_pipe_PadTrunc():
    """Smoke-test a PadTrunc -> FeatureRep -> random forest pipeline.

    Exercises fit / transform_predict / predict / score with one target
    value per series across several input layouts, then checks a pipeline
    that ends in a transformer. Nothing is asserted: the test passes as long
    as no call raises.
    """

    def exercise(model, data, target):
        # The same four calls are made for every input layout.
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    classifier = Pype([
        ('trunc', PadTrunc()),
        ('ftr', FeatureRep()),
        ('rf', RandomForestClassifier(n_estimators=10)),
    ])

    # no context data, single time series
    exercise(classifier, [np.random.rand(1000, 10)], [5])

    # context data, single time series
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    exercise(classifier, TS_Data(series, context), [5])

    # multiple time series of differing lengths
    lengths = (1000, 100, 500)
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    exercise(classifier, TS_Data(series, context), [1, 2, 3])

    # univariate data: 1-D series with a 1-D context array
    series = [np.random.rand(n) for n in lengths]
    context = np.random.rand(3)
    exercise(classifier, TS_Data(series, context), [1, 2, 3])

    # pipeline whose last step is a transformer rather than an estimator
    transformer = Pype([
        ('trunc', PadTrunc()),
        ('ftr', FeatureRep()),
        ('scaler', StandardScaler()),
    ])
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [1, 2, 3]
    transformer.fit(data, target)
    transformer.transform(data, target)
    transformer.fit_transform(data, target)