def test_pipe_transformation():
    """Exercise transform-only Pype pipelines for every segmenter type.

    For each segmenter (SegmentX, SegmentXY, SegmentXYForecast, PadTrunc) the
    same check is run twice: once on a TS_Data built directly from the time
    series + context arrays, and once on a TS_Data rebuilt from a pandas
    DataFrame via TS_Data.from_df — both container routes must behave the same.
    """

    def _check(step_name, segmenter, make_y):
        # One pipeline + one fresh 3-series multivariate dataset, verified on
        # both supported input container types.  make_y is deferred so the
        # target is generated per-case, matching the original test layout.
        pipe = Pype([(step_name, segmenter),
                     ('ftr', FeatureRep()),
                     ('scaler', StandardScaler())])
        Xt = [np.random.rand(1000, 10),
              np.random.rand(100, 10),
              np.random.rand(500, 10)]
        Xc = np.random.rand(3, 3)
        y = make_y()
        transformation_test(pipe, TS_Data(Xt, Xc), y)
        df = pd.DataFrame(Xc)
        df['ts_data'] = Xt
        transformation_test(pipe, TS_Data.from_df(df), y)

    # class-label targets (one label per series)
    labels = lambda: [1, 2, 3]
    # time-series targets (one target series per input series)
    series = lambda: [np.random.rand(1000), np.random.rand(100),
                      np.random.rand(500)]

    # SegmentX transform pipe
    _check('seg', SegmentX(), labels)
    # SegmentXY transform pipe
    _check('seg', SegmentXY(), series)
    # Forecast transform pipe
    _check('seg', SegmentXYForecast(), series)
    # Padtrunc transform pipe
    _check('trunc', PadTrunc(), labels)
def test_pipe_regression():
    """Run the shared regression checks on a SegmentXY pipeline over several
    input layouts, finishing with a cross-validation pass."""
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    # no context data, single time series
    regression_test(pipe, [np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    ts = [np.random.rand(1000, 10)]
    ctx = [np.random.rand(3)]
    regression_test(pipe, TS_Data(ts, ctx), [np.random.rand(1000)])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    ctx = np.random.rand(3, 3)
    target = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    regression_test(pipe, TS_Data(ts, ctx), target)

    # cross validation
    ts = np.array([np.random.rand(1000, 10)] * 5)
    ctx = np.random.rand(5, 3)
    target = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, TS_Data(ts, ctx), target, cv=3)
def test_pipe_PadTrunc():
    """Run the shared classifier checks on a PadTrunc pipeline, covering
    multivariate/univariate series with and without context data."""
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    # no context data, single time series
    classifier_test(pipe, [np.random.rand(1000, 10)], [5])

    # context data, single time series
    ts = [np.random.rand(1000, 10)]
    ctx = [np.random.rand(3)]
    classifier_test(pipe, TS_Data(ts, ctx), [5])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    classifier_test(pipe, TS_Data(ts, np.random.rand(3, 3)), [1, 2, 3])

    # univariate data
    ts = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    classifier_test(pipe, TS_Data(ts, np.random.rand(3)), [1, 2, 3])
def test_pipe_forecast():
    """Run the shared forecast checks on a SegmentXYForecast pipeline, using
    both TS_Data- and DataFrame-constructed inputs, then cross-validate."""
    pipe = Pype([('seg', SegmentXYForecast()),
                 ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    # no context data, single time series
    forecast_test(pipe, [np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    ts = [np.random.rand(1000, 10)]
    ctx = [np.random.rand(3)]
    target = [np.random.rand(1000)]
    forecast_test(pipe, TS_Data(ts, ctx), target)
    frame = pd.DataFrame(ctx)
    frame['ts_data'] = ts
    forecast_test(pipe, TS_Data.from_df(frame), target)

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    ctx = np.random.rand(3, 3)
    target = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    forecast_test(pipe, TS_Data(ts, ctx), target)
    frame = pd.DataFrame(ctx)
    frame['ts_data'] = ts
    forecast_test(pipe, TS_Data.from_df(frame), target)

    # cross validation
    ts = np.array([np.random.rand(1000, 10)] * 5)
    ctx = np.random.rand(5, 3)
    target = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, TS_Data(ts, ctx), target, cv=3)
    frame = pd.DataFrame(ctx)
    frame['ts_data'] = [np.random.rand(1000, 10)] * 5
    cross_validate(pipe, TS_Data.from_df(frame), target, cv=3)
def test_pipe_classification():
    """Run the shared classifier checks on a SegmentX pipeline over several
    input layouts, each with TS_Data- and DataFrame-constructed variants."""
    pipe = Pype([('seg', SegmentX()),
                 ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    # no context data, single time series
    classifier_test(pipe, [np.random.rand(1000, 10)], [5])

    # context data, single time series
    ts = [np.random.rand(1000, 10)]
    ctx = [np.random.rand(3)]
    classifier_test(pipe, TS_Data(ts, ctx), [5])
    frame = pd.DataFrame(ctx)
    frame['ts_data'] = ts
    classifier_test(pipe, TS_Data.from_df(frame), [5])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    ctx = np.random.rand(3, 3)
    classifier_test(pipe, TS_Data(ts, ctx), [1, 2, 3])
    frame = pd.DataFrame(ctx)
    frame['ts_data'] = ts
    classifier_test(pipe, TS_Data.from_df(frame), [1, 2, 3])

    # univariate data
    ts = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    ctx = np.random.rand(3)
    classifier_test(pipe, TS_Data(ts, ctx), [1, 2, 3])
    frame = pd.DataFrame(ctx)
    frame['ts_data'] = ts
    classifier_test(pipe, TS_Data.from_df(frame), [1, 2, 3])
# Example script: regression on one long synthetic series with a Pype pipeline.
# NOTE(review): Pype, LinearRegression and np are imported earlier in the file.
from seglearn.split import temporal_split, TemporalKFold
from seglearn.transform import FeatureRep, Segment, last

# for a single time series, we need to make it a list
X = [np.arange(10000) / 100.]
y = [np.sin(X[0]) * X[0] * 3 + X[0] * X[0]]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y)

# setting y_func = last, selects the last value from each y segment as the target
# other options include transform.middle, or you can make your own function
# see the API documentation for further details
pipe = Pype([('seg', Segment(width=200, overlap=0.5, y_func=last)),
             ('features', FeatureRep()),
             ('lin', LinearRegression())])

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate some predictions
ytr, ytr_p = pipe.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.transform_predict(X_test, y_test)  # test predictions
xtr = np.arange(len(ytr))  # segment number
        # (continuation of VerboseDummyClassifier.fit — its def line is above
        # this chunk; indentation reconstructed, TODO confirm against full file)
        print("Fitting X (flattened):", X.flatten(), "on y:", y)
        return super(VerboseDummyClassifier, self).fit(X, y, sample_weight)

    def predict(self, X):
        # Log the flattened input, then delegate to the parent classifier.
        print("Predicting X (flattened):", X.flatten())
        return super(VerboseDummyClassifier, self).predict(X)

    def score(self, X, y, sample_weight=None):
        # Log inputs, then delegate scoring to the parent classifier.
        print("Scoring X (flattened):", X.flatten(), "on y:", y)
        return super(VerboseDummyClassifier, self).score(X, y, sample_weight)


# Pipeline: width-1 segments -> under-sampling -> 'min' feature -> logging dummy
pipe = Pype([
    ('segment', Segment(width=1, overlap=0)),
    ('resample', patch_sampler(RandomUnderSampler)(shuffle=True)),
    ('feature', FeatureRep(features={"min": minimum})),
    ('estimator', VerboseDummyClassifier(strategy="constant", constant=True)),
])
print("Pipeline:", pipe)

print("Split the data into half training and half test data:")
X_train, X_test, y_train, y_test = temporal_split(X, y, 0.5)
print("X_train:", X_train)
print("y_train:", y_train)
print("X_test:", X_test)
print("y_test:", y_test)
print()

print("Fit on the training data (this includes resampling):")
pipe.fit(X_train, y_train)
print()
    # (tail of a plotting helper whose def line is above this chunk;
    # indentation assumed, TODO confirm against full file)
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


##############################################
# SETUP
##############################################

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
steps = [('seg', Segment()),
         ('features', FeatureRep()),
         ('scaler', StandardScaler()),
         ('rf', RandomForestClassifier(n_estimators=20))]
pipe = Pype(steps)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=42)

##############################################
# OPTION 1: Use the score SegPipe score method
##############################################
# Example script: forecasting a single synthetic series with SegPipe.
t = np.arange(5000) / 100.
y = np.sin(t) * t * 2.5 + t * t

# with forecasting, X can include the target
X = np.stack([t, y], axis=1)

# remember for a single time series, we need to make a list
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
est = Pipeline([('features', FeatureRep()), ('lin', LinearRegression())])

# setting y_func = last, and forecast = 200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
segmenter = SegmentXYForecast(width=200, overlap=0.5, y_func=last, forecast=200)
pipe = SegPipe(est, segmenter)

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
# a single time series must be wrapped in a list
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
# setting y_func = last, and forecast = 200 makes us predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
clf = Pype([('segment', SegmentXYForecast(width=200, overlap=0.5, y_func=last,
                                          forecast=200)),
            ('features', FeatureRep()),
            ('lin', LinearRegression())])

# fit and score
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = clf.transform_predict(X, y)  # all predictions
ytr, ytr_p = clf.transform_predict(X_train, y_train)  # training predictions
# Example script: classify the seglearn 'watch' dataset with a SegmentX pipeline.
from seglearn.base import TS_Data
from seglearn.datasets import load_watch
from seglearn.pipe import Pype
from seglearn.transform import FeatureRep, SegmentX

# seed RNGESUS
np.random.seed(123124)

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
clf = Pype([('segment', SegmentX()),
            ('features', FeatureRep()),
            ('scaler', StandardScaler()),
            ('rf', RandomForestClassifier(n_estimators=20))])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Accuracy score: ", score)
def test_pipe_regression():
    """Regression paths of the legacy SegPipe API, including cross-validation
    and a transform-only pipeline."""
    est = Pipeline([('ftr', FeatureRep()), ('ridge', Ridge())])
    pipe = SegPipe(est, segmenter=SegmentXY())

    def _run(X, y):
        # fit / predict / score round trip shared by the layouts below
        pipe.fit(X, y)
        pipe.predict(X, y)
        pipe.score(X, y)

    # no context data, single time series
    _run([np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    _run(make_ts_data([np.random.rand(1000, 10)], [np.random.rand(3)]),
         [np.random.rand(1000)])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    _run(make_ts_data(ts, np.random.rand(3, 3)),
         [np.random.rand(1000), np.random.rand(100), np.random.rand(500)])

    # cross validation
    ts = np.array([np.random.rand(1000, 10) for i in range(5)])
    X = make_ts_data(ts, np.random.rand(5, 3))
    y = np.array([np.random.rand(1000) for i in range(5)])
    cross_validate(pipe, X, y)

    # transform-only pipeline
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])
    pipe = SegPipe(est, segmenter=SegmentXY())
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    X = make_ts_data(ts, np.random.rand(3, 3))
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
def test_pipe_classification():
    """Classification paths of the legacy SegPipe API, including a
    transform-only pipeline."""
    est = Pipeline([('ftr', FeatureRep()), ('ridge', RandomForestClassifier())])
    pipe = SegPipe(est, segmenter=SegmentX())

    def _run(X, y):
        # fit / predict / score round trip shared by the layouts below
        pipe.fit(X, y)
        pipe.predict(X, y)
        pipe.score(X, y)

    # no context data, single time series
    _run([np.random.rand(1000, 10)], [5])

    # context data, single time series
    _run(make_ts_data([np.random.rand(1000, 10)], [np.random.rand(3)]), [5])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    _run(make_ts_data(ts, np.random.rand(3, 3)), [1, 2, 3])

    # univariate data
    ts = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    _run(make_ts_data(ts, np.random.rand(3)), [1, 2, 3])

    # transform-only pipeline
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])
    pipe = SegPipe(est, segmenter=SegmentX())
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    X = make_ts_data(ts, np.random.rand(3, 3))
    y = [1, 2, 3]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
def test_pipe_regression():
    """Regression paths of the Pype API: fit/transform_predict/predict/score
    over several layouts, cross-validation, and a transform-only pipeline."""
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    def _run(X, y):
        # common fit / predict / score sequence
        pipe.fit(X, y)
        pipe.transform_predict(X, y)
        pipe.predict(X)
        pipe.score(X, y)

    # no context data, single time series
    _run([np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    _run(TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)]),
         [np.random.rand(1000)])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    _run(TS_Data(ts, np.random.rand(3, 3)),
         [np.random.rand(1000), np.random.rand(100), np.random.rand(500)])

    # cross validation
    X = TS_Data(np.array([np.random.rand(1000, 10)] * 5), np.random.rand(5, 3))
    y = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, X, y, cv=3)

    # transform-only pipeline
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    X = TS_Data(ts, np.random.rand(3, 3))
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
def test_pipe_PadTrunc():
    """PadTrunc classification paths of the Pype API, plus a transform-only
    pipeline."""
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    def _run(X, y):
        # common fit / predict / score sequence
        pipe.fit(X, y)
        pipe.transform_predict(X, y)
        pipe.predict(X)
        pipe.score(X, y)

    # no context data, single time series
    _run([np.random.rand(1000, 10)], [5])

    # context data, single time series
    _run(TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)]), [5])

    # multiple time series
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    _run(TS_Data(ts, np.random.rand(3, 3)), [1, 2, 3])

    # univariate data
    ts = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    _run(TS_Data(ts, np.random.rand(3)), [1, 2, 3])

    # transform-only pipeline
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    ts = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    X = TS_Data(ts, np.random.rand(3, 3))
    y = [1, 2, 3]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
# Example script: per-column feature extraction with FeatureRepMix.
from seglearn.feature_functions import minimum, maximum
from seglearn.base import TS_Data
import numpy as np
import pandas as pd

# Single multivariate time series with 3 samples of 4 variables
X = [np.array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]])]
# Time series target
y = [np.array([True, False, False])]

segment = SegmentXY(width=3, overlap=1)
X, y, _ = segment.fit_transform(X, y)

print('After segmentation:')
print("X:", X)
print("y: ", y)

# each tuple is (name, FeatureRep, column selector); the selector may be an
# int, a list of ints, a slice, or a boolean mask
union = FeatureRepMix([
    ('a', FeatureRep(features={'min': minimum}), 0),
    ('b', FeatureRep(features={'min': minimum}), 1),
    ('c', FeatureRep(features={'min': minimum}), [2, 3]),
    ('d', FeatureRep(features={'max': maximum}), slice(0, 2)),
    ('e', FeatureRep(features={'max': maximum}), [False, False, True, True]),
])

X = union.fit_transform(X, y)
print('After column-wise feature extraction:')

df = pd.DataFrame(data=X, columns=union.f_labels)
print(df)
    # (tail of a plotting helper whose def line is above this chunk;
    # indentation assumed, TODO confirm against full file)
    plt.xlabel('Predicted label')
    plt.tight_layout()


##############################################
# SETUP
##############################################

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
steps = [('seg', SegmentX()),
         ('features', FeatureRep()),
         ('scaler', StandardScaler()),
         ('rf', RandomForestClassifier(n_estimators=20))]
pipe = Pype(steps)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=42)

##############################################
# OPTION 1: Use the score SegPipe score method
##############################################

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
print("Accuracy score: ", score)
# Example script: classify the 'watch' dataset using PadTrunc segmentation.
from sklearn.svm import LinearSVC
from seglearn.datasets import load_watch
from seglearn.pipe import Pype
from seglearn.transform import FeatureRep, PadTrunc

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline with PadTrunc segmentation
# the time series are between 20-40 seconds
# this truncates them all to the first 5 seconds (sampling rate is 50 Hz)
pipe = Pype([('trunc', PadTrunc(width=250)),
             ('features', FeatureRep()),
             ('scaler', StandardScaler()),
             ('svc', LinearSVC())])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    shuffle=True,
                                                    random_state=42)

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
# seed RNGESUS np.random.seed(123124) # load the data data = load_watch() X = data['X'] y = data['y'] # I am adding in a column to represent time (50 Hz sampling), since my data doesn't include it # the Interp class assumes time is the first column in the series X = np.array([np.column_stack([np.arange(len(X[i])) / 50., X[i]]) for i in np.arange(len(X))]) clf = Pype([('interp', Interp(1. / 25., categorical_target=True)), ('segment', Segment(width=100)), ('features', FeatureRep()), ('scaler', StandardScaler()), ('rf', RandomForestClassifier(n_estimators=20))]) # split the data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25) clf.fit(X_train, y_train) score = clf.score(X_test, y_test) print("N series in train: ", len(X_train)) print("N series in test: ", len(X_test)) print("N segments in train: ", clf.N_train) print("N segments in test: ", clf.N_test) print("Accuracy score: ", score)
])  # closes a data literal begun above this chunk — TODO confirm against full file

# create the label vector and the corresponding semantic vector
y = np.array([0, 1, 2, 3, 4, 5, 6, 7])
labels = [
    'LB_BC', 'RB_BC', 'LF_BC', 'RF_BC', 'LB_BP', 'RB_BP', 'LF_BP', 'RF_BP'
]

# segment the data and labels
segmenter = SegmentX(100, 0.5)
X_new, y_new, _ = segmenter.fit_transform(X, y)

###################################################################################################

# create a pipeline for LDA transformation of the feature representation
est = Pipeline([('features', FeatureRep()),
                ('lda', LinearDiscriminantAnalysis(n_components=2))])
pipe = SegPipe(est)

# plot embedding
X2, y2 = pipe.fit_transform(X_new, y_new)
plot_embedding(X2, y2.astype(int), labels)
plt.show()

###################################################################################################

# create a pipeline for feature representation
est = Pipeline([('features', FeatureRep()),
                ('scaler', StandardScaler()),
                ('rf', RandomForestClassifier())])
pipe = SegPipe(est)
# load the data data = load_watch() X = data['X'] y = data['y'] # I am adding in a column to represent time (50 Hz sampling), since my data doesn't include it # the Interp class assumes time is the first column in the series X = np.array([ np.column_stack([np.arange(len(X[i])) / 50., X[i]]) for i in np.arange(len(X)) ]) clf = Pype([('interp', Interp(1. / 25., categorical_target=True)), ('segment', SegmentX(width=100)), ('features', FeatureRep()), ('scaler', StandardScaler()), ('rf', RandomForestClassifier(n_estimators=20))]) # split the data X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25) clf.fit(X_train, y_train) score = clf.score(X_test, y_test) print("N series in train: ", len(X_train)) print("N series in test: ", len(X_test)) print("N segments in train: ", clf.N_train) print("N segments in test: ", clf.N_test) print("Accuracy score: ", score)