# Build the forecasting pipeline: segment -> feature representation -> linear
# regression.
#
# With y_func=last and forecast=200 the target for each segment is the value of
# y located 200 samples ahead of that segment.  Other reasonable choices for
# y_func are ``mean`` and ``all``, or a function of your own; see the API
# documentation for further details.
clf = Pype([
    ('segment', SegmentXYForecast(width=200, overlap=0.5,
                                  y_func=last, forecast=200)),
    ('features', FeatureRep()),
    ('lin', LinearRegression()),
])

# Train on the training split, then evaluate on the held-out split.
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

# Report the number of series and segments seen, plus the resulting score.
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# Produce (target, prediction) pairs for each data set.
# NOTE: ``y`` is rebound here to the transformed targets of the full data set.
y, y_p = clf.transform_predict(X, y)  # full data set
ytr, ytr_p = clf.transform_predict(X_train, y_train)  # training split
yte, yte_p = clf.transform_predict(X_test, y_test)  # test split

# The first few segments of the test set have no predictions (a gap); the gap
# is plotted in the visualization to make that situation clear.
Ns = len(y)
def test_pipe_regression():
    """Smoke-test a segment -> feature -> Ridge regression pipeline.

    Runs fit / transform_predict / predict / score on several input layouts,
    then cross-validation, and finally a transformer-only pipeline.  No values
    are asserted: the test passes as long as none of the calls raises.
    """

    def run_estimator_api(model, data, target):
        # the four estimator entry points, always called in this order
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    series_lengths = (1000, 100, 500)

    # single time series, no context data
    data = [np.random.rand(1000, 10)]
    target = [np.random.rand(1000)]
    regressor = Pype([('seg', SegmentXY()),
                      ('ftr', FeatureRep()),
                      ('ridge', Ridge())])
    run_estimator_api(regressor, data, target)

    # single time series with context data
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    data = TS_Data(series, context)
    target = [np.random.rand(1000)]
    run_estimator_api(regressor, data, target)

    # multiple time series of differing lengths
    series = [np.random.rand(length, 10) for length in series_lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(length) for length in series_lengths]
    run_estimator_api(regressor, data, target)

    # cross validation over five copies of one random series
    single_series = np.random.rand(1000, 10)
    series = np.array([single_series] * 5)
    context = np.random.rand(5, 3)
    data = TS_Data(series, context)
    single_target = np.random.rand(1000)
    target = np.array([single_target] * 5)
    cross_validate(regressor, data, target, cv=3)

    # transformer-only pipeline (final step is a scaler, not an estimator)
    transformer = Pype([('seg', SegmentXY()),
                        ('ftr', FeatureRep()),
                        ('scaler', StandardScaler())])
    series = [np.random.rand(length, 10) for length in series_lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(length) for length in series_lengths]
    transformer.fit(data, target)
    transformer.transform(data, target)
    transformer.fit_transform(data, target)
# Fetch the data set and pull out the series and their labels.
data = load_watch()
X, y = data['X'], data['y']

# Feature representation pipeline that uses PadTrunc for segmentation.
# The time series last between 20 and 40 seconds; at a 50 Hz sampling rate a
# width of 250 samples truncates each of them to its first 5 seconds.
pipe = Pype([
    ('trunc', PadTrunc(width=250)),
    ('features', FeatureRep()),
    ('scaler', StandardScaler()),
    ('svc', LinearSVC()),
])

# Hold out a quarter of the series for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, shuffle=True, random_state=42)

# Train on the training split and evaluate on the held-out split.
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

# Report the number of series and segments seen, plus the accuracy.
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Accuracy score: ", score)

# Display the illustration image (read from the current working directory).
img = mpimg.imread('trunk.jpg')
plt.imshow(img)
def test_pipe_PadTrunc():
    """Smoke-test a PadTrunc -> feature -> random forest pipeline.

    Runs fit / transform_predict / predict / score on several input layouts
    (with and without context data, multivariate and univariate series), then
    a transformer-only pipeline.  No values are asserted: the test passes as
    long as none of the calls raises.
    """

    def run_estimator_api(model, data, target):
        # the four estimator entry points, always called in this order
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    series_lengths = (1000, 100, 500)

    # single time series, no context data
    data = [np.random.rand(1000, 10)]
    target = [5]
    classifier = Pype([('trunc', PadTrunc()),
                       ('ftr', FeatureRep()),
                       ('rf', RandomForestClassifier(n_estimators=10))])
    run_estimator_api(classifier, data, target)

    # single time series with context data
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    data = TS_Data(series, context)
    target = [5]
    run_estimator_api(classifier, data, target)

    # multiple time series of differing lengths
    series = [np.random.rand(length, 10) for length in series_lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [1, 2, 3]
    run_estimator_api(classifier, data, target)

    # univariate series with a single context value per series
    series = [np.random.rand(length) for length in series_lengths]
    context = np.random.rand(3)
    data = TS_Data(series, context)
    target = [1, 2, 3]
    run_estimator_api(classifier, data, target)

    # transformer-only pipeline (final step is a scaler, not an estimator)
    transformer = Pype([('trunc', PadTrunc()),
                        ('ftr', FeatureRep()),
                        ('scaler', StandardScaler())])
    series = [np.random.rand(length, 10) for length in series_lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [1, 2, 3]
    transformer.fit(data, target)
    transformer.transform(data, target)
    transformer.fit_transform(data, target)