def demoDataset():
    """Build a segmented demo dataset: a torch input plus its targets.

    Generates 20000 unshuffled samples with 10 features around 3 centers
    (fixed ``random_state=1``), standardizes them, segments the single
    resulting series with ``SegmentXY(width=12, overlap=0)`` and flattens
    every segment into one row.

    Returns:
        tuple: ``(X, y)`` where ``X`` is the flattened segments converted to
        a float torch ``Variable`` and ``y`` is the target output of
        ``SegmentXY.fit_transform``.
    """
    samples, labels = make_blobs(
        n_features=10,
        n_samples=20000,
        centers=3,
        shuffle=False,
        random_state=1,
    )
    standardized = StandardScaler().fit_transform(samples)

    # The segmenter is fed lists, so wrap the single series and its target.
    segmenter = SegmentXY(width=12, overlap=0)
    segments, targets, _ = segmenter.fit_transform([standardized], [labels])

    # Keep the first axis (one row per segment) and flatten everything else.
    flat = segments.reshape(segments.shape[0], -1)
    return Variable(torch.from_numpy(flat).float()), targets
def test_pipe_regression():
    """Exercise a segment -> feature -> ridge pipeline on several inputs.

    The same ``Pype`` instance is passed to ``regression_test`` for a plain
    single series, a single series with context data, and several series of
    different lengths with context data.  Finally it is run through
    ``cross_validate`` with three folds.
    """
    lengths = (1000, 100, 500)

    # no context data, single time series
    series = [np.random.rand(1000, 10)]
    target = [np.random.rand(1000)]
    pipe = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('ridge', Ridge()),
    ])
    regression_test(pipe, series, target)

    # context data, single time series
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    data = TS_Data(series, context)
    target = [np.random.rand(1000)]
    regression_test(pipe, data, target)

    # context data, multiple time series of different lengths
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(n) for n in lengths]
    regression_test(pipe, data, target)

    # cross validation
    # NOTE: ``[arr] * 5`` repeats one array object, so all five series (and
    # all five targets) hold identical values.
    series = np.array([np.random.rand(1000, 10)] * 5)
    context = np.random.rand(5, 3)
    data = TS_Data(series, context)
    target = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, data, target, cv=3)
def test_pipe_transformation():
    """Run ``transformation_test`` on every transform-only pipeline variant.

    Each variant puts a different first step (``SegmentX``, ``SegmentXY``,
    ``SegmentXYForecast`` or ``PadTrunc``) in front of ``FeatureRep`` and
    ``StandardScaler``.  Every pipeline is checked twice: once with a
    ``TS_Data`` built directly from the series and context arrays, and once
    with a ``TS_Data`` obtained from a DataFrame through ``TS_Data.from_df``.
    """
    lengths = (1000, 100, 500)

    # (step name, first-step class, target kind).  ``True`` means y holds one
    # random target series per input series; ``False`` means y is the fixed
    # list ``[1, 2, 3]``.
    variants = [
        ('seg', SegmentX, False),
        ('seg', SegmentXY, True),
        ('seg', SegmentXYForecast, True),
        ('trunc', PadTrunc, False),
    ]

    for step_name, first_step, series_target in variants:
        pipe = Pype([
            (step_name, first_step()),
            ('ftr', FeatureRep()),
            ('scaler', StandardScaler()),
        ])

        series = [np.random.rand(n, 10) for n in lengths]
        context = np.random.rand(3, 3)
        data = TS_Data(series, context)
        if series_target:
            target = [np.random.rand(n) for n in lengths]
        else:
            target = [1, 2, 3]
        transformation_test(pipe, data, target)

        # Same data routed through a DataFrame: context values as columns,
        # the series stored in the 'ts_data' column.
        frame = pd.DataFrame(context)
        frame['ts_data'] = series
        data = TS_Data.from_df(frame)
        transformation_test(pipe, data, target)
# for investigation # create a feature representation pipeline chosenwidth = 6 fts = {'mean': mean, 'var': var, 'std': std, 'skew': skew, 'mnx': mean_crossings, 'minimum':minimum, 'maximum':maximum, \ 'mean_diff':mean_diff} scorer1 = make_scorer(f1_score, average='macro') C_chosen = 77.42 #12.915#77.42#grid1.best_params_["C"] gamma_chosen = 0.05994 #grid1.best_params_["gamma"] n_estimators = 20 #clf = Pype([('segment', SegmentXY(width=chosenwidth,step=1)), # in this context what is the difference with SegmentX? # ('features', FeatureRep(fts)), # ('scaler', StandardScaler()), # ('rf', RandomForestClassifier(n_estimators=20))], scorer=scorer1) clf = Pype([ ('segment', SegmentXY( width=chosenwidth, step=1)), # in this context what is the difference with SegmentX? ('features', FunctionTransformer(reshape_all)), ('scaler', StandardScaler()), ('bagg', OneVsRestClassifier( BaggingClassifier(SVC(kernel='rbf', gamma=gamma_chosen, C=C_chosen, probability=True, class_weight='balanced'), max_samples=1.0 / n_estimators, warm_start=True, n_estimators=n_estimators, n_jobs=6, verbose=10)))
# Author: Matthias Gazzari
# License: BSD

from seglearn.transform import SegmentXY, FeatureRep, FeatureRepMix
from seglearn.feature_functions import minimum, maximum
from seglearn.base import TS_Data

import numpy as np
import pandas as pd

# One multivariate time series: 3 samples (rows) of 4 variables (columns).
X = [
    np.array([
        [0, 1, 2, 3],
        [4, 5, 6, 7],
        [8, 9, 10, 11],
    ])
]
# One target value per sample of the series.
y = [np.array([True, False, False])]

# Segment the series and its target together.
segment = SegmentXY(width=3, overlap=1)
X, y, _ = segment.fit_transform(X, y)

print('After segmentation:')
print("X:", X)
print("y: ", y)

# Each entry is (name, transformer, column selector).  The selectors below
# cover a single index, a list of indices, a slice and a boolean mask.
union = FeatureRepMix([
    ('a', FeatureRep(features={'min': minimum}), 0),
    ('b', FeatureRep(features={'min': minimum}), 1),
    ('c', FeatureRep(features={'min': minimum}), [2, 3]),
    ('d', FeatureRep(features={'max': maximum}), slice(0, 2)),
    ('e', FeatureRep(features={'max': maximum}), [False, False, True, True]),
])

X = union.fit_transform(X, y)
# Single time series with 10 samples (rows) of 2 variables (columns)
X = [
    np.array([
        [0, 1],
        [1, 2],
        [2, 3],
        [3, 4],
        [4, 5],
        [5, 6],
        [6, 7],
        [7, 8],
        [8, 9],
        [9, 10],
    ])
]
# Time series target (imbalanced towards False: 2 True vs. 8 False)
y = [
    np.array([
        True, False, False, False, False,
        False, True, False, False, False,
    ])
]

print("Implementation details: transform and fit_transform methods:")

# Segment into single-sample windows, then resample with the patched
# RandomUnderSampler.
pipe = Pype([
    ('segment', SegmentXY(width=1, overlap=0)),
    ('resample', patch_sampler(RandomUnderSampler)()),
])
print("Pipeline:", pipe)

print("Calling a transform on the data does not change it ...")
Xf, yf = pipe.transform(X, y)
print("X (flattened):", Xf.flatten())
print("y", yf)

print("... but calling fit_transform resamples the data.")
Xf, yf = pipe.fit_transform(X, y)
print("X (flattened):", Xf.flatten())
print("y", yf)
print()
def test_pipe_regression():
    """Smoke-test ``SegPipe`` around regression and transform estimators.

    A feature -> ridge pipeline wrapped in ``SegPipe`` is fitted, used for
    prediction and scored on a plain single series, a single series with
    context data, and several series with context data.  It is then passed
    to ``cross_validate``.  Finally a feature -> scaler pipeline is wrapped
    and its ``fit``, ``transform`` and ``fit_transform`` are called.
    """
    lengths = (1000, 100, 500)

    def fit_predict_score(model, data, target):
        # The checks only require that these calls complete without raising.
        model.fit(data, target)
        model.predict(data, target)
        model.score(data, target)

    # no context data, single time series
    series = [np.random.rand(1000, 10)]
    target = [np.random.rand(1000)]
    est = Pipeline([('ftr', FeatureRep()), ('ridge', Ridge())])
    pipe = SegPipe(est, segmenter=SegmentXY())
    fit_predict_score(pipe, series, target)

    # context data, single time series
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    data = make_ts_data(series, context)
    target = [np.random.rand(1000)]
    fit_predict_score(pipe, data, target)

    # context data, multiple time series of different lengths
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = make_ts_data(series, context)
    target = [np.random.rand(n) for n in lengths]
    fit_predict_score(pipe, data, target)

    # cross validation over five independently drawn series
    series = np.array([np.random.rand(1000, 10) for _ in range(5)])
    context = np.random.rand(5, 3)
    data = make_ts_data(series, context)
    target = np.array([np.random.rand(1000) for _ in range(5)])
    cross_validate(pipe, data, target)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])
    pipe = SegPipe(est, segmenter=SegmentXY())
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = make_ts_data(series, context)
    target = [np.random.rand(n) for n in lengths]
    pipe.fit(data, target)
    pipe.transform(data, target)
    pipe.fit_transform(data, target)
def test_pipe_regression():
    """Smoke-test ``Pype`` regression and transform pipelines.

    A segment -> feature -> ridge pipeline is fitted, then its
    ``transform_predict``, ``predict`` and ``score`` are called on a plain
    single series, a single series with context data, and several series
    with context data.  It is then passed to ``cross_validate`` with three
    folds.  Finally a segment -> feature -> scaler pipeline has its ``fit``,
    ``transform`` and ``fit_transform`` called.
    """
    lengths = (1000, 100, 500)

    def exercise(model, data, target):
        # The checks only require that these calls complete without raising.
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    # no context data, single time series
    series = [np.random.rand(1000, 10)]
    target = [np.random.rand(1000)]
    pipe = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('ridge', Ridge()),
    ])
    exercise(pipe, series, target)

    # context data, single time series
    series = [np.random.rand(1000, 10)]
    context = [np.random.rand(3)]
    data = TS_Data(series, context)
    target = [np.random.rand(1000)]
    exercise(pipe, data, target)

    # context data, multiple time series of different lengths
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(n) for n in lengths]
    exercise(pipe, data, target)

    # cross validation
    # NOTE: ``[arr] * 5`` repeats one array object, so all five series (and
    # all five targets) hold identical values.
    series = np.array([np.random.rand(1000, 10)] * 5)
    context = np.random.rand(5, 3)
    data = TS_Data(series, context)
    target = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, data, target, cv=3)

    # transform pipe
    pipe = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('scaler', StandardScaler()),
    ])
    series = [np.random.rand(n, 10) for n in lengths]
    context = np.random.rand(3, 3)
    data = TS_Data(series, context)
    target = [np.random.rand(n) for n in lengths]
    pipe.fit(data, target)
    pipe.transform(data, target)
    pipe.fit_transform(data, target)
from seglearn.split import temporal_split, TemporalKFold
from seglearn.transform import FeatureRep, SegmentXY, last

# A single time series still has to be wrapped in a list.
X = [np.arange(10000) / 100.]
y = [np.sin(X[0]) * X[0] * 3 + X[0] * X[0]]

# With only one series available, the train/test split has to be made
# along the time axis.
X_train, X_test, y_train, y_test = temporal_split(X, y)

# SegmentXY segments both X and y (as the name implies).
# y_func=last selects the last value of each y segment as the target;
# transform.middle is another option, or a custom function can be supplied
# (see the API documentation for details).
pipe = Pype([
    ('seg', SegmentXY(width=200, overlap=0.5, y_func=last)),
    ('features', FeatureRep()),
    ('lin', LinearRegression()),
])

# fit on the training split, score on the test split
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate (target, prediction) pairs for both splits
ytr, ytr_p = pipe.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.transform_predict(X_test, y_test)  # test predictions
# A single time series still has to be wrapped in a list.
X = [np.arange(10000) / 100.]
y = [np.sin(X[0]) * X[0] * 3 + X[0] * X[0]]

# With only one series available, the train/test split has to be made
# along the time axis.
X_train, X_test, y_train, y_test = temporal_split(X, y)

# feature representation followed by a linear regressor
est = Pipeline([
    ('features', FeatureRep()),
    ('lin', LinearRegression()),
])

# SegmentXY segments both X and y (as the name implies).
# y_func=last selects the last value of each y segment as the target;
# transform.middle is another option, or a custom function can be supplied
# (see the API documentation for details).
segmenter = SegmentXY(width=200, overlap=0.5, y_func=last)
pipe = SegPipe(est, segmenter)

# fit on the training split, score on the test split
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate (target, prediction) pairs for both splits
ytr, ytr_p = pipe.predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.predict(X_test, y_test)  # test predictions