def test_pipe_regression():
    """Exercise a segment -> features -> Ridge Pype on several input layouts."""
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    # single time series, no context data
    series = [np.random.rand(1000, 10)]
    targets = [np.random.rand(1000)]
    regression_test(pipe, series, targets)

    # single time series with context data
    regression_test(pipe,
                    TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)]),
                    [np.random.rand(1000)])

    # several time series of different lengths
    lengths = [1000, 100, 500]
    series = [np.random.rand(n, 10) for n in lengths]
    targets = [np.random.rand(n) for n in lengths]
    regression_test(pipe, TS_Data(series, np.random.rand(3, 3)), targets)

    # cross validation over five equal-length series
    series = np.array([np.random.rand(1000, 10)] * 5)
    targets = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, TS_Data(series, np.random.rand(5, 3)), targets, cv=3)
def test_pipe_PadTrunc():
    """Run the pad/truncate classification Pype over various input layouts."""
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    # single time series, no context data
    classifier_test(pipe, [np.random.rand(1000, 10)], [5])

    # single time series with context data
    classifier_test(pipe,
                    TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)]),
                    [5])

    # several multivariate series of different lengths
    lengths = [1000, 100, 500]
    ts = [np.random.rand(n, 10) for n in lengths]
    classifier_test(pipe, TS_Data(ts, np.random.rand(3, 3)), [1, 2, 3])

    # univariate series
    ts = [np.random.rand(n) for n in lengths]
    classifier_test(pipe, TS_Data(ts, np.random.rand(3)), [1, 2, 3])
def test_ts_data():
    """TS_Data should expose ts/context parts and preserve both under indexing."""
    # time series data: the three series have different lengths, so the
    # container must be an explicit object array -- np.array() on a ragged
    # list without dtype=object raises in numpy >= 1.24 (NEP 34)
    ts = np.array([np.random.rand(100, 10),
                   np.random.rand(200, 10),
                   np.random.rand(20, 10)], dtype=object)
    c = np.random.rand(3, 10)
    data = TS_Data(ts, c)
    assert np.array_equal(data.context_data, c)
    assert np.array_equal(data.ts_data, ts)
    # integer indexing returns a TS_Data pairing the selected series/context
    assert isinstance(data[1], TS_Data)
    assert np.array_equal(data[1].ts_data, ts[1])
    assert np.array_equal(data[1].context_data, c[1])

    # segmented (equal-length) time series data: slicing keeps parts aligned
    sts = np.random.rand(100, 10, 6)
    c = np.random.rand(100, 6)
    data = TS_Data(sts, c)
    assert isinstance(data[4:10], TS_Data)
    assert np.array_equal(data[4:10].ts_data, sts[4:10])
    assert np.array_equal(data[4:10].context_data, c[4:10])

    # univariate segmented data with scalar context
    sts = np.random.rand(100, 10)
    c = np.random.rand(100)
    data = TS_Data(sts, c)
    assert isinstance(data[4:10], TS_Data)
    assert np.array_equal(data[4:10].ts_data, sts[4:10])
    assert np.array_equal(data[4:10].context_data, c[4:10])
def test_pipe_transformation():
    """transformation_test should pass for every segmenter/pad front end."""
    lengths = [1000, 100, 500]

    def _data():
        # fresh multi-series TS_Data with 3x3 context variables
        ts = [np.random.rand(n, 10) for n in lengths]
        return TS_Data(ts, np.random.rand(3, 3))

    y_class = [1, 2, 3]
    y_series = [np.random.rand(n) for n in lengths]

    cases = [
        ('seg', SegmentX(), y_class),
        ('seg', SegmentXY(), y_series),
        ('seg', SegmentXYForecast(), y_series),
        ('trunc', PadTrunc(), y_class),
    ]
    for step_name, segmenter, y in cases:
        pipe = Pype([(step_name, segmenter),
                     ('ftr', FeatureRep()),
                     ('scaler', StandardScaler())])
        transformation_test(pipe, _data(), y)
def test_segmentxyforecast():
    """SegmentXYForecast should emit (N, width[, nvars]) segments."""
    Nt = 100
    width = 5
    nvars = 5
    lengths = [Nt, 2 * Nt, Nt]

    # forecast segmentation of multivariate series with context data
    seg = transform.SegmentXYForecast(width=width, forecast=5)
    Xt = [np.random.rand(n, nvars) for n in lengths]
    Xc = np.random.rand(3, 4)
    y = [np.random.rand(n) for n in lengths]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    n_seg = len(ys)
    assert Xst.shape == (n_seg, width, nvars)
    assert Xsc.shape == (n_seg, 4)

    # univariate X without context data
    seg = transform.SegmentXYForecast(width=width, forecast=5)
    X = [np.random.rand(n) for n in lengths]
    y = [np.random.rand(n) for n in lengths]
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    assert Xst.shape == (len(ys), width)
def test_feature_rep_mix():
    """FeatureRepMix should produce one labelled column set per selected input."""
    union = transform.FeatureRepMix([
        ('a', transform.FeatureRep(features={'mean': mean}), 0),
        ('b', transform.FeatureRep(features={'mean': mean}), 1),
        ('c', transform.FeatureRep(features={'mean': mean}), [2, 3]),
        ('d', transform.FeatureRep(features={'mean': mean}), slice(0, 2)),
        ('e', transform.FeatureRep(features={'mean': mean}),
         [False, False, True, True]),
    ])

    y = np.ones(100)
    # the same union handles plain arrays and TS_Data with either context shape
    multivariate_inputs = [
        np.random.rand(100, 10, 4),
        TS_Data(np.random.rand(100, 10, 4), np.random.rand(100, 3)),
        TS_Data(np.random.rand(100, 10, 4), np.random.rand(100)),
    ]
    for X in multivariate_inputs:
        union.fit(X, y)
        Xt = union.transform(X)
        assert Xt.shape[0] == len(X)
        assert len(union.f_labels) == Xt.shape[1]

    # univariate ts: selectors restricted to the single variable
    uni_union = transform.FeatureRepMix([
        ('a', transform.FeatureRep(features={'mean': mean}), 0),
        ('b', transform.FeatureRep(features={'mean': mean}), [0]),
        ('c', transform.FeatureRep(features={'mean': mean}), slice(0, 1)),
        ('d', transform.FeatureRep(features={'mean': mean}), [True]),
    ])
    X = np.random.rand(100, 10)
    uni_union.fit(X, y)
    Xt = uni_union.transform(X)
    assert Xt.shape[0] == len(X)
    assert len(uni_union.f_labels) == Xt.shape[1]
def test_temporal_k_fold():
    """TemporalKFold.split should yield valid folds for every input layout."""
    splitter = TemporalKFold()

    # length-1 series
    single_cases = [
        ([rand(100, 10)], [5]),
        ([rand(100, 10)], [rand(100)]),
        (TS_Data([rand(100, 10)], [5]), [rand(100)]),
    ]
    for X, y in single_cases:
        Xs, ys, cv = splitter.split(X, y)
        check_folds(Xs, ys, cv)

    # several series
    splitter = TemporalKFold()
    Ns = 5
    multi_cases = [
        (np.array([rand(100, 10)] * Ns), rand(Ns)),
        (np.array([rand(100, 10)] * Ns), np.array([rand(100)] * Ns)),
        (TS_Data(np.array([rand(100, 10)] * Ns), rand(Ns)),
         np.array([rand(100)] * Ns)),
        (TS_Data(np.array([rand(100, 10)] * Ns), rand(Ns)), rand(Ns)),
    ]
    for X, y in multi_cases:
        Xs, ys, cv = splitter.split(X, y)
        check_folds(Xs, ys, cv)
def test_pd():
    """TS_Data.from_df should recover ts_data and context columns from a DataFrame."""
    ts = np.array([np.random.rand(100, 10),
                   np.random.rand(200, 10),
                   np.random.rand(20, 10)], dtype=object)
    context = np.random.rand(3, 10)
    frame = pd.DataFrame(context)
    frame['ts_data'] = ts
    data = TS_Data.from_df(frame)
    for recovered, original in zip(data.ts_data, ts):
        assert np.array_equal(recovered, original)
    assert np.array_equal(data.context_data, context)
def test_temporal_split():
    """temporal_split should keep X/y lengths consistent in both partitions."""

    def _check(X, y):
        Xtr, Xte, ytr, yte = temporal_split(X, y)
        assert len(Xtr) == len(ytr)
        assert len(Xte) == len(yte)

    # length-1 series
    _check([rand(100, 10)], [5])
    _check([rand(100, 10)], [rand(100)])
    _check(TS_Data([rand(100, 10)], [5]), [rand(100)])

    # several series
    Ns = 5
    _check(np.array([rand(100, 10) for _ in range(Ns)]), rand(Ns))
    _check(np.array([rand(100, 10) for _ in range(Ns)]),
           np.array([rand(100) for _ in range(Ns)]))
    _check(TS_Data(np.array([rand(100, 10) for _ in range(Ns)]), rand(Ns)),
           np.arange(Ns))
def test_feature_rep():
    """FeatureRep should label one output column per computed feature."""

    def _check(frep, X, y):
        frep.fit(X, y)
        Xt = frep.transform(X)
        assert Xt.shape[0] == len(X)
        assert len(frep.f_labels) == Xt.shape[1]
        return Xt

    y = np.ones(100)

    # multivariate ts
    frep = transform.FeatureRep(features=all_features())
    _check(frep, np.random.rand(100, 10, 5), y)

    # univariate ts (same transformer, re-fitted)
    X = np.random.rand(100, 10)
    _check(frep, X, y)

    # a single feature yields exactly one output column
    frep = transform.FeatureRep(features={'mean': mean})
    Xt = _check(frep, X, y)
    assert Xt.shape[1] == 1

    # ts with multivariate contextual data
    frep = transform.FeatureRep(features=all_features())
    _check(frep, TS_Data(np.random.rand(100, 10, 5), np.random.rand(100, 3)), y)

    # ts with univariate contextual data
    _check(frep, TS_Data(np.random.rand(100, 10, 5), np.random.rand(100)), y)
def test_util():
    """get_ts_data_parts / check_ts_data / ts_stats should accept the watch data."""
    df = load_watch()
    data = TS_Data(df['X'], df['side'])
    Xt, Xc = util.get_ts_data_parts(data)
    assert np.array_equal(Xc, df['side'])
    for recovered, original in zip(Xt, df['X']):
        assert np.array_equal(recovered, original)
    util.check_ts_data(data, df['y'])
    util.check_ts_data(df['X'], df['y'])
    util.ts_stats(df['X'], df['y'], fs=1., class_labels=df['y_labels'])
def test_ts_data():
    """Indexing or slicing a TS_Data should return a TS_Data."""
    # NOTE(review): this redefines test_ts_data from earlier in the file, so
    # only this later definition is collected by pytest -- consider renaming.

    # ragged series require an explicit object array (np.array on a ragged
    # list raises in numpy >= 1.24, NEP 34)
    ts = np.array([np.random.rand(100, 10),
                   np.random.rand(200, 10),
                   np.random.rand(20, 10)], dtype=object)
    c = np.random.rand(3, 10)
    data = TS_Data(ts, c)
    # isinstance instead of a type(...) == comparison
    assert isinstance(data[1], TS_Data)

    # segmented time series data
    sts = np.random.rand(100, 10, 6)
    c = np.random.rand(100, 6)
    data = TS_Data(sts, c)
    assert isinstance(data[4:10], TS_Data)

    # univariate segmented data with scalar context
    sts = np.random.rand(100, 10)
    c = np.random.rand(100)
    data = TS_Data(sts, c)
    assert isinstance(data[4:10], TS_Data)
def test_temporal_split():
    """temporal_split results should satisfy check_split for every layout."""
    # NOTE(review): redefines test_temporal_split from earlier in the file

    def _split_and_check(X, y):
        Xtr, Xte, ytr, yte = temporal_split(X, y)
        check_split(X, Xtr, Xte, y, ytr, yte)

    # length-1 series
    _split_and_check([rand(100, 10)], [5])
    _split_and_check([rand(100, 10)], [rand(100)])
    _split_and_check(TS_Data([rand(100, 10)], [5]), [rand(100)])

    # several series
    Ns = 5
    _split_and_check(np.array([rand(100, 10)] * Ns), rand(Ns))
    _split_and_check(np.array([rand(100, 10)] * Ns),
                     np.array([rand(100)] * Ns))
    _split_and_check(TS_Data(np.array([rand(100, 10)] * Ns), rand(Ns)),
                     np.arange(Ns))
def test_trle():
    """TargetRunLengthEncoder should keep only target runs >= min_length."""
    Nt = 100
    nvars = 5

    # multivariate data: runs of 3/26/1/70 samples -> only 26 and 70 survive
    X = [np.random.rand(Nt, nvars)]
    y = [np.concatenate([np.full(3, 1), np.full(26, 2),
                         np.full(1, 3), np.full(70, 4)])]
    rle = TargetRunLengthEncoder(min_length=5)
    rle.fit(X)
    Xt, yt, _ = rle.transform(X, y)
    assert len(Xt) == len(yt) == 2
    assert yt[0] == 2 and yt[1] == 4
    assert len(Xt[0]) == 26 and len(Xt[1]) == 70

    # nothing excluded: both runs are long enough
    X = [np.random.rand(Nt, nvars)]
    y = [np.concatenate([np.full(50, 1), np.full(50, 2)])]
    rle = TargetRunLengthEncoder(min_length=5)
    rle.fit(X, y)
    Xt, yt, _ = rle.transform(X, y)
    assert len(Xt) == len(yt) == 2
    assert np.all(np.concatenate(Xt) == X)
    assert yt[0] == 1 and yt[1] == 2
    assert len(Xt[0]) == 50 and len(Xt[1]) == 50

    # univariate data with sample weight and context: context and weight
    # are replicated for each surviving run
    Xts = [np.random.rand(Nt)]
    Xc = [5]
    X = TS_Data(Xts, Xc)
    y = [np.concatenate([np.full(3, 1), np.full(26, 2),
                         np.full(1, 3), np.full(70, 4)])]
    sw = [1]
    rle = TargetRunLengthEncoder(min_length=5)
    rle.fit(X)
    Xt, yt, swt = rle.transform(X, y, sw)
    Xtc = Xt.context_data
    assert len(Xt) == len(swt) == len(yt) == 2
    assert yt[0] == 2 and yt[1] == 4
    assert len(Xt[0]) == 26 and len(Xt[1]) == 70
    assert swt[0] == 1 and swt[1] == 1
    assert Xtc[0] == 5 and Xtc[1] == 5
def test_pipe_forecast():
    """Forecast Pype should fit/score on raw, TS_Data and DataFrame inputs."""
    pipe = Pype([('seg', SegmentXYForecast()),
                 ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    # no context data, single time series
    forecast_test(pipe, [np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    y = [np.random.rand(1000)]
    forecast_test(pipe, TS_Data(Xt, Xc), y)

    frame = pd.DataFrame(Xc)
    frame['ts_data'] = Xt
    forecast_test(pipe, TS_Data.from_df(frame), y)

    # several series of different lengths
    lengths = [1000, 100, 500]
    Xt = [np.random.rand(n, 10) for n in lengths]
    Xc = np.random.rand(3, 3)
    y = [np.random.rand(n) for n in lengths]
    forecast_test(pipe, TS_Data(Xt, Xc), y)

    frame = pd.DataFrame(Xc)
    frame['ts_data'] = Xt
    forecast_test(pipe, TS_Data.from_df(frame), y)

    # cross validation
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    y = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, TS_Data(Xt, Xc), y, cv=3)

    frame = pd.DataFrame(Xc)
    frame['ts_data'] = [np.random.rand(1000, 10)] * 5
    cross_validate(pipe, TS_Data.from_df(frame), y, cv=3)
def test_pipe_classification():
    """Classification Pype should handle raw, TS_Data and DataFrame inputs."""
    pipe = Pype([('seg', SegmentX()),
                 ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    def _df_variant(Xt, Xc):
        # same data routed through the DataFrame constructor
        frame = pd.DataFrame(Xc)
        frame['ts_data'] = Xt
        return TS_Data.from_df(frame)

    # no context data, single time series
    classifier_test(pipe, [np.random.rand(1000, 10)], [5])

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    classifier_test(pipe, TS_Data(Xt, Xc), [5])
    classifier_test(pipe, _df_variant(Xt, Xc), [5])

    # multiple multivariate series
    Xt = [np.random.rand(n, 10) for n in (1000, 100, 500)]
    Xc = np.random.rand(3, 3)
    classifier_test(pipe, TS_Data(Xt, Xc), [1, 2, 3])
    classifier_test(pipe, _df_variant(Xt, Xc), [1, 2, 3])

    # univariate series
    Xt = [np.random.rand(n) for n in (1000, 100, 500)]
    Xc = np.random.rand(3)
    classifier_test(pipe, TS_Data(Xt, Xc), [1, 2, 3])
    classifier_test(pipe, _df_variant(Xt, Xc), [1, 2, 3])
def make_ts_data(time_series, context_vars=None):
    """Combine time series data and relational contextual variables into a
    ``TS_Data`` object compatible with ``SegPipe`` and related classes.

    If ``context_vars`` is None, a numpy array of the time series is
    returned instead.

    Parameters
    ----------
    time_series : array-like, shape [n_series, ]
        Time series data - each element (series) may have a different length
    context_vars : array-like, shape [n_series, n_context_variables]
        contextual relational data

    Returns
    -------
    X : array-like [n_series, ]
        ``TS_Data`` object containing time series and contextual data
    """
    if context_vars is None:
        return np.array(time_series)
    return TS_Data(time_series, context_vars)
def test_pad_trunc():
    """PadTrunc should pad/truncate series (and series-valued y) to `width`."""
    Nt = 100
    width = 5
    nvars = 5
    seg = transform.PadTrunc(width=width)

    # multivariate ts data without context data
    X = [np.random.rand(Nt, nvars),
         np.random.rand(Nt, nvars),
         np.random.rand(Nt, nvars)]
    y = [np.random.rand(Nt), np.random.rand(Nt), np.random.rand(Nt)]
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width, nvars)
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # univariate ts data without context data
    X = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(3 * Nt)]
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(3 * Nt)]
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width)
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # multivariate ts data with context data
    Xt = [np.random.rand(Nt, nvars),
          np.random.rand(2 * Nt, nvars),
          np.random.rand(Nt, nvars)]
    Xc = np.random.rand(3, 4)
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(Nt)]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 4)
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # ts data with univariate context data
    Xt = [np.random.rand(Nt, nvars),
          np.random.rand(2 * Nt, nvars),
          np.random.rand(Nt, nvars)]
    Xc = np.random.rand(3)
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(Nt)]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, )
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # same number of context vars and time vars -- this would cause a
    # broadcasting failure before implementation of the TS_Data class
    Xt = [np.random.rand(Nt, nvars),
          np.random.rand(2 * Nt, nvars),
          np.random.rand(Nt, nvars)]
    Xc = np.random.rand(3, nvars)
    y = [np.random.rand(Nt), np.random.rand(2 * Nt), np.random.rand(Nt)]
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 5)
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i][0:width], ys[i]) for i in range(len(y))])

    # scalar targets: y is one value per series and passes through unchanged
    width = 5
    nvars = 5
    seg = transform.PadTrunc(width=width)

    # multivariate ts data without context data
    X = [np.random.rand(100, nvars),
         np.random.rand(100, nvars),
         np.random.rand(100, nvars)]
    y = np.random.rand(3)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width, nvars)
    # BUG FIX: these loops previously iterated range(len(Xt)) using the stale
    # Xt list from the section above; iterate over X itself
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i], ys[i]) for i in range(len(y))])

    # univariate ts data without context
    X = [np.random.rand(100), np.random.rand(100), np.random.rand(100)]
    y = np.random.rand(3)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    N = len(ys)
    assert Xs.shape == (N, width)
    assert np.all([np.equal(X[i][0:width], Xs[i]) for i in range(len(X))])
    assert np.all([np.equal(y[i], ys[i]) for i in range(len(y))])

    # multivariate ts data with context data
    Xt = [np.random.rand(100, nvars),
          np.random.rand(200, nvars),
          np.random.rand(50, nvars)]
    Xc = np.random.rand(3, 4)
    y = np.random.rand(3)
    X = TS_Data(Xt, Xc)
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    N = len(ys)
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 4)
    assert np.all([np.equal(Xt[i][0:width], Xst[i]) for i in range(len(Xt))])
    assert np.all([np.equal(Xc[i], Xsc[i]) for i in range(len(Xt))])
    assert np.all([np.equal(y[i], ys[i]) for i in range(len(y))])
def test_pipe_PadTrunc():
    """Fit/predict/score the PadTrunc Pype across input layouts."""
    # NOTE(review): redefines test_pipe_PadTrunc from earlier in the file
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    def _fit_predict_score(X, y):
        pipe.fit(X, y)
        pipe.transform_predict(X, y)
        pipe.predict(X)
        pipe.score(X, y)

    # no context data, single time series
    _fit_predict_score([np.random.rand(1000, 10)], [5])

    # context data, single time series
    _fit_predict_score(
        TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)]), [5])

    # multiple time series
    Xt = [np.random.rand(n, 10) for n in (1000, 100, 500)]
    _fit_predict_score(TS_Data(Xt, np.random.rand(3, 3)), [1, 2, 3])

    # univariate data
    Xt = [np.random.rand(n) for n in (1000, 100, 500)]
    _fit_predict_score(TS_Data(Xt, np.random.rand(3)), [1, 2, 3])

    # transform pipe
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(n, 10) for n in (1000, 100, 500)]
    X = TS_Data(Xt, np.random.rand(3, 3))
    y = [1, 2, 3]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
def test_segmentxy():
    """SegmentXY: parameter validation, _step resolution, and output shapes."""
    # illegal parameter settings
    with pytest.raises(ValueError):
        transform.SegmentXY(width=0)                   # illegal width value
    with pytest.raises(ValueError):
        transform.SegmentXY(overlap=None, step=None)   # need overlap or step
    with pytest.raises(ValueError):
        transform.SegmentXY(overlap=-1, step=None)     # illegal overlap value
    with pytest.raises(ValueError):
        transform.SegmentXY(step=0)                    # illegal step value
    with pytest.raises(ValueError):
        transform.SegmentXY(order=None)                # illegal order

    # _step derived from overlap
    assert transform.SegmentXY(width=10, overlap=0.5)._step == 5
    # step takes precedence over overlap
    assert transform.SegmentXY(width=10, overlap=1, step=1)._step == 1
    # illegal overlap value, but valid step value
    assert transform.SegmentXY(overlap=-1, step=1)._step == 1

    # shape of segmented data
    Nt = 100
    width = 5
    nvars = 5
    seg = transform.SegmentXY(width=width)

    def _segment(X, y):
        seg.fit(X, y)
        return seg.transform(X, y)

    # multivariate ts data without context data
    X = [np.random.rand(Nt, nvars) for _ in range(3)]
    y = [np.random.rand(Nt) for _ in range(3)]
    Xs, ys, _ = _segment(X, y)
    assert Xs.shape == (len(ys), width, nvars)

    # univariate ts data without context data
    X = [np.random.rand(k * Nt) for k in (1, 2, 3)]
    y = [np.random.rand(k * Nt) for k in (1, 2, 3)]
    Xs, ys, _ = _segment(X, y)
    assert Xs.shape == (len(ys), width)

    # multivariate ts data with context data
    Xt = [np.random.rand(k * Nt, nvars) for k in (1, 2, 1)]
    y = [np.random.rand(k * Nt) for k in (1, 2, 1)]
    Xs, ys, _ = _segment(TS_Data(Xt, np.random.rand(3, 4)), y)
    Xst, Xsc = get_ts_data_parts(Xs)
    assert Xst.shape == (len(ys), width, nvars)
    assert Xsc.shape == (len(ys), 4)

    # ts data with univariate context data
    Xt = [np.random.rand(k * Nt, nvars) for k in (1, 2, 1)]
    y = [np.random.rand(k * Nt) for k in (1, 2, 1)]
    Xs, ys, _ = _segment(TS_Data(Xt, np.random.rand(3)), y)
    Xst, Xsc = get_ts_data_parts(Xs)
    assert Xst.shape == (len(ys), width, nvars)
    assert Xsc.shape == (len(ys), 1)

    # same number of context vars and time vars -- would have caused a
    # broadcasting failure before the TS_Data class existed
    Xt = [np.random.rand(k * Nt, nvars) for k in (1, 2, 1)]
    y = [np.random.rand(k * Nt) for k in (1, 2, 1)]
    Xs, ys, _ = _segment(TS_Data(Xt, np.random.rand(3, nvars)), y)
    Xst, Xsc = get_ts_data_parts(Xs)
    assert Xst.shape == (len(ys), width, nvars)
    assert Xsc.shape == (len(ys), 5)
def test_segmentxyforecast():
    """SegmentXYForecast: parameter validation, _step resolution, output shapes."""
    # NOTE(review): redefines test_segmentxyforecast from earlier in the file
    with pytest.raises(ValueError):
        transform.SegmentXYForecast(width=0)                  # illegal width
    with pytest.raises(ValueError):
        transform.SegmentXYForecast(overlap=None, step=None)  # need overlap or step
    with pytest.raises(ValueError):
        transform.SegmentXYForecast(overlap=-1, step=None)    # illegal overlap
    with pytest.raises(ValueError):
        transform.SegmentXYForecast(step=0)                   # illegal step
    with pytest.raises(ValueError):
        transform.SegmentXYForecast(order=None)               # illegal order
    with pytest.raises(ValueError):
        transform.SegmentXYForecast(forecast=0)               # illegal forecast

    # _step derived from overlap
    assert transform.SegmentXYForecast(width=10, overlap=0.5)._step == 5
    # step takes precedence over overlap
    assert transform.SegmentXYForecast(width=10, overlap=1, step=1)._step == 1
    # illegal overlap value, but valid step value
    assert transform.SegmentXYForecast(overlap=-1, step=1)._step == 1

    # shape of segmented data
    Nt = 100
    width = 5
    nvars = 5

    # forecast segmentation of multivariate series with context data
    seg = transform.SegmentXYForecast(width=width, forecast=5)
    Xt = [np.random.rand(k * Nt, nvars) for k in (1, 2, 1)]
    y = [np.random.rand(k * Nt) for k in (1, 2, 1)]
    X = TS_Data(Xt, np.random.rand(3, 4))
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    assert Xst.shape == (len(ys), width, nvars)
    assert Xsc.shape == (len(ys), 4)

    # univariate X without context data
    seg = transform.SegmentXYForecast(width=width, forecast=5)
    X = [np.random.rand(k * Nt) for k in (1, 2, 1)]
    y = [np.random.rand(k * Nt) for k in (1, 2, 1)]
    seg.fit(X, y)
    Xs, ys, _ = seg.transform(X, y)
    Xst, Xsc = get_ts_data_parts(Xs)
    assert Xst.shape == (len(ys), width)
import pandas as pd
import matplotlib.pyplot as plt

# Demo script: load the example watch dataset, wrap it in TS_Data, and
# print aggregate / per-class statistics.
data = load_watch()
y = data['y']
Xt = data['X']
fs = 50  # sampling frequency

# create time series data object with no contextual variables
check_ts_data(Xt)

# create time series data object with 2 contextual variables
Xs = np.column_stack([data['side'], data['subject']])
X = TS_Data(Xt, Xs)
check_ts_data(X)

# recover time series and contextual variables
Xt = X.ts_data
Xs = X.context_data

# generate some statistics from the time series data
results = ts_stats(X, y, fs=fs, class_labels=data['y_labels'])
print("DATA STATS - AGGREGATED")
print(results['total'])
print("")
print("DATA STATS - BY CLASS")
print(pd.DataFrame(results['by_class']))

# plot an instance from the data set
def test_patch_sampler():
    # Verifies transform.patch_sampler: it must reject classes without a
    # fit_resample method, wrap imblearn-style samplers into resampling
    # transforms, survive pickling, and resample only on fit_transform
    # (plain transform must be a pass-through).

    # test patch_sampler on a class without a fit_resample function
    class EmptyClass(object):
        pass

    with pytest.raises(TypeError):
        transform.patch_sampler(EmptyClass)

    # test patch_sampler on a mocked imbalanced-learn Sampler class
    unpatched_sampler = MockImblearnSampler()
    patched_sampler = transform.patch_sampler(MockImblearnSampler)(
        shuffle=True, random_state=0)
    assert str(patched_sampler.__class__) != str(unpatched_sampler.__class__)
    # the dynamically patched class must remain picklable
    pickled_sampler = pickle.dumps(patched_sampler)
    unpickled_sampler = pickle.loads(pickled_sampler)
    assert str(patched_sampler.__class__) == str(unpickled_sampler.__class__)

    # test representation
    assert "mocked_param" in repr(patched_sampler)
    assert "random_state" in repr(patched_sampler)
    assert "shuffle" in repr(patched_sampler)

    # multivariate ts: transform() is a no-op, fit_transform() resamples
    X = np.random.rand(100, 10, 4)
    y = np.ones(100)
    Xt, yt, _ = patched_sampler.transform(X, y)
    assert Xt is X
    assert yt is y
    Xt, yt, _ = patched_sampler.fit_transform(X, y)
    # expected result: mock-resampled then shuffled with the same fixed seed
    X, y = shuffle(mock_resample(X), mock_resample(y), random_state=0)
    assert np.array_equal(Xt, X)
    assert np.array_equal(yt, y)

    # ts with multivariate contextual data
    X = TS_Data(np.random.rand(100, 10, 4), np.random.rand(100, 3))
    Xt_orig, _ = get_ts_data_parts(X)
    y = np.ones(100)
    Xt, yt, _ = patched_sampler.transform(X, y)
    assert Xt is X
    assert yt is y
    Xt, yt, _ = patched_sampler.fit_transform(X, y)
    Xtt, Xtc = get_ts_data_parts(Xt)
    Xt_orig, y = shuffle(mock_resample(Xt_orig), mock_resample(y),
                         random_state=0)
    assert np.array_equal(Xtt, Xt_orig)
    assert np.array_equal(yt, y)

    # ts with univariate contextual data
    X = TS_Data(np.random.rand(100, 10, 4), np.random.rand(100))
    Xt_orig, _ = get_ts_data_parts(X)
    y = np.ones(100)
    Xt, yt, _ = patched_sampler.transform(X, y)
    assert Xt is X
    assert yt is y
    Xt, yt, _ = patched_sampler.fit_transform(X, y)
    Xtt, Xtc = get_ts_data_parts(Xt)
    Xt_orig, y = shuffle(mock_resample(Xt_orig), mock_resample(y),
                         random_state=0)
    assert np.array_equal(Xtt, Xt_orig)
    assert np.array_equal(yt, y)

    # univariate ts
    X = np.random.rand(100, 10)
    y = np.ones(100)
    Xt, yt, _ = patched_sampler.transform(X, y)
    assert Xt is X
    assert yt is y
    Xt, yt, _ = patched_sampler.fit_transform(X, y)
    X, y = shuffle(mock_resample(X), mock_resample(y), random_state=0)
    assert np.array_equal(Xt, X)
    assert np.array_equal(yt, y)
def transform(self, X, y, sample_weight=None):
    """Cut the fitted reference windows out of each time series.

    Parameters
    ----------
    X : array-like or TS_Data
        Time series (optionally with contextual data); each series carries
        timestamps in its first column.
    y : array-like or None
        Targets. Series-valued targets (2d per series, with their own
        timestamp column) are segmented with the same reference windows;
        otherwise targets are sliced with the series indices.
    sample_weight : array-like, optional
        Passed through unchanged.

    Returns
    -------
    Xt : list or TS_Data
        Segmented time series (with context data replicated per segment
        when present).
    y_trans : array-like
        Segmented targets, with windows of length <= 1 removed.
    sample_weight : unchanged.
    """
    check_is_fitted(self, ['reference_windows_', 'num_new_ts_'])
    Xt, Xc = get_ts_data_parts(X)
    yt = y
    N = len(Xt)  # number of time series

    # preallocate new time series data
    Xt_trans = [None] * self.num_new_ts_
    y_trans = [None] * self.num_new_ts_
    Xc_trans = np.recarray(shape=(self.num_new_ts_, ),
                           dtype=contextual_recarray_dtype)
    k = 0
    pbar = tqdm(total=self.num_new_ts_, desc="Segment",
                disable=(not self._VERBOSE), file=sys.stdout)
    # get time series which should be segmented together
    for window, selection_idx in zip(
            self.reference_windows_,
            moving_window(range(N), window_size=self.n)):
        # get reference windows for segmentation
        for starting_timestamp, ending_timestamp in window:
            # segment each time series
            for idx in selection_idx:
                ts = Xt[idx]
                start_idx = find_nearest(ts[:, 0], starting_timestamp)
                if self.enforce_size and Xc is not None:
                    # fixed-size window derived from the sampling rate
                    stop_idx = start_idx + round2int(
                        self.window_length * Xc[idx].sr)
                else:
                    stop_idx = find_nearest(ts[:, 0], ending_timestamp)
                if stop_idx < start_idx:
                    raise ValueError
                Xt_trans[k] = ts[start_idx:stop_idx, :]
                if Xc is not None:
                    Xc_trans[k] = Xc[idx]
                if yt is not None and len(yt[0].shape) >= 2:
                    # series-valued targets carry their own timestamps
                    start_idx = find_nearest(yt[idx][:, 0],
                                             starting_timestamp)
                    if self.enforce_size:
                        stop_idx = start_idx + round2int(
                            self.window_length * get_sampling_rate(
                                yt[idx][:, 0], t_unit=self._T_UNIT))
                    else:
                        stop_idx = find_nearest(yt[idx][:, 0],
                                                ending_timestamp)
                    if stop_idx < start_idx:
                        raise ValueError
                    y_trans[k] = yt[idx][start_idx:stop_idx, :]
                elif yt is not None:
                    y_trans[k] = yt[idx][start_idx:stop_idx]
                pbar.update(1)
                k += 1
    pbar.close()
    assert len([
        1 for seg_ts, seg_y in zip(Xt_trans, y_trans)
        if seg_ts is None or seg_y is None
    ]) == 0, "[CRITICAL] Missing segments."

    # --- find empty windows and delete them ---
    # BUG FIX: the loop below previously rebound the name `y`, so the later
    # `if y is not None` tested the last window instead of the original
    # targets (and would be undefined for zero windows); use distinct names.
    empty_windows = []
    for i, (ts_win, y_win) in enumerate(zip(Xt_trans, y_trans)):
        if len(ts_win) <= 1 or len(y_win) <= 1:
            empty_windows.append(i)
    if len(empty_windows) > 0:
        Xt_trans = np.delete(Xt_trans, empty_windows)
        if Xc is not None:
            Xc_trans = np.delete(Xc_trans, empty_windows)
        if y is not None:
            y_trans = np.delete(y_trans, empty_windows)
    num_empty_windows = len(empty_windows)
    if num_empty_windows > 0:
        logger.warning(
            f"[{self.__class__.__name__}] {num_empty_windows} windows could not be processed "
            f"and thus removed")

    # --- finalize ---
    if Xc is not None:
        Xt = TS_Data(Xt_trans, Xc_trans)
    else:
        Xt = Xt_trans
    return Xt, y_trans, sample_weight
def test_watch():
    """The watch dataset should load and wrap into a TS_Data container."""
    watch = load_watch()
    TS_Data(watch['X'], watch['side'])
('rf', RandomForestClassifier(n_estimators=20))])
# NOTE(review): this fragment starts mid-expression -- the opening of the
# Pype/pipeline definition it completes is not visible in this chunk.

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Accuracy score: ", score)

# now lets add some contextual data
Xc = np.column_stack((data['side'], data['subject']))
Xt = np.array(data['X'])
X = TS_Data(Xt, Xc)
y = np.array(data['y'])

# and do a cross validation
# NOTE(review): `scoring` is built here but never passed to cross_validate
# (which therefore uses the estimator's default scorer) -- confirm intent.
scoring = make_scorer(f1_score, average='macro')
cv_scores = cross_validate(clf, X, y, cv=4, return_train_score=True)
print("CV Scores: ", pd.DataFrame(cv_scores))

# lets see what feature we used
print("Features: ", clf.steps[1][1].f_labels)

img = mpimg.imread('feet.jpg')
plt.imshow(img)
def test_function_transform():
    """FunctionTransformer: identity default, custom function with kwargs,
    contextual-data passthrough, and rejection of resampling functions."""
    constant = 10
    identity = transform.FunctionTransformer()

    def replace(Xt, value):
        return np.ones(Xt.shape) * value

    custom = transform.FunctionTransformer(replace,
                                           func_kwargs={"value": constant})

    def check_plain(X, y):
        # identity must hand back the very same object
        identity.fit(X, y)
        assert identity.transform(X) is X
        # custom must fill the array with the constant
        custom.fit(X, y)
        assert np.array_equal(custom.transform(X),
                              np.ones(X.shape) * constant)

    def check_contextual(Xt, Xc):
        # contextual data must pass through the transform untouched
        X = TS_Data(Xt, Xc)
        y = np.ones(100)
        identity.fit(X, y)
        assert identity.transform(X) is X
        custom.fit(X, y)
        Xtt, Xtc = get_ts_data_parts(custom.transform(X))
        assert np.array_equal(Xtt, np.ones(Xt.shape) * constant)
        assert Xtc is Xc

    # univariate ts
    check_plain(np.random.rand(100, 10), np.ones(100))
    # multivariate ts
    check_plain(np.random.rand(100, 10, 4), np.ones(100))
    # ts with univariate contextual data
    check_contextual(np.random.rand(100, 10, 4), np.random.rand(100))
    # ts with multivariate contextual data
    check_contextual(np.random.rand(100, 10, 4), np.random.rand(100, 3))

    # functions that resample (change the number of series) are rejected
    def resample(Xt):
        return Xt.reshape(1, -1)

    illegal_resampler = transform.FunctionTransformer(resample)
    X = np.random.rand(100, 10)
    illegal_resampler.fit(X, np.ones(100))
    with pytest.raises(ValueError):
        illegal_resampler.transform(X)
def test_segmentxy():
    """SegmentXY over uni-/multivariate series, with and without context."""
    Nt = 100
    width = 5
    nvars = 5
    seg = transform.SegmentXY(width=width)

    def segment(X, y):
        # fit + transform, returning the segments and the segment count
        seg.fit(X, y)
        Xs, ys, _ = seg.transform(X, y)
        return Xs, len(ys)

    def segment_with_context(Xc):
        # three series of lengths Nt, 2*Nt, Nt plus contextual data Xc
        Xt = [np.random.rand(m * Nt, nvars) for m in (1, 2, 1)]
        y = [np.random.rand(m * Nt) for m in (1, 2, 1)]
        X = TS_Data(Xt, Xc)
        seg.fit(X, y)
        Xs, ys, _ = seg.transform(X, y)
        Xst, Xsc = get_ts_data_parts(Xs)
        return Xst, Xsc, len(ys)

    # multivariate ts data without context data
    X = [np.random.rand(Nt, nvars) for _ in range(3)]
    y = [np.random.rand(Nt) for _ in range(3)]
    Xs, N = segment(X, y)
    assert Xs.shape == (N, width, nvars)

    # univariate ts data without context data
    X = [np.random.rand(m * Nt) for m in (1, 2, 3)]
    y = [np.random.rand(m * Nt) for m in (1, 2, 3)]
    Xs, N = segment(X, y)
    assert Xs.shape == (N, width)

    # multivariate ts data with context data
    Xst, Xsc, N = segment_with_context(np.random.rand(3, 4))
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 4)

    # ts data with univariate context data
    Xst, Xsc, N = segment_with_context(np.random.rand(3))
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 1)

    # same number of context vars and time vars — this would have caused a
    # broadcasting failure before the TS_Data class was introduced
    Xst, Xsc, N = segment_with_context(np.random.rand(3, nvars))
    assert Xst.shape == (N, width, nvars)
    assert Xsc.shape == (N, 5)
def test_pipe_regression():
    """End-to-end regression Pype: fit/predict/score on several data
    layouts, cross-validation, and a pure transform pipeline.

    The original repeated the fit/transform_predict/predict/score sequence
    verbatim three times; it is factored into a local helper (mirroring the
    ``regression_test`` helper used elsewhere in this file).
    """

    def fit_and_score(pipe, X, y):
        # exercise the full estimator interface on one dataset
        pipe.fit(X, y)
        pipe.transform_predict(X, y)
        pipe.predict(X)
        pipe.score(X, y)

    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    # no context data, single time series
    fit_and_score(pipe, [np.random.rand(1000, 10)], [np.random.rand(1000)])

    # context data, single time series
    X = TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)])
    fit_and_score(pipe, X, [np.random.rand(1000)])

    # multiple time series
    Xt = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    fit_and_score(pipe, TS_Data(Xt, Xc), y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, X, y, cv=3)

    # transform pipe (no final estimator)
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10),
          np.random.rand(100, 10),
          np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
def test_watch():
    """Wrapping the watch dataset with its 'side' column yields TS_Data."""
    watch = load_watch()
    wrapped = TS_Data(watch['X'], watch['side'])
    assert isinstance(wrapped, TS_Data)