import torch
from torch.autograd import Variable
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
from seglearn.transform import SegmentXY


def demoDataset():
    # generate a toy dataset: 20000 samples, 10 features, 3 blob centers
    X, y = make_blobs(n_features=10,
                      n_samples=20000,
                      centers=3,
                      shuffle=False,
                      random_state=1)
    scaler = StandardScaler()
    ts = scaler.fit_transform(X)
    width = 12
    # treat the scaled data as a single time series and segment it
    ts = [ts]
    segment = SegmentXY(width=width, overlap=0)  # another option: y_func=middle (seglearn.transform.middle)
    X, y, _ = segment.fit_transform(ts, [y])  # alternative target: [y.reshape([-1, 1])]
    # flatten each segment and convert to a torch tensor
    X = X.reshape(X.shape[0], -1)
    X = Variable(torch.from_numpy(X).float())
    return X, y
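A minimal usage sketch follows, assuming the segments are fed to a PyTorch model; the DataLoader wrapping is not part of the original snippet.

from torch.utils.data import DataLoader, TensorDataset

# wrap the segmented data for batched training (hypothetical downstream use)
X, y = demoDataset()
loader = DataLoader(TensorDataset(X, torch.from_numpy(y).long()),
                    batch_size=64, shuffle=True)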
Example #2
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])
    regression_test(pipe, X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]
    regression_test(pipe, X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    regression_test(pipe, X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, X, y, cv=3)
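The regression_test helper used above is not defined in this snippet; a minimal stand-in consistent with the calls exercised in Example #8 below might look like this (a sketch, not the actual seglearn test utility):

def regression_test(pipe, X, y):
    # fit the pipeline, then exercise prediction and scoring on the same data
    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)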
Example #3
def test_pipe_transformation():
    # SegmentX transform pipe
    pipe = Pype([('seg', SegmentX()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # SegmentXY transform pipe
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # Forecast transform pipe
    pipe = Pype([('seg', SegmentXYForecast()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # Padtrunc transform pipe
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)
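Similarly, the transformation_test helper is not shown here; a plausible stand-in, mirroring the transform calls in Example #8 below, would be:

def transformation_test(pipe, X, y):
    # fit, then run both transform paths on the same data
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)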
# for investigation
# create a feature representation pipeline
chosenwidth = 6
fts = {'mean': mean, 'var': var, 'std': std, 'skew': skew, 'mnx': mean_crossings,
       'minimum': minimum, 'maximum': maximum, 'mean_diff': mean_diff}
scorer1 = make_scorer(f1_score, average='macro')
C_chosen = 77.42  # also tried 12.915; from grid1.best_params_["C"]
gamma_chosen = 0.05994  # from grid1.best_params_["gamma"]
n_estimators = 20
# clf = Pype([('segment', SegmentXY(width=chosenwidth, step=1)),
#             ('features', FeatureRep(fts)),
#             ('scaler', StandardScaler()),
#             ('rf', RandomForestClassifier(n_estimators=20))], scorer=scorer1)
clf = Pype([
    ('segment', SegmentXY(
        width=chosenwidth,
        step=1)),  # in this context, what is the difference with SegmentX?
    ('features', FunctionTransformer(reshape_all)),
    ('scaler', StandardScaler()),
    ('bagg',
     OneVsRestClassifier(
         BaggingClassifier(SVC(kernel='rbf',
                               gamma=gamma_chosen,
                               C=C_chosen,
                               probability=True,
                               class_weight='balanced'),
                           max_samples=1.0 / n_estimators,
                           warm_start=True,
                           n_estimators=n_estimators,
                           n_jobs=6,
                           verbose=10)))
], scorer=scorer1)
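Regarding the question in the comment above: SegmentX expects one label per series and copies it to every segment, whereas SegmentXY segments y along with X and reduces each y segment with y_func. A small sketch (the data here is made up for illustration):

import numpy as np
from seglearn.transform import SegmentX, SegmentXY

Xs = [np.random.rand(20, 2)]        # one series, 20 samples, 2 variables
y_per_step = [np.random.rand(20)]   # a target for every time step -> SegmentXY
y_per_series = [1]                  # a single label for the series -> SegmentX

X_xy, y_xy, _ = SegmentXY(width=5, overlap=0).fit_transform(Xs, y_per_step)
X_x, y_x, _ = SegmentX(width=5, overlap=0).fit_transform(Xs, y_per_series)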
# Author: Matthias Gazzari
# License: BSD

from seglearn.transform import SegmentXY, FeatureRep, FeatureRepMix
from seglearn.feature_functions import minimum, maximum
from seglearn.base import TS_Data

import numpy as np
import pandas as pd

# Single multivariate time series with 3 samples of 4 variables
X = [np.array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]])]
# Time series target
y = [np.array([True, False, False])]

segment = SegmentXY(width=3, overlap=1)
X, y, _ = segment.fit_transform(X, y)

print('After segmentation:')
print("X:", X)
print("y: ", y)

union = FeatureRepMix([
    ('a', FeatureRep(features={'min': minimum}), 0),
    ('b', FeatureRep(features={'min': minimum}), 1),
    ('c', FeatureRep(features={'min': minimum}), [2, 3]),
    ('d', FeatureRep(features={'max': maximum}), slice(0, 2)),
    ('e', FeatureRep(features={'max': maximum}), [False, False, True, True]),
])

X = union.fit_transform(X, y)
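To inspect the result, the transformed matrix can be printed with its generated column names; this assumes FeatureRepMix exposes an f_labels attribute after fitting, like FeatureRep does:

print('After column-wise feature extraction:')
print(pd.DataFrame(X, columns=union.f_labels))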
from seglearn.pipe import Pype
from seglearn.transform import patch_sampler  # imblearn-sampler wrapper; import location assumed
from imblearn.under_sampling import RandomUnderSampler

# Single time series with 10 samples of 2 variables
X = [
    np.array([[0, 1], [1, 2], [2, 3], [3, 4], [4, 5], [5, 6], [6, 7], [7, 8],
              [8, 9], [9, 10]])
]
# Time series target (imbalanced towards False)
y = [
    np.array(
        [True, False, False, False, False, False, True, False, False, False])
]

print("Implementation details: transform and fit_transform methods:")

pipe = Pype([
    ('segment', SegmentXY(width=1, overlap=0)),
    ('resample', patch_sampler(RandomUnderSampler)()),
])
print("Pipeline:", pipe)

print("Calling a transform on the data does not change it ...")
Xf, yf = pipe.transform(X, y)
print("X (flattened):", Xf.flatten())
print("y", yf)

print("... but calling fit_transform resamples the data.")
Xf, yf = pipe.fit_transform(X, y)
print("X (flattened):", Xf.flatten())
print("y", yf)

print()
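In practice the resampling step is usually followed by an estimator; a minimal sketch of that arrangement (the DecisionTreeClassifier is just an illustrative choice, not from the original example):

from sklearn.tree import DecisionTreeClassifier

clf_pipe = Pype([
    ('segment', SegmentXY(width=1, overlap=0)),
    ('resample', patch_sampler(RandomUnderSampler)()),
    ('clf', DecisionTreeClassifier()),
])
clf_pipe.fit(X, y)  # the classifier is fit on the rebalanced segments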
Example #7
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    est = Pipeline([('ftr', FeatureRep()), ('ridge', Ridge())])

    pipe = SegPipe(est, segmenter=SegmentXY())

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10) for i in range(5)])
    Xc = np.random.rand(5, 3)
    X = make_ts_data(Xt, Xc)
    y = np.array([np.random.rand(1000) for i in range(5)])

    cross_validate(pipe, X, y)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])

    pipe = SegPipe(est, segmenter=SegmentXY())

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Example #8
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]

    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)

    cross_validate(pipe, X, y, cv=3)

    # transform pipe
    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
import numpy as np
from sklearn.linear_model import LinearRegression

from seglearn.pipe import Pype
from seglearn.split import temporal_split, TemporalKFold
from seglearn.transform import FeatureRep, SegmentXY, last

# for a single time series, we need to make it a list
X = [np.arange(10000) / 100.]
y = [np.sin(X[0]) * X[0] * 3 + X[0] * X[0]]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y)

# SegmentXY segments both X and y (as the name implies)
# setting y_func = last selects the last value from each y segment as the target
# other options include transform.middle, or you can make your own function
# see the API documentation for further details

pipe = Pype([('seg', SegmentXY(width=200, overlap=0.5, y_func=last)),
             ('features', FeatureRep()), ('lin', LinearRegression())])

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate some predictions
ytr, ytr_p = pipe.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.transform_predict(X_test, y_test)  # test predictions
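As the comments above note, y_func can be any reducer over a segment of targets; a small sketch of a custom reducer (segment_mean is a made-up name), assuming y_func receives the segmented targets with shape (n_segments, width):

def segment_mean(y):
    # average the target over each segment instead of taking the last value
    return np.mean(y, axis=1)

pipe_mean = Pype([('seg', SegmentXY(width=200, overlap=0.5, y_func=segment_mean)),
                  ('features', FeatureRep()),
                  ('lin', LinearRegression())])
pipe_mean.fit(X_train, y_train)
print("Score with mean target: ", pipe_mean.score(X_test, y_test))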
Example #10
# Same example using the older seglearn API (SegPipe wrapping a sklearn Pipeline);
# the import locations below reflect that older API and are assumed.
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

from seglearn.pipe import SegPipe
from seglearn.split import temporal_split
from seglearn.transform import FeatureRep, SegmentXY, last

# for a single time series, we need to make it a list
X = [np.arange(10000) / 100.]
y = [np.sin(X[0]) * X[0] * 3 + X[0] * X[0]]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y)

# create a feature representation pipeline
est = Pipeline([('features', FeatureRep()), ('lin', LinearRegression())])

# SegmentXY segments both X and y (as the name implies)
# setting y_func = last selects the last value from each y segment as the target
# other options include transform.middle, or you can make your own function
# see the API documentation for further details
segmenter = SegmentXY(width=200, overlap=0.5, y_func=last)
pipe = SegPipe(est, segmenter)

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate some predictions
ytr, ytr_p = pipe.predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.predict(X_test, y_test)  # test predictions