Example #1
def test_pipe_transformation():
    # SegmentX transform pipe
    pipe = Pype([('seg', SegmentX()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # SegmentXY transform pipe
    pipe = Pype([('seg', SegmentXY()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # Forecast transform pipe
    pipe = Pype([('seg', SegmentXYForecast()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)

    # Padtrunc transform pipe
    pipe = Pype([('trunc', PadTrunc()),
                 ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])
    Xt = [np.random.rand(1000, 10), np.random.rand(100, 10), np.random.rand(500, 10)]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    transformation_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    transformation_test(pipe, X, y)
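
The transformation_test helper is not included in this excerpt. A minimal sketch of what it plausibly does, inferred from the explicit transform-pipe calls in Examples #14 and #15 below (the real test suite may differ):

def transformation_test(pipe, X, y):
    # hypothetical helper: exercise a transform-only Pype end to end
    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)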
Example #2
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])
    regression_test(pipe, X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]
    regression_test(pipe, X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    regression_test(pipe, X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)
    cross_validate(pipe, X, y, cv=3)
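
Similarly, regression_test (and the analogous classifier_test and forecast_test helpers) is assumed rather than shown; a sketch based on the fit/predict/score sequence spelled out in Example #14:

def regression_test(pipe, X, y):
    # hypothetical helper mirroring Example #14: fit, predict, and score the pipeline
    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)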
Example #3
def test_pipe_PadTrunc():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [5]
    pipe = Pype([('trunc', PadTrunc()), ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])
    classifier_test(pipe, X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [5]
    classifier_test(pipe, X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    classifier_test(pipe, X, y)

    # univariate data
    Xt = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    Xc = np.random.rand(3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    classifier_test(pipe, X, y)
Example #4
def test_pipe_forecast():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]

    pipe = Pype([('seg', SegmentXYForecast()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    forecast_test(pipe, X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]

    forecast_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    forecast_test(pipe, X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    forecast_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    forecast_test(pipe, X, y)

    # cross val

    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)

    cross_validate(pipe, X, y, cv=3)

    X = pd.DataFrame(Xc)
    Xt = [np.random.rand(1000, 10)] * 5
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    cross_validate(pipe, X, y, cv=3)
Example #5
def test_pipe_classification():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [5]

    pipe = Pype([('seg', SegmentX()), ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    classifier_test(pipe, X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [5]
    classifier_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    classifier_test(pipe, X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    classifier_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    classifier_test(pipe, X, y)

    # univariate data
    Xt = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    Xc = np.random.rand(3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]
    classifier_test(pipe, X, y)

    X = pd.DataFrame(Xc)
    X['ts_data'] = Xt
    X = TS_Data.from_df(X)
    classifier_test(pipe, X, y)
Example #6
import numpy as np

from sklearn.linear_model import LinearRegression

from seglearn.pipe import Pype
from seglearn.split import temporal_split, TemporalKFold
from seglearn.transform import FeatureRep, Segment, last

# for a single time series, we need to make it a list
X = [np.arange(10000) / 100.]
y = [np.sin(X[0]) * X[0] * 3 + X[0] * X[0]]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y)

# setting y_func = last selects the last value from each y segment as the target
# other options include transform.middle, or you can make your own function
# see the API documentation for further details

pipe = Pype([('seg', Segment(width=200, overlap=0.5, y_func=last)),
             ('features', FeatureRep()), ('lin', LinearRegression())])

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# generate some predictions
ytr, ytr_p = pipe.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = pipe.transform_predict(X_test, y_test)  # test predictions
xtr = np.arange(len(ytr))  # segment number
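
The snippet stops before any plotting. Assuming matplotlib is available, a minimal continuation that visualizes the training predictions generated above could be:

import matplotlib.pyplot as plt

plt.plot(xtr, ytr, label='true')
plt.plot(xtr, ytr_p, label='predicted')
plt.xlabel('segment number')
plt.legend()
plt.show()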
Example #7
        print("Fitting X (flattened):", X.flatten(), "on y:", y)
        return super(VerboseDummyClassifier, self).fit(X, y, sample_weight)

    def predict(self, X):
        print("Predicting X (flattened):", X.flatten())
        return super(VerboseDummyClassifier, self).predict(X)

    def score(self, X, y, sample_weight=None):
        print("Scoring X (flattened):", X.flatten(), "on y:", y)
        return super(VerboseDummyClassifier, self).score(X, y, sample_weight)


pipe = Pype([
    ('segment', Segment(width=1, overlap=0)),
    ('resample', patch_sampler(RandomUnderSampler)(shuffle=True)),
    ('feature', FeatureRep(features={"min": minimum})),
    ('estimator', VerboseDummyClassifier(strategy="constant", constant=True)),
])
print("Pipeline:", pipe)

print("Split the data into half training and half test data:")
X_train, X_test, y_train, y_test = temporal_split(X, y, 0.5)
print("X_train:", X_train)
print("y_train:", y_train)
print("X_test:", X_test)
print("y_test:", y_test)
print()

print("Fit on the training data (this includes resampling):")
pipe.fit(X_train, y_train)
print()
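
The excerpt ends after fitting; a natural (assumed) next step is to evaluate the verbose pipeline on the held-out half:

print("Score the fitted pipeline on the test data:")
print(pipe.score(X_test, y_test))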
Example #8
    plt.ylabel('True label')
    plt.xlabel('Predicted label')
    plt.tight_layout()


##############################################
# SETUP
##############################################

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
steps = [('seg', Segment()), ('features', FeatureRep()),
         ('scaler', StandardScaler()),
         ('rf', RandomForestClassifier(n_estimators=20))]

pipe = Pype(steps)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    random_state=42)

##############################################
# OPTION 1: Use the Pype score method
##############################################
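
The excerpt is cut off at this point; based on the fuller version of this example (#17 below), Option 1 presumably continues as:

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
print("Accuracy score: ", score)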
Example #9
t = np.arange(5000) / 100.
y = np.sin(t) * t * 2.5 + t * t

# with forecasting, X can include the target
X = np.stack([t, y], axis=1)

# remember for a single time series, we need to make a list
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
est = Pipeline([('features', FeatureRep()), ('lin', LinearRegression())])

# setting y_func = last and forecast = 200 means we predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
segmenter = SegmentXYForecast(width=200,
                              overlap=0.5,
                              y_func=last,
                              forecast=200)
pipe = SegPipe(est, segmenter)

# fit and score
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
Example #10
X = [X]
y = [y]

# split the data along the time axis (our only option since we have only 1 time series)
X_train, X_test, y_train, y_test = temporal_split(X, y, test_size=0.25)

# create a feature representation pipeline
# setting y_func = last and forecast = 200 means we predict the value of y
# 200 samples ahead of the segment
# other reasonable options for y_func are ``mean``, ``all`` (or create your own function)
# see the API documentation for further details
clf = Pype([('segment',
             SegmentXYForecast(width=200,
                               overlap=0.5,
                               y_func=last,
                               forecast=200)), ('features', FeatureRep()),
            ('lin', LinearRegression())])

# fit and score
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# generate some predictions
y, y_p = clf.transform_predict(X, y)  # all predictions
ytr, ytr_p = clf.transform_predict(X_train, y_train)  # training predictions
Example #11
import numpy as np

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from seglearn.base import TS_Data
from seglearn.datasets import load_watch
from seglearn.pipe import Pype
from seglearn.transform import FeatureRep, SegmentX

# seed RNGESUS
np.random.seed(123124)

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
clf = Pype([('segment', SegmentX()), ('features', FeatureRep()),
            ('scaler', StandardScaler()),
            ('rf', RandomForestClassifier(n_estimators=20))])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Accuracy score: ", score)
Example #12
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    est = Pipeline([('ftr', FeatureRep()), ('ridge', Ridge())])

    pipe = SegPipe(est, segmenter=SegmentXY())

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10) for i in range(5)])
    Xc = np.random.rand(5, 3)
    X = make_ts_data(Xt, Xc)
    y = np.array([np.random.rand(1000) for i in range(5)])

    cross_validate(pipe, X, y)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])

    pipe = SegPipe(est, segmenter=SegmentXY())

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Example #13
def test_pipe_classification():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [5]
    est = Pipeline([('ftr', FeatureRep()),
                    ('ridge', RandomForestClassifier())])

    pipe = SegPipe(est, segmenter=SegmentX())

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = make_ts_data(Xt, Xc)
    y = [5]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # univariate data
    Xt = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    Xc = np.random.rand(3)
    X = make_ts_data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.predict(X, y)
    pipe.score(X, y)

    # transform pipe
    est = Pipeline([('ftr', FeatureRep()), ('scaler', StandardScaler())])

    pipe = SegPipe(est, segmenter=SegmentX())

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = make_ts_data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Example #14
def test_pipe_regression():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]

    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('ridge', Ridge())])

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # cross val
    Xt = np.array([np.random.rand(1000, 10)] * 5)
    Xc = np.random.rand(5, 3)
    X = TS_Data(Xt, Xc)
    y = np.array([np.random.rand(1000)] * 5)

    cross_validate(pipe, X, y, cv=3)

    # transform pipe
    pipe = Pype([('seg', SegmentXY()), ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Example #15
def test_pipe_PadTrunc():
    # no context data, single time series
    X = [np.random.rand(1000, 10)]
    y = [5]

    pipe = Pype([('trunc', PadTrunc()), ('ftr', FeatureRep()),
                 ('rf', RandomForestClassifier(n_estimators=10))])

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # context data, single time series
    Xt = [np.random.rand(1000, 10)]
    Xc = [np.random.rand(3)]
    X = TS_Data(Xt, Xc)
    y = [5]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # multiple time series
    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # univariate data
    Xt = [np.random.rand(1000), np.random.rand(100), np.random.rand(500)]
    Xc = np.random.rand(3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform_predict(X, y)
    pipe.predict(X)
    pipe.score(X, y)

    # transform pipe
    pipe = Pype([('trunc', PadTrunc()), ('ftr', FeatureRep()),
                 ('scaler', StandardScaler())])

    Xt = [
        np.random.rand(1000, 10),
        np.random.rand(100, 10),
        np.random.rand(500, 10)
    ]
    Xc = np.random.rand(3, 3)
    X = TS_Data(Xt, Xc)
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Example #16
from seglearn.feature_functions import minimum, maximum
from seglearn.base import TS_Data
from seglearn.transform import FeatureRep, FeatureRepMix, SegmentXY

import numpy as np
import pandas as pd

# Single multivariate time series with 3 samples of 4 variables
X = [np.array([[0, 1, 2, 3], [4, 5, 6, 7], [8, 9, 10, 11]])]
# Time series target
y = [np.array([True, False, False])]

segment = SegmentXY(width=3, overlap=1)
X, y, _ = segment.fit_transform(X, y)

print('After segmentation:')
print("X:", X)
print("y: ", y)

union = FeatureRepMix([
    ('a', FeatureRep(features={'min': minimum}), 0),
    ('b', FeatureRep(features={'min': minimum}), 1),
    ('c', FeatureRep(features={'min': minimum}), [2, 3]),
    ('d', FeatureRep(features={'max': maximum}), slice(0, 2)),
    ('e', FeatureRep(features={'max': maximum}), [False, False, True, True]),
])

X = union.fit_transform(X, y)
print('After column-wise feature extraction:')
df = pd.DataFrame(data=X, columns=union.f_labels)
print(df)
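
FeatureRepMix is itself a transformer, so it can also be used as a step inside a Pype; the pipeline below is an illustrative sketch, not part of the original example (the LogisticRegression estimator is an arbitrary choice):

from sklearn.linear_model import LogisticRegression
from seglearn.pipe import Pype

# illustrative only: this pipeline would be fit on the raw (unsegmented) series,
# not on the already-segmented X computed above
pipe = Pype([('seg', SegmentXY(width=3, overlap=1)),
             ('union', FeatureRepMix([
                 ('min', FeatureRep(features={'min': minimum}), slice(0, 4)),
                 ('max', FeatureRep(features={'max': maximum}), slice(0, 4)),
             ])),
             ('lr', LogisticRegression())])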
Example #17
    plt.xlabel('Predicted label')
    plt.tight_layout()


##############################################
# SETUP
##############################################

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline
steps = [('seg', SegmentX()),
         ('features', FeatureRep()),
         ('scaler', StandardScaler()),
         ('rf', RandomForestClassifier(n_estimators=20))]

pipe = Pype(steps)

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

##############################################
# OPTION 1: Use the Pype score method
##############################################

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
print("Accuracy score: ", score)
Example #18
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

from seglearn.datasets import load_watch
from seglearn.pipe import Pype
from seglearn.transform import FeatureRep, PadTrunc

# load the data
data = load_watch()
X = data['X']
y = data['y']

# create a feature representation pipeline with PadTrunc segmentation
# the time series are between 20-40 seconds
# this truncates them all to the first 5 seconds (sampling rate is 50 Hz)

pipe = Pype([('trunc', PadTrunc(width=250)), ('features', FeatureRep()),
             ('scaler', StandardScaler()), ('svc', LinearSVC())])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.25,
                                                    shuffle=True,
                                                    random_state=42)

pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
Example #19
# seed RNGESUS
np.random.seed(123124)

# load the data
data = load_watch()

X = data['X']
y = data['y']

# I am adding in a column to represent time (50 Hz sampling), since my data doesn't include it
# the Interp class assumes time is the first column in the series
X = np.array([np.column_stack([np.arange(len(X[i])) / 50., X[i]]) for i in np.arange(len(X))])

clf = Pype([('interp', Interp(1. / 25., categorical_target=True)),
            ('segment', Segment(width=100)),
            ('features', FeatureRep()),
            ('scaler', StandardScaler()),
            ('rf', RandomForestClassifier(n_estimators=20))])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Accuracy score: ", score)
Example #20
])

# create the label vector and the corresponding semantic vector
y = np.array([0, 1, 2, 3, 4, 5, 6, 7])
labels = [
    'LB_BC', 'RB_BC', 'LF_BC', 'RF_BC', 'LB_BP', 'RB_BP', 'LF_BP', 'RF_BP'
]

# segment the data and labels
segmenter = SegmentX(100, 0.5)
X_new, y_new, _ = segmenter.fit_transform(X, y)

###################################################################################################

# create a pipeline for LDA transformation of the feature representation
est = Pipeline([('features', FeatureRep()),
                ('lda', LinearDiscriminantAnalysis(n_components=2))])
pipe = SegPipe(est)

# plot embedding
X2, y2 = pipe.fit_transform(X_new, y_new)
plot_embedding(X2, y2.astype(int), labels)
plt.show()

###################################################################################################

# create a pipeline for feature representation
est = Pipeline([('features', FeatureRep()), ('scaler', StandardScaler()),
                ('rf', RandomForestClassifier())])
pipe = SegPipe(est)
Example #21
# load the data
data = load_watch()

X = data['X']
y = data['y']

# I am adding in a column to represent time (50 Hz sampling), since my data doesn't include it
# the Interp class assumes time is the first column in the series
X = np.array([
    np.column_stack([np.arange(len(X[i])) / 50., X[i]])
    for i in np.arange(len(X))
])

clf = Pype([('interp', Interp(1. / 25., categorical_target=True)),
            ('segment', SegmentX(width=100)), ('features', FeatureRep()),
            ('scaler', StandardScaler()),
            ('rf', RandomForestClassifier(n_estimators=20))])

# split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)

clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Accuracy score: ", score)