Ejemplo n.º 1
0
                               y_func=last,
                               forecast=200)), ('features', FeatureRep()),
            ('lin', LinearRegression())])

# Fit the pipeline on the training series, then score it on the held-out series.
clf.fit(X_train, y_train)
score = clf.score(X_test, y_test)

# Report data-set sizes and the score.
# N_train / N_test are attributes read from the pipeline object; the labels
# below describe them as segment counts.
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", clf.N_train)
print("N segments in test: ", clf.N_test)
print("Score: ", score)

# Generate some predictions.
# transform_predict returns a pair, unpacked here as (y*, y*_p) -- presumably
# segment-level targets and their predictions; confirm against the Pype docs.
# NOTE(review): the first line rebinds `y`. After it, `y` no longer holds the
# original targets, so re-running this code would feed the transformed `y` back in.
y, y_p = clf.transform_predict(X, y)  # all predictions
ytr, ytr_p = clf.transform_predict(X_train, y_train)  # training predictions
yte, yte_p = clf.transform_predict(X_test, y_test)  # test predictions

# note - the first few segments in the test set won't have predictions (gap)
# we plot the 'gap' for the visualization to hopefully make the situation clear
Ns = len(y)
ts = np.arange(Ns)  # segment number
ttr = ts[0:len(ytr)]  # first len(ytr) indices -> training segments
tte = ts[(Ns - len(yte)):Ns]  # last len(yte) indices -> test segments
# whatever lies between the training indices and the test indices is the gap
tga = ts[len(ytr):(Ns - len(yte))]
yga = y[len(ytr):(Ns - len(yte))]

# Plot the results: training and gap values as points against segment index.
plt.plot(ttr, ytr, '.', label="training")
plt.plot(tga, yga, '.', label="gap")
Ejemplo n.º 2
0
# Pipeline: segment the series, compute a feature representation,
# then fit a linear regression on the result.
pipe = Pype([
    ('seg', Segment(width=200, overlap=0.5, y_func=last)),
    ('features', FeatureRep()),
    ('lin', LinearRegression()),
])

# Train on the training series and evaluate on the held-out series.
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)

# Summary of data-set sizes and the resulting score.
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# Training-set outputs, with x positions 0 .. len(ytr) - 1 (segment number).
ytr, ytr_p = pipe.transform_predict(X_train, y_train)
xtr = np.arange(len(ytr))

# Test-set outputs, placed on the x axis directly after the training segments.
yte, yte_p = pipe.transform_predict(X_test, y_test)
xte = np.arange(len(xtr), len(xtr) + len(yte))

# Plot training targets, test targets and test predictions on one figure.
plt.plot(xtr, ytr, '.', label="training")
plt.plot(xte, yte, '.', label="actual")
plt.plot(xte, yte_p, label="predicted")
plt.xlabel("Segment Number")
plt.ylabel("Target")
plt.legend()
plt.show()

# # now try a cross validation
# X = [np.arange(4000) / 100.]
                               verbose=10)))
    ])
    #scorer=scorer1
    X_train, X_test, y_train, y_test, matlab_train, matlab_test = train_test_split(
        new_features_seg_included,
        new_labels_seg_included,
        new_matlab_seg_included,
        test_size=0.10,
        random_state=10)

    clf.fit(X_train, y_train)
    #clf1.fit(X_train, y_train)

    score = clf.score(X_test, y_test)
    predict = clf.predict(X_test)
    test1, test2 = clf.transform_predict(X_test, y_test)
    train1, train2 = clf.transform_predict(X_train, y_train)
    test21 = inverse_transform(test2, y_test, chosenwidth, 1, order='F')
    test11 = inverse_transform(test1, y_test, chosenwidth, 1, order='F')
    train11 = inverse_transform(train1, y_train, chosenwidth, 1, order='F')
    train21 = inverse_transform(train2, y_train, chosenwidth, 1, order='F')

    # and do a cross validation

    scoring = make_scorer(f1_score, average='macro')
    if change_labels == 1 and conservative == 1:
        freeze2 = [np.argmax(test21[i]) for i in range(len(test21))]
        freeze2_conserve = [
            test21[i].shape[0] -
            np.argmax(np.flip(np.int_(np.logical_not(test21[i]))))
            for i in range(len(test21))
Ejemplo n.º 4
0
                                                    test_size=0.25,
                                                    random_state=42)

##############################################
# OPTION 1: use the pipeline's own score method
##############################################

# Fit on the training split, then score on the held-out split.
# The value is printed under the label "Accuracy score"; which metric
# pipe.score actually computes is not visible here -- confirm.
pipe.fit(X_train, y_train)
score = pipe.score(X_test, y_test)
print("Accuracy score: ", score)

######################################################################
# OPTION 2: generate true and predicted target values for the segments
######################################################################

# transform_predict returns a pair, unpacked as (y_true, y_pred), which is
# then passed directly to the scorers below.
y_true, y_pred = pipe.transform_predict(X_test, y_test)
# use any of the sklearn scorers
f1_macro = f1_score(y_true, y_pred, average='macro')
print("F1 score: ", f1_macro)

# Confusion matrix of the same true / predicted values.
# plot_confusion_matrix is defined outside this excerpt; data['y_labels'] is
# presumably the list of class names used for the axes -- confirm.
cm = confusion_matrix(y_true, y_pred)
plot_confusion_matrix(cm, data['y_labels'])

##########################################
# OPTION 3: scoring during model selection
##########################################

# model selection using the built-in score method for the final estimator
# 4-fold cross validation over the full X / y, also returning train scores;
# the result is wrapped in a DataFrame only for printing.
cv_scores = cross_validate(pipe, X, y, cv=4, return_train_score=True)
print("CV Scores: ", pd.DataFrame(cv_scores))
Ejemplo n.º 5
0
def test_pipe_regression():
    """Smoke test: drive a SegmentXY -> FeatureRep -> Ridge pipeline end to end.

    Nothing is asserted on the outputs; every call simply has to complete
    without raising. All inputs are random arrays.
    """

    def run_estimator_api(model, data, target):
        # same call sequence for every input layout
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    lengths = (1000, 100, 500)  # series lengths used by the multi-series cases

    # single time series, no context data
    X = [np.random.rand(1000, 10)]
    y = [np.random.rand(1000)]
    pipe = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('ridge', Ridge()),
    ])
    run_estimator_api(pipe, X, y)

    # single time series, with context data
    X = TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)])
    y = [np.random.rand(1000)]
    run_estimator_api(pipe, X, y)

    # multiple time series of different lengths, context array of shape (3, 3)
    X = TS_Data([np.random.rand(n, 10) for n in lengths],
                np.random.rand(3, 3))
    y = [np.random.rand(n) for n in lengths]
    run_estimator_api(pipe, X, y)

    # cross validation: one random series and one random target, each repeated 5x
    series = np.random.rand(1000, 10)
    X = TS_Data(np.array([series] * 5), np.random.rand(5, 3))
    target = np.random.rand(1000)
    y = np.array([target] * 5)
    cross_validate(pipe, X, y, cv=3)

    # pipeline whose final step is StandardScaler: exercise fit / transform /
    # fit_transform instead of predict / score
    pipe = Pype([
        ('seg', SegmentXY()),
        ('ftr', FeatureRep()),
        ('scaler', StandardScaler()),
    ])
    X = TS_Data([np.random.rand(n, 10) for n in lengths],
                np.random.rand(3, 3))
    y = [np.random.rand(n) for n in lengths]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)
Ejemplo n.º 6
0
def test_pipe_PadTrunc():
    """Smoke test: drive a PadTrunc -> FeatureRep -> RandomForest pipeline.

    Nothing is asserted on the outputs; every call simply has to complete
    without raising. Inputs are random arrays with one integer label per series.
    """

    def run_estimator_api(model, data, target):
        # same call sequence for every input layout
        model.fit(data, target)
        model.transform_predict(data, target)
        model.predict(data)
        model.score(data, target)

    lengths = (1000, 100, 500)  # series lengths used by the multi-series cases

    # single time series, no context data
    X = [np.random.rand(1000, 10)]
    y = [5]
    pipe = Pype([
        ('trunc', PadTrunc()),
        ('ftr', FeatureRep()),
        ('rf', RandomForestClassifier(n_estimators=10)),
    ])
    run_estimator_api(pipe, X, y)

    # single time series, with context data
    X = TS_Data([np.random.rand(1000, 10)], [np.random.rand(3)])
    y = [5]
    run_estimator_api(pipe, X, y)

    # multiple time series of different lengths, context array of shape (3, 3)
    X = TS_Data([np.random.rand(n, 10) for n in lengths],
                np.random.rand(3, 3))
    y = [1, 2, 3]
    run_estimator_api(pipe, X, y)

    # univariate data: each series is 1-D, context is a length-3 vector
    X = TS_Data([np.random.rand(n) for n in lengths], np.random.rand(3))
    y = [1, 2, 3]
    run_estimator_api(pipe, X, y)

    # pipeline whose final step is StandardScaler: exercise fit / transform /
    # fit_transform instead of predict / score
    pipe = Pype([
        ('trunc', PadTrunc()),
        ('ftr', FeatureRep()),
        ('scaler', StandardScaler()),
    ])
    X = TS_Data([np.random.rand(n, 10) for n in lengths],
                np.random.rand(3, 3))
    y = [1, 2, 3]

    pipe.fit(X, y)
    pipe.transform(X, y)
    pipe.fit_transform(X, y)