Exemple #1
0
def test_temporal_k_fold():
    """Split several input layouts with ``TemporalKFold`` and check the folds.

    Every scenario builds its data, calls ``split`` and passes the three
    returned values to ``check_folds``. The scenarios cover a one-element
    collection of series and a batch of ``n_series`` series, each given as a
    plain container or wrapped in ``TS_Data``.
    """

    def split_and_check(kfold, data, target):
        # One scenario: split, then validate the outputs with check_folds.
        split_data, split_target, folds = kfold.split(data, target)
        check_folds(split_data, split_target, folds)

    # ---- a single series ----
    single = TemporalKFold()

    # list input, target given as one constant for the series
    split_and_check(single, [rand(100, 10)], [5])

    # list input, target given as a rand(100) entry for the series
    split_and_check(single, [rand(100, 10)], [rand(100)])

    # TS_Data input built from the series list and a one-element second argument
    series = [rand(100, 10)]
    extra = [5]
    split_and_check(single, TS_Data(series, extra), [rand(100)])

    # ---- many series ----
    # NOTE: ``[arr] * n_series`` repeats the *same* random array n_series times.
    batch = TemporalKFold()
    n_series = 5

    # array input, one target value per series
    stacked = np.array([rand(100, 10)] * n_series)
    split_and_check(batch, stacked, rand(n_series))

    # array input, a rand(100) target per series
    stacked = np.array([rand(100, 10)] * n_series)
    split_and_check(batch, stacked, np.array([rand(100)] * n_series))

    # TS_Data input, a rand(100) target per series
    stacked = np.array([rand(100, 10)] * n_series)
    extra = rand(n_series)
    wrapped = TS_Data(stacked, extra)
    split_and_check(batch, wrapped, np.array([rand(100)] * n_series))

    # TS_Data input, one target value per series
    stacked = np.array([rand(100, 10)] * n_series)
    extra = rand(n_series)
    wrapped = TS_Data(stacked, extra)
    split_and_check(batch, wrapped, rand(n_series))
    model.add(Dense(n_classes, activation="softmax"))

    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model


# Load the data and pull out its 'X' and 'y' entries.
data = load_watch()
X, y = data['X'], data['y']

# Temporal splitting of the data into 3 folds.
splitter = TemporalKFold(n_splits=3)
Xs, ys, cv = splitter.split(X, y)

# Create a segment learning pipeline: a segmentation step ('seg') followed by
# a Keras classifier ('crnn') whose network is built by crnn_model.
# NOTE(review): `width` is not referenced in the visible part of this script;
# presumably it is consumed further down -- confirm before removing.
width = 100
pipe = Pype([
    ('seg', SegmentX(order='C')),
    ('crnn', KerasClassifier(build_fn=crnn_model, epochs=1,
                             batch_size=256, verbose=0)),
])

# create a parameter dictionary using the sklearn API
#
# you can also make a parameter always equal to another parameter by setting its
# value to the name of the parameter to track (this is an extension to sklearn)
# Report the size of each split, the pipeline's N_train / N_test attributes
# (labelled as segment counts) and the score.
# NOTE(review): X_train, X_test, y_train, y_test, pipe and score are not
# defined in the visible part of this script -- presumably set up earlier.
print("N series in train: ", len(X_train))
print("N series in test: ", len(X_test))
print("N segments in train: ", pipe.N_train)
print("N segments in test: ", pipe.N_test)
print("Score: ", score)

# Generate some predictions. transform_predict returns a pair, unpacked here
# as (target, prediction).
ytr, ytr_p = pipe.transform_predict(X_train, y_train)  # training targets and predictions
yte, yte_p = pipe.transform_predict(X_test, y_test)  # test targets and predictions
xtr = np.arange(len(ytr))  # x positions (segment number) for the training points
# Offset the test positions by the number of training points so the test data
# is drawn to the right of the training data on the same axis.
xte = np.arange(len(yte)) + len(xtr)

# Plot the results: training and test targets as dots, test predictions as a
# line. The training predictions (ytr_p) are not plotted here.
plt.plot(xtr, ytr, '.', label="training")
plt.plot(xte, yte, '.', label="actual")
plt.plot(xte, yte_p, label="predicted")
plt.xlabel("Segment Number")
plt.ylabel("Target")
plt.legend()
plt.show()

# Now try a cross validation on a synthetic data set.
# X holds a single series: 4000 evenly spaced values from 0.00 to 39.99.
X = [np.arange(4000) / 100.]
# The target is the sine of that series, element by element.
y = [np.sin(X[0])]

# Split with a default TemporalKFold; X and y are rebound to the split output
# and cv is handed to cross_validate below.
tkf = TemporalKFold()
X, y, cv = tkf.split(X, y)
# NOTE(review): cross_validate is presumably sklearn.model_selection.cross_validate,
# and pipe is defined outside the visible part of this script -- confirm.
cv_scores = cross_validate(pipe, X, y, cv=cv, return_train_score=True)
# Wrap the result in a DataFrame for printing.
print("CV Scores: ", pd.DataFrame(cv_scores))