Ejemplo n.º 1
0
def test_serialize_shapelets():
    """Exercise serialization checks on a fitted ``LearningShapelets``.

    A model is fitted once on integer labels and once on string labels.
    Each fitted model is handed to ``_check_params_predict`` with the
    ``"json"`` and ``"pickle"`` formats, using the weights returned by
    ``model.model_.get_weights()`` as the parameters to check.
    """
    def get_model_weights(model):
        # Weights are read from the wrapped ``model_`` attribute
        return model.model_.get_weights()

    n_series, length, dim = 15, 10, 3
    rng = numpy.random.RandomState(0)
    X = rng.randn(n_series, length, dim)

    # Draw both label vectors before looping, integer labels first, so the
    # random stream is consumed in a fixed order.
    int_labels = rng.randint(low=0, high=3, size=n_series)
    str_labels = rng.choice(["one", "two", "three"], size=n_series)

    for y in (int_labels, str_labels):
        shp = LearningShapelets(max_iter=1)
        _check_not_fitted(shp)
        shp.fit(X, y)
        _check_params_predict(
            shp, X, ['predict'],
            check_params_fun=get_model_weights,
            formats=["json", "pickle"],
        )
Ejemplo n.º 2
0
def test_series_lengths():
    """Check that ``fit`` raises ``ValueError`` for two size configurations.

    The dataset holds one series of 5 samples and one of 3 samples.  The
    test is skipped when tensorflow cannot be imported.
    """
    pytest.importorskip('tensorflow')
    from tslearn.shapelets import LearningShapelets

    failing_configs = (
        # Long shapelets: size 8, while the longest series has 5 samples
        {"n_shapelets_per_size": {8: 1}},
        # Small max_size: 4, while the longest series has 5 samples
        {"n_shapelets_per_size": {3: 1}, "max_size": 4},
    )

    for config in failing_configs:
        # Labels and dataset are rebuilt for every configuration
        y = [0, 1]
        time_series = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
        clf = LearningShapelets(max_iter=1,
                                verbose=0,
                                random_state=0,
                                **config)
        np.testing.assert_raises(ValueError, clf.fit, time_series, y)
Ejemplo n.º 3
0
def executeLearningShapelet(datasetName):
    """Run a LearningShapelets + decision-tree experiment on one dataset.

    The dataset is loaded with ``UCR_UEA_datasets().load_dataset`` and both
    splits are passed through ``computeLoadedDataset``.  A
    ``LearningShapelets`` model is fitted on the training split and used to
    transform both splits; a ``DecisionTreeClassifier`` is then fitted on
    the transformed training data and used to predict the test labels.

    The accuracy and the four stage durations (all rounded to 2 decimals)
    are passed, together with the algorithm and dataset names, to
    ``WriteCsvShapeletAlgo`` for the file
    ``'Shapelet_Algo_Experiments_29-12.csv'``.  The encoded class values,
    the class count and the accuracy are also printed.

    Durations are measured with ``time.perf_counter`` so that they are not
    affected by adjustments of the system clock.

    Parameters
    ----------
    datasetName : str
        Name of the dataset, forwarded to ``load_dataset``.

    Returns
    -------
    None
    """
    X_train, y_train, X_test, y_test = UCR_UEA_datasets().load_dataset(
        datasetName)

    # Reshape the training split with the project helper
    dfTrain = computeLoadedDataset(X_train, y_train)
    y_train = dfTrain['target'].values.astype(int)

    # Number of classes = number of distinct encoded training targets
    encoded_targets = LabelEncoder().fit_transform(dfTrain['target'])
    distinct_classes = np.unique(encoded_targets, return_counts=False)
    num_classes = len(distinct_classes)

    print(distinct_classes)
    print(num_classes)

    # Drop the 'target' and 'TsIndex' columns before fitting
    del dfTrain['target']
    del dfTrain['TsIndex']

    # Same reshaping for the test split
    dfTest = computeLoadedDataset(X_test, y_test)
    y_test = dfTest['target'].values.astype(int)

    del dfTest['target']
    del dfTest['TsIndex']

    # --- Training preprocessing: fit the shapelet model and transform ---
    preprocessing_train_start = time.perf_counter()

    shapelet_sizes = grabocka_params_to_shapelet_size_dict(
        n_ts=len(dfTrain),
        ts_sz=len(dfTrain.iloc[0]),
        n_classes=num_classes,
        l=0.1,  # fixed parameters
        r=1)

    grabocka = LearningShapelets(n_shapelets_per_size=shapelet_sizes)
    grabocka.fit(dfTrain, y_train)
    X_train_distances = grabocka.transform(dfTrain)

    PreprocessingTrainTime = time.perf_counter() - preprocessing_train_start

    # --- Training: fit the decision tree on the transformed data ---
    train_start = time.perf_counter()

    dt = DecisionTreeClassifier(criterion='entropy',
                                max_depth=3,
                                min_samples_leaf=20)
    dt.fit(X_train_distances, y_train)

    TrainTime = time.perf_counter() - train_start

    # --- Test preprocessing: transform the test split ---
    preprocessing_test_start = time.perf_counter()

    X_test_distances = grabocka.transform(dfTest)

    PreprocessingTestTime = time.perf_counter() - preprocessing_test_start

    # --- Test: predict with the decision tree ---
    test_start = time.perf_counter()

    y_predict = dt.predict(X_test_distances)

    TestTime = time.perf_counter() - test_start

    # Compute the accuracy once and reuse it for the log line and the row
    accuracy = accuracy_score(y_test, y_predict)
    print(accuracy)

    row = [
        'LearningShapelets', datasetName,
        round(accuracy, 2),
        round(PreprocessingTrainTime, 2),
        round(TrainTime, 2),
        round(PreprocessingTestTime, 2),
        round(TestTime, 2)
    ]

    WriteCsvShapeletAlgo('Shapelet_Algo_Experiments_29-12.csv', row)
Ejemplo n.º 4
0
def test_shapelets():
    """Smoke-test ``LearningShapelets`` fitting, shapelets and weights.

    Covers, in order: cross-validation with a string optimizer and with an
    optimizer instance, agreement between ``shapelets_`` and
    ``shapelets_as_time_series_``, and restoring predictions through
    ``get_weights`` / ``set_weights`` after a refit.  The test is skipped
    when tensorflow cannot be imported.
    """
    pytest.importorskip('tensorflow')
    from tslearn.shapelets import LearningShapelets
    import tensorflow as tf

    def make_clf(**extra):
        # Fresh estimator sharing the settings used throughout this test
        return LearningShapelets(n_shapelets_per_size={2: 5},
                                 max_iter=1,
                                 verbose=0,
                                 random_state=0,
                                 **extra)

    n_series, length, dim = 15, 10, 2
    rng = np.random.RandomState(0)
    time_series = rng.randn(n_series, length, dim)
    y = rng.randint(2, size=n_series)

    # Optimizer given by name
    sgd_clf = make_clf(optimizer="sgd")
    cross_validate(sgd_clf, time_series, y, cv=2)

    # Optimizer given as an instance
    adam_clf = make_clf(optimizer=tf.optimizers.Adam(.1))
    cross_validate(adam_clf, time_series, y, cv=2)

    # Both shapelet attributes must describe the same values
    model = LearningShapelets(n_shapelets_per_size={3: 2, 4: 1}, max_iter=1)
    model.fit(time_series, y)
    shapelet_pairs = zip(model.shapelets_, model.shapelets_as_time_series_)
    for shp, shp_bis in shapelet_pairs:
        unpadded = to_time_series(shp_bis, remove_nans=True)
        np.testing.assert_allclose(shp, unpadded)

    # Test set_weights / get_weights
    clf = make_clf()
    clf.fit(time_series, y)
    preds_before = clf.predict_proba(time_series)
    weights = clf.get_weights()
    # Change number of iterations, then refit, then set weights
    clf.max_iter = clf.max_iter * 2
    clf.fit(time_series, y)
    clf.set_weights(weights)
    preds_after = clf.predict_proba(time_series)
    np.testing.assert_allclose(preds_before, preds_after)
Ejemplo n.º 5
0
def test_shapelet_lengths():
    """Check ``transform`` output with a hand-set shapelet.

    In both scenarios the single size-3 shapelet is overwritten with
    ``[1, 2, 3]`` after fitting, and ``transform`` on the full dataset is
    compared against ``[[0.], [8. / 3]]``.  The test is skipped when
    tensorflow cannot be imported.
    """
    pytest.importorskip('tensorflow')
    from tslearn.shapelets import LearningShapelets

    def transform_with_known_shapelet(model, fit_data, data):
        # Fit, overwrite the learnt shapelet, then transform ``data``
        model.fit(fit_data, [0, 1])
        model.set_weights([np.array([[1, 2, 3]])],
                          layer_name="shapelets_0_0")
        return model.transform(data)

    # Test variable-length
    dataset = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
    model = LearningShapelets(n_shapelets_per_size={3: 1},
                              max_iter=1,
                              verbose=0,
                              random_state=0)
    distances = transform_with_known_shapelet(model, dataset, dataset)
    np.testing.assert_allclose(distances, np.array([[0.], [8. / 3]]))

    # Test max_size to predict longer series than those passed at fit time
    dataset = to_time_series_dataset([[1, 2, 3, 4, 5], [3, 2, 1]])
    model = LearningShapelets(n_shapelets_per_size={3: 1},
                              max_iter=1,
                              verbose=0,
                              max_size=6,
                              random_state=0)
    # Fit with size 4 (last time step cropped), transform the full series
    distances = transform_with_known_shapelet(model, dataset[:, :-1], dataset)
    np.testing.assert_allclose(distances, np.array([[0.], [8. / 3]]))
Ejemplo n.º 6
0
    # Normalize the time series
    time_series_train = TimeSeriesScalerMinMax().fit_transform(
        time_series_train)

    # Get dimensions of the dataset
    n_time_series, time_series_size = time_series_train.shape[:2]
    n_classes = len(set(labels_train))

    # We will extract 2 shapelets and align them with the time series
    shapelet_sizes = {10: 2}

    # Define the model
    shapelet_classification_model = LearningShapelets(
        n_shapelets_per_size=shapelet_sizes,
        weight_regularizer=0.0001,
        optimizer=Adam(lr=0.01),
        max_iter=300,
        verbose=1,
        scale=False,
        random_state=42)

    # fit the model using the training data
    shapelet_classification_model.fit(time_series_train, labels_train)

    #############################################################################################
    # Visualise shapelets, time series and distance transformed time series
    #############################################################################################

    # Plot distances in a 2D space
    distances = shapelet_classification_model.transform(
        time_series_train).reshape((-1, 2))
    weights, biases = shapelet_classification_model.get_weights(
Ejemplo n.º 7
0
import pickle
from os import path as os_path
from pathlib import Path

import numpy as np
from flask import Flask
from flask_restful import Api, Resource, reqparse
from tslearn.neighbors import KNeighborsTimeSeriesClassifier
from tslearn.shapelets import LearningShapelets
from tslearn.utils import to_time_series_dataset, to_time_series

# Flask application and its REST API wrapper
APP = Flask(__name__)
API = Api(APP)

# Load models from disk
# NOTE(review): pickle.load can execute arbitrary code while unpickling, and
# from_pickle presumably carries the same caveat; only load model files
# that come from a trusted source.
k_nn_model = KNeighborsTimeSeriesClassifier.from_pickle('./models/k_nn.pickle')
shapelets_model = LearningShapelets.from_pickle(
    './models/learning_shapelets.pickle')

working_dir_path = Path.cwd()

# The remaining models are plain pickles; the context managers close each
# file handle as soon as its model has been loaded.
filename = os_path.join(working_dir_path, './models/mlp_nn.pickle')
with open(filename, 'rb') as model_file:
    mlp_nn_model = pickle.load(model_file)

filename = os_path.join(working_dir_path, './models/gak_svm.pickle')
with open(filename, 'rb') as model_file:
    gak_svm_model = pickle.load(model_file)


class Classify(Resource):
    @staticmethod
    def post():
        parser = reqparse.RequestParser()
        parser.add_argument('classifier_type')
# Normalize the time series
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)

# Get statistics of the dataset
n_ts, ts_sz = X_train.shape[:2]
n_classes = len(set(y_train))

# We will extract 1 shapelet and align it with a time series
shapelet_sizes = {20: 1}

# Define the model and fit it using the training data.
# The optimizer is configured through `learning_rate`: the old `lr` alias is
# deprecated and rejected by newer Keras releases.
shp_clf = LearningShapelets(n_shapelets_per_size=shapelet_sizes,
                            weight_regularizer=0.001,
                            optimizer=Adam(learning_rate=0.01),
                            max_iter=250,
                            verbose=0,
                            scale=False,
                            random_state=42)
shp_clf.fit(X_train, y_train)

# Get the number of extracted shapelets, the (minimal) distances from
# each of the timeseries to each of the shapelets, and the corresponding
# locations (index) where the minimal distance was found
n_shapelets = sum(shapelet_sizes.values())
distances = shp_clf.transform(X_train)
predicted_locations = shp_clf.locate(X_train)

# Two stacked axes sharing the same x axis
f, ax = plt.subplots(2, 1, sharex=True)

# Plot the shapelet and align it on the best matched time series. The optimizer
Ejemplo n.º 9
0
# Dataset statistics: sizes of the first two axes of X_train and the number
# of distinct training labels
n_ts, ts_sz = X_train.shape[:2]
n_classes = len(set(y_train))

# Set the number of shapelets per size as done in the original paper
shapelet_sizes = grabocka_params_to_shapelet_size_dict(n_ts=n_ts,
                                                       ts_sz=ts_sz,
                                                       n_classes=n_classes,
                                                       l=0.1,
                                                       r=1)

# Define the model using parameters provided by the authors (except that we
# use fewer iterations here)
shp_clf = LearningShapelets(n_shapelets_per_size=shapelet_sizes,
                            optimizer=tf.optimizers.Adam(.01),
                            batch_size=16,
                            weight_regularizer=.01,
                            max_iter=200,
                            random_state=42,
                            verbose=0)
shp_clf.fit(X_train, y_train)

# Make predictions on the test set and print the accuracy score computed
# against y_test
pred_labels = shp_clf.predict(X_test)
print("Correct classification rate:", accuracy_score(y_test, pred_labels))

# Plot the different discovered shapelets
plt.figure()
for i, sz in enumerate(shapelet_sizes.keys()):
    plt.subplot(len(shapelet_sizes), 1, i + 1)
    plt.title("%d shapelets of size %d" % (shapelet_sizes[sz], sz))
    for shp in shp_clf.shapelets_:
Ejemplo n.º 10
0
# Normalize the time series
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)

# Get statistics of the dataset
n_ts, ts_sz = X_train.shape[:2]
n_classes = len(set(y_train))

# We will extract 2 shapelets and align them with the time series
shapelet_sizes = {20: 2}

# Define the model and fit it using the training data.
# The optimizer is configured through `learning_rate`: the old `lr` alias is
# deprecated and rejected by newer Keras releases.
shp_clf = LearningShapelets(n_shapelets_per_size=shapelet_sizes,
                            weight_regularizer=0.0001,
                            optimizer=Adam(learning_rate=0.01),
                            max_iter=300,
                            verbose=0,
                            scale=False,
                            random_state=42)
shp_clf.fit(X_train, y_train)

# We will plot our distances in a 2D space
distances = shp_clf.transform(X_train).reshape((-1, 2))
weights, biases = shp_clf.get_weights('classification')

# Create a grid for our two shapelets on the left and distances on the right.
# plt.get_cmap is used because matplotlib.cm.get_cmap was deprecated in
# Matplotlib 3.7 and removed in 3.9; it takes the same (name, lut) arguments.
viridis = plt.get_cmap('viridis', 4)
fig = plt.figure(constrained_layout=True)
gs = fig.add_gridspec(3, 9)
fig_ax1 = fig.add_subplot(gs[0, :2])
fig_ax2 = fig.add_subplot(gs[0, 2:4])