Example #1
import numpy as np
from numpy.testing import assert_allclose, assert_array_less

from tslearn.utils import to_time_series_dataset
from tslearn.svm import TimeSeriesSVC, TimeSeriesSVR


def test_variable_length_svm():
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3], [2, 5, 6, 7, 8, 9],
                                [3, 5, 6, 7, 8]])
    y = [0, 0, 1, 1]
    rng = np.random.RandomState(0)
    clf = TimeSeriesSVC(kernel="gak", random_state=rng)
    clf.fit(X, y)
    assert_allclose(clf.predict(X), [0, 0, 1, 1])

    y_reg = [-1., -1.3, 3.2, 4.1]
    clf = TimeSeriesSVR(kernel="gak")
    clf.fit(X, y_reg)
    assert_array_less(clf.predict(X[:2]), 0.)
    assert_array_less(-clf.predict(X[2:]), 0.)
Example #2
import numpy as np
from sklearn.model_selection import KFold, cross_val_score

from tslearn.utils import to_time_series_dataset
from tslearn.svm import TimeSeriesSVC, TimeSeriesSVR
from tslearn.neighbors import KNeighborsTimeSeriesClassifier


def test_variable_cross_val():
    # TODO: here we just check that they can accept variable-length TS, not
    # that they do clever things
    X = to_time_series_dataset([[1, 2, 3, 4], [1, 2, 3], [1, 2, 3, 4],
                                [1, 2, 3], [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8],
                                [2, 5, 6, 7, 8, 9], [3, 5, 6, 7, 8]])
    y = [0, 0, 0, 0, 1, 1, 1, 1]
    rng = np.random.RandomState(0)

    cv = KFold(n_splits=2, shuffle=True, random_state=rng)
    for estimator in [
            TimeSeriesSVC(kernel="gak", random_state=rng),
            TimeSeriesSVR(kernel="gak"),
            KNeighborsTimeSeriesClassifier(metric="dtw", n_neighbors=1),
            KNeighborsTimeSeriesClassifier(metric="softdtw", n_neighbors=1)
    ]:
        # TODO: cannot test for clustering methods since they don't have a
        # score method yet (one workaround is sketched after this example)
        cross_val_score(estimator, X=X, y=y, cv=cv)
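One workaround for clustering estimators (a sketch, not part of the test above) is to hand cross_val_score an explicit callable scorer, e.g. a DTW silhouette score over the predicted labels, so no score method is required:

from tslearn.clustering import TimeSeriesKMeans, silhouette_score

def dtw_silhouette(estimator, X_test, y_test=None):
    # score a held-out fold by the silhouette of its predicted cluster labels
    return silhouette_score(X_test, estimator.predict(X_test), metric="dtw")

cross_val_score(TimeSeriesKMeans(n_clusters=2, metric="dtw"),
                X=X, y=None, cv=cv, scoring=dtw_silhouette)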
Example #3
import scipy.stats
import pandas as pd
from sklearn.model_selection import RandomizedSearchCV

from tslearn.svm import TimeSeriesSVC

# get_data is project-specific; it is assumed to be defined elsewhere in the
# surrounding package.


def cross_validate(experiment, outfile):

    X, y, ids, _, _, _, classnames, class_idxs = get_data(
        experiment,
        N_per_class=500,
        N_largest=None,
        do_add_spectral_indices=True)

    n_iter_search = 1

    svm = TimeSeriesSVC(n_jobs=-1)

    random_grid = {
        'C': scipy.stats.expon(scale=100),
        'gamma': scipy.stats.expon(scale=.1),
        'kernel': ['rbf']
    }

    random_search = RandomizedSearchCV(svm,
                                       param_distributions=random_grid,
                                       scoring='f1_macro',
                                       n_iter=n_iter_search,
                                       n_jobs=-1,
                                       cv=3,
                                       verbose=3)

    random_search.fit(X, y)
    print(random_search.best_params_)

    print(random_search.best_score_)
    print(random_search.cv_results_)

    with open(outfile, "w") as f:
        print(str(random_search.best_params_) + " score: " +
              str(random_search.best_score_), file=f)

    df = pd.DataFrame(random_search.cv_results_)
    print(f"writing {outfile}.csv")
    df.to_csv(outfile + ".csv")
Example #4
                # subsample time steps if a certain length is exceeded
                subsample = max(int(np.floor(x_train.shape[1] / 149)), 1)
                x_train = x_train[:, ::subsample, :]
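                # e.g. if x_train.shape[1] == 300: subsample = max(floor(300/149), 1) = 2,
                # so every 2nd time step is kept and the length drops to 150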
                datasets.set_description(f"dataset: {name} --- shape: {x_train.shape}")

                #==================================================================================
                # Linear, RBF and GAK kernels
                #==================================================================================
                # define standard kernels
                std_kernels = tqdm(['linear', 'rbf', 'gak'], position=2, leave=False)
                for ker in std_kernels:
                    std_kernels.set_description(f"standard kernel: {ker}")

                    # SVC tslearn estimator
                    svc = TimeSeriesSVC(kernel=ker, decision_function_shape='ovo')
                    svc_model = GridSearchCV(estimator=svc, param_grid=svc_parameters, cv=5, n_jobs=-1)
                    svc_model.fit(x_train, y_train)
                    
                    # store results
                    if svc_model.best_score_ > best_scores_train[ker]:
                        best_scores_train[ker] = svc_model.best_score_
                        trained_models[(name, ker)] = (subsample, at, ll, svc_model)

                    sleep(0.5)

                #==================================================================================
                # Truncated signature kernels
                #==================================================================================
                # set max signature truncation
                dim  = x_train.shape[-1]
Example #5
# License: BSD 3 clause

import numpy
import matplotlib.pyplot as plt

from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.svm import TimeSeriesSVC

numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
X_test = TimeSeriesScalerMinMax().fit_transform(X_test)

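# note: sz (series length) and d (dimensionality) are constructor parameters
# of older tslearn releases; recent versions infer them from the data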
clf = TimeSeriesSVC(kernel="gak",
                    gamma=.1,
                    sz=X_train.shape[1],
                    d=X_train.shape[2])
clf.fit(X_train, y_train)
print(("Correct classification rate:", clf.score(X_test, y_test)))

n_classes = len(set(y_train))

plt.figure()
support_vectors = clf.support_vectors_time_series_(X_train)
for i, cl in enumerate(set(y_train)):
    plt.subplot(n_classes, 1, i + 1)
    plt.title("Support vectors for class %d" % (cl))
    for ts in support_vectors[i]:
        plt.plot(ts.ravel())

plt.tight_layout()
Example #6
'''
Model with rank: 3
Mean validation score: 0.901 (std: 0.015)
Parameters: {'kernel': 'poly', 'gamma': 16, 'C': 1}

Model with rank: 3
Mean validation score: 0.901 (std: 0.015)
Parameters: {'kernel': 'poly', 'gamma': 6, 'C': 3}

Model with rank: 3
Mean validation score: 0.901 (std: 0.015)
Parameters: {'kernel': 'poly', 'gamma': 26, 'C': 4}
'''
#train & validation
print('train & validation.................................\n')
clf = TimeSeriesSVC(C=3, kernel='rbf', gamma=46)
clf.fit(X_train, y_train)
scores = cross_val_score(clf, X_train, y_train, cv=10)
print("train Correct classification rate:", clf.score(X_train, y_train))
print('k-fold CV', scores, scores.mean())

# test
print('test.................................\n')
print("test Correct classification rate:", clf.score(X_test, y_test))
y_pred = clf.predict(X_test)
target_names = ['class 0', 'class 1']
print(classification_report(y_test, y_pred, target_names=target_names))
# show
print('show.................................\n')
n_classes = len(set(y_train))
plt.figure()
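The commented-out "Model with rank: ..." block above matches the output of the search-report helper used in the scikit-learn RandomizedSearchCV example; a sketch of that helper, assuming the cv_results_ of a fitted search:

import numpy as np

def report(results, n_top=3):
    # print the top-n_top parameter settings from search.cv_results_
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results['rank_test_score'] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print("Mean validation score: {0:.3f} (std: {1:.3f})".format(
                  results['mean_test_score'][candidate],
                  results['std_test_score'][candidate]))
            print("Parameters: {0}".format(results['params'][candidate]))
            print("")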
Example #7
    def create_model(self, model_type, X, y, model_params, search_params):
        """
        Executes randomized-search hyperparameter optimization for the specified model. Refer to sklearn
        and tslearn documentation for details.
        Sources:
        # https://www.kaggle.com/hatone/mlpclassifier-with-gridsearchcv
        # https://en.wikipedia.org/wiki/Hyperparameter_optimization#Grid_search
        # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html
        # alternative to grid search: https://github.com/sahilm89/lhsmdu
        :param model_type: 'tssvc' or 'knn_classifier'
        :param X: feature data (pandas DataFrame or Series)
        :param y: labels (pandas DataFrame or Series)
        :param model_params: parameter distributions for RandomizedSearchCV
        :param search_params: positional list of search settings (see comment below)
        :return: dict with the fitted search ('clf'), 'X_test' and 'y_test'
        """
        try:

            if X is None or y is None or model_params is None or search_params is None:
                raise TypeError(self.messages.ILLEGAL_ARGUMENT_NONE_TYPE.value)
            if not isinstance(X, (pandas.DataFrame, pandas.Series)):
                raise TypeError(self.messages.ILLEGAL_ARGUMENT_TYPE.value)
            if not isinstance(y, (pandas.DataFrame, pandas.Series)):
                raise TypeError(self.messages.ILLEGAL_ARGUMENT_TYPE.value)

            if not isinstance(model_params, dict) or not isinstance(
                    search_params, list):
                raise TypeError(self.messages.ILLEGAL_ARGUMENT_TYPE.value)

            model = None

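            # search_params is positional: [0] is passed to the model
            # constructor (and reused below as n_jobs for the search),
            # [1] verbose, [2] cv folds, [3] n_iter, [4] whether to pickle
            # the fitted search, [5] pickle path, [6] test_size for the split.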
            if model_type == 'tssvc':
                model = TimeSeriesSVC(search_params[0])

            if model_type == 'knn_classifier':
                model = KNeighborsTimeSeriesClassifier(search_params[0])

            if model is None:
                raise ValueError(
                    self.messages.PROVIDED_MODE_DOESNT_EXIST.value)

            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=search_params[6], stratify=y)
            clf = RandomizedSearchCV(model,
                                     model_params,
                                     n_jobs=search_params[0],
                                     verbose=search_params[1],
                                     cv=search_params[2],
                                     n_iter=search_params[3])
            clf.fit(X_train, y_train)
            if search_params[4]:
                with open(r"{}".format(search_params[5]), "wb") as output_file:
                    pickle.dump(clf, output_file)

            return {'clf': clf, 'X_test': X_test, 'y_test': y_test}

        except (TypeError, ValueError):
            self.logger.error(traceback.format_exc())
            os._exit(1)

        except Exception:
            self.logger.error(traceback.format_exc())
            os._exit(2)
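A hypothetical call, to make the positional search_params layout concrete (trainer, the parameter values and the file name below are illustrative, not from the source):

search_params = [
    -1,           # [0] model constructor arg, reused as n_jobs for the search
    1,            # [1] verbose
    3,            # [2] cv folds
    10,           # [3] n_iter
    False,        # [4] pickle the fitted search?
    "model.pkl",  # [5] pickle path (ignored when [4] is False)
    0.25,         # [6] test_size for train_test_split
]
model_params = {"C": [0.1, 1.0, 10.0], "kernel": ["gak"]}
result = trainer.create_model('tssvc', X, y, model_params, search_params)
clf = result['clf']  # fitted RandomizedSearchCV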
Example #8
# Author: Romain Tavenard
# License: BSD 3 clause

import numpy
import matplotlib.pyplot as plt

from tslearn.datasets import CachedDatasets
from tslearn.preprocessing import TimeSeriesScalerMinMax
from tslearn.svm import TimeSeriesSVC

numpy.random.seed(0)
X_train, y_train, X_test, y_test = CachedDatasets().load_dataset("Trace")
X_train = TimeSeriesScalerMinMax().fit_transform(X_train)
X_test = TimeSeriesScalerMinMax().fit_transform(X_test)

clf = TimeSeriesSVC(kernel="gak", gamma=.1)
clf.fit(X_train, y_train)
print("Correct classification rate:", clf.score(X_test, y_test))

n_classes = len(set(y_train))

plt.figure()
support_vectors = clf.support_vectors_
for i, cl in enumerate(set(y_train)):
    plt.subplot(n_classes, 1, i + 1)
    plt.title("Support vectors for class %d" % cl)
    for ts in support_vectors[i]:
        plt.plot(ts.ravel())

plt.tight_layout()
plt.show()
Example #9
    raw_data = pd.read_csv(os_path.join(working_dir_path,
                                        "./data/train_curves.csv"),
                           header=None)
    time_series_train = to_time_series_dataset(raw_data)

    labels_train = genfromtxt(os_path.join(
        working_dir_path, "./data/train_clustering_result.csv"),
                              delimiter=',')

    # Normalize the time series
    time_series_train = TimeSeriesScalerMinMax().fit_transform(
        time_series_train)

    # Define the model
    gak_svm_classification_model = TimeSeriesSVC(kernel="gak",
                                                 gamma=.1,
                                                 n_jobs=4,
                                                 verbose=1)

    # fit the model using the training data
    gak_svm_classification_model.fit(time_series_train, labels_train)

    #############################################################################################
    # Calculate classification rate
    #############################################################################################

    # load test data
    X_test = pd.read_csv(os_path.join(working_dir_path,
                                      "./data/test_curves.csv"),
                         header=None)
    X_test = to_time_series_dataset(X_test)
    X_test = TimeSeriesScalerMinMax().fit_transform(X_test)
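The excerpt stops before the classification rate promised by the banner comment is computed; a minimal continuation sketch (the test-label filename is an assumption, mirroring the training files):

    # Assumption: test labels are stored like the training labels
    labels_test = genfromtxt(os_path.join(
        working_dir_path, "./data/test_clustering_result.csv"),
                             delimiter=',')
    print("Correct classification rate:",
          gak_svm_classification_model.score(X_test, labels_test))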