Ejemplo n.º 1
0
    def fit(self, data):
        if data.kfold > 1:
            cv_eval = {}
            for k, cv_fold in enumerate(data.Xy_train.keys()):
#                 print('    cv_fold: ', cv_fold)
                [(X_train, y_train), (X_val, y_val)] = data.Xy_train[cv_fold]
                X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
                knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
                knn.fit(X_train, y_train)
                eval_metrics = weareval.eval_output(knn.predict(X_val), y_val, tasktype=data.tasktype)
                cv_eval[cv_fold] = {'model': knn, 
                                    # 'data': [(X_train, y_train), (X_val, y_val)], # store just IDs?
                                    'metric': eval_metrics['mae'] if data.tasktype=='regression' else eval_metrics['balanced_acc_adj'],
                                    'metrics': eval_metrics}
            # retain only best model
            tmp = {cv_fold:cv_eval[cv_fold]['metric'] for cv_fold in cv_eval.keys()}
            bst_fold = min(tmp, key=tmp.get) if data.tasktype=='regression' else max(tmp, key=tmp.get)
            self.knn = cv_eval[bst_fold]['model']
            return {'model': self.knn, 'metrics': cv_eval[bst_fold]['metrics']}
        else:
            X_train, y_train = data.Xy_train
            X_val, y_val = data.Xy_val
            X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
            self.knn = knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
            self.knn.fit(X_train, y_train)
            eval_metrics = weareval.eval_output(self.knn.predict(X_val), y_val, tasktype=data.tasktype)
            return {'model': self.knn, 'metrics': eval_metrics}
Ejemplo n.º 2
0
def test_mul_sklearn_autoadapt():
    """Test auto-adapter for sklearn in mul."""
    RAND_SEED = 42
    y = _make_classification_y(n_instances=10, random_state=RAND_SEED)
    X = _make_panel_X(n_instances=10,
                      n_timepoints=20,
                      random_state=RAND_SEED,
                      y=y)
    X_test = _make_panel_X(n_instances=10,
                           n_timepoints=20,
                           random_state=RAND_SEED)

    t1 = ExponentTransformer(power=2)
    t2 = StandardScaler()
    c = KNeighborsTimeSeriesClassifier()

    t12c_1 = t1 * (t2 * c)
    t12c_2 = (t1 * t2) * c
    t12c_3 = t1 * t2 * c

    assert isinstance(t12c_1, ClassifierPipeline)
    assert isinstance(t12c_2, ClassifierPipeline)
    assert isinstance(t12c_3, ClassifierPipeline)

    y_pred = t12c_1.fit(X, y).predict(X_test)

    _assert_array_almost_equal(y_pred, t12c_2.fit(X, y).predict(X_test))
    _assert_array_almost_equal(y_pred, t12c_3.fit(X, y).predict(X_test))
Ejemplo n.º 3
0
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.
            For classifiers, a "default" set of parameters should be provided for
            general testing, and a "results_comparison" set for comparing against
            previously recorded results if the general set does not produce suitable
            probabilities to compare against.

        Returns
        -------
        params : dict or list of dict, default={}
            Parameters to create testing instances of the class.
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`.
        """
        # imports
        from sktime.classification.distance_based import KNeighborsTimeSeriesClassifier
        from sktime.transformations.series.exponent import ExponentTransformer

        t1 = ExponentTransformer(power=2)
        t2 = ExponentTransformer(power=0.5)
        c = KNeighborsTimeSeriesClassifier()

        # construct without names
        return {"transformers": [t1, t2], "classifier": c}
Ejemplo n.º 4
0
def test_dunder_mul():
    """Test the mul dunder method."""
    RAND_SEED = 42
    y = _make_classification_y(n_instances=10, random_state=RAND_SEED)
    X = _make_panel_X(n_instances=10,
                      n_timepoints=20,
                      random_state=RAND_SEED,
                      y=y)
    X_test = _make_panel_X(n_instances=5,
                           n_timepoints=20,
                           random_state=RAND_SEED)

    t1 = ExponentTransformer(power=4)
    t2 = ExponentTransformer(power=0.25)

    c = KNeighborsTimeSeriesClassifier()
    t12c_1 = t1 * (t2 * c)
    t12c_2 = (t1 * t2) * c
    t12c_3 = t1 * t2 * c

    assert isinstance(t12c_1, ClassifierPipeline)
    assert isinstance(t12c_2, ClassifierPipeline)
    assert isinstance(t12c_3, ClassifierPipeline)

    y_pred = c.fit(X, y).predict(X_test)

    _assert_array_almost_equal(y_pred, t12c_1.fit(X, y).predict(X_test))
    _assert_array_almost_equal(y_pred, t12c_2.fit(X, y).predict(X_test))
    _assert_array_almost_equal(y_pred, t12c_3.fit(X, y).predict(X_test))
Ejemplo n.º 5
0
def test_missing_unequal_tag_inference():
    """Test that ClassifierPipeline infers missing/unequal tags correctly."""
    c = KNeighborsTimeSeriesClassifier()
    c1 = ExponentTransformer() * PaddingTransformer() * ExponentTransformer(
    ) * c
    c2 = ExponentTransformer() * ExponentTransformer() * c
    c3 = Imputer() * ExponentTransformer() * c
    c4 = ExponentTransformer() * Imputer() * c

    assert c1.get_tag("capability:unequal_length")
    assert not c2.get_tag("capability:unequal_length")
    assert c3.get_tag("capability:missing_values")
    assert not c4.get_tag("capability:missing_values")
def main():
    generator = DataGenerator(labeled_data_file=args.labeled_data_file, data_util_file=args.data_util_file,
                              threshold=args.threshold, dt=args.dt, L=args.L, tmin=args.tmin, tmax=args.tmax)
    training_data, test_data = generator.get_data(ts_nth_element=args.ts_nth_element,
                                                                   training_frac=0.7)
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=args.n_neighbors, verbose=1, metric="dtw")
    x = detabularize(pd.DataFrame(training_data[:,1:]))
    try:
        with parallel_backend('threading', n_jobs=args.n_jobs):
            knn = knn.fit(x, training_data[:,0])
        with open('{save_file_name}.pickle'.format(save_file_name=args.save_file_name), 'wb') \
                as KNeighborsTimeSeriesModel:
            pickle.dump(knn, KNeighborsTimeSeriesModel, protocol=pickle.HIGHEST_PROTOCOL)
    except Exception as ex:
        print(ex)
Ejemplo n.º 7
0
    def fit(self, X, y):
        """
        Method to perform training on the classifier.

        Parameters
        ----------
        X - pandas dataframe of training data of shape [n_instances,1].
        y - list of class labels of shape [n_instances].

        Returns
        -------
        self : the shapeDTW object
        """
        # Perform preprocessing on params.
        if not (isinstance(self.shape_descriptor_function, str)):
            raise TypeError(
                "shape_descriptor_function must be an 'str'. \
                            Found '"
                + type(self.shape_descriptor_function).__name__
                + "' instead."
            )

        X, y = check_X_y(X, y, enforce_univariate=False)

        if self.metric_params is None:
            self.metric_params = {}

        # If the shape descriptor is 'compound',
        # calculate the appropriate weighting_factor
        if self.shape_descriptor_function == "compound":
            self._calculate_weighting_factor_value(X, y)

        # Fit the SlidingWindowSegmenter
        sw = SlidingWindowSegmenter(self.subsequence_length)
        sw.fit(X)
        self.sw = sw

        # Transform the training data.
        X = self._preprocess(X)

        # Fit the kNN classifier
        self.knn = KNeighborsTimeSeriesClassifier(n_neighbors=self.n_neighbors)
        self.knn.fit(X, y)
        self.classes_ = self.knn.classes_

        return self
Ejemplo n.º 8
0
def test_stat():
    """Test sign ranks."""
    data = load_gunpoint(split="train", return_X_y=False)
    dataset = RAMDataset(dataset=data, name="gunpoint")
    task = TSCTask(target="class_val")

    fc = ComposableTimeSeriesForestClassifier(n_estimators=1, random_state=1)
    strategy_fc = TSCStrategy(fc, name="tsf")
    pf = KNeighborsTimeSeriesClassifier()
    strategy_pf = TSCStrategy(pf, name="pf")

    # result backend
    results = RAMResults()
    orchestrator = Orchestrator(
        datasets=[dataset],
        tasks=[task],
        strategies=[strategy_pf, strategy_fc],
        cv=SingleSplit(random_state=1),
        results=results,
    )

    orchestrator.fit_predict(save_fitted_strategies=False)

    analyse = Evaluator(results)
    metric = PairwiseMetric(func=accuracy_score, name="accuracy")
    _ = analyse.evaluate(metric=metric)

    ranks = analyse.rank(ascending=True)
    pf_rank = ranks.loc[ranks.strategy == "pf",
                        "accuracy_mean_rank"].item()  # 1
    fc_rank = ranks.loc[ranks.strategy == "tsf",
                        "accuracy_mean_rank"].item()  # 2
    rank_array = [pf_rank, fc_rank]
    rank_array_test = [1, 2]
    _, sign_test_df = analyse.sign_test()

    sign_array = [
        [sign_test_df["pf"][0], sign_test_df["pf"][1]],
        [sign_test_df["tsf"][0], sign_test_df["tsf"][1]],
    ]
    sign_array_test = [[1, 1], [1, 1]]
    np.testing.assert_equal([rank_array, sign_array],
                            [rank_array_test, sign_array_test])
Ejemplo n.º 9
0
def main():
    #Load arff file into Tuple of size 2.
    #First element has the time-series data in arrays and Second element has the description of the attributes
    TRAIN = arff.loadarff('ItalyPowerDemand_TRAIN.arff')
    TEST = arff.loadarff('ItalyPowerDemand_TEST.arff')
    #Convert the data from the first Tuple elemento to a tabularized dataframe
    df_TRAIN = pd.DataFrame(TRAIN[0])
    df_TEST = pd.DataFrame(TEST[0])

    #Using sktime to handle the data
    print(df_TRAIN.head())
    print('\n Is nested the df above?', is_nested_dataframe(df_TRAIN), '\n')

    #Handling the datasets
    X_train = df_TRAIN.drop('target', axis=1)
    y_train = df_TRAIN['target'].astype(int)
    print(X_train.head(), y_train.head(), '\n')
    X_test = df_TEST.drop('target', axis=1)
    y_test = df_TEST['target'].astype(int)

    #Detabularizing and Nesting X_train, X_test
    X_train_detab = detabularize(X_train)
    X_test_detab = detabularize(X_test)
    print(X_train_detab.head())
    print('Is nested the detabularized df above?',
          is_nested_dataframe(X_train_detab), '\n')

    #The lines above could be simplified with the following method from sktime
    X, y = load_from_arff_to_dataframe('ItalyPowerDemand_TRAIN.arff')
    print(X_train_detab.head(), X.head(), type(y_train), type(y))

    #Classifier algorithm
    knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, metric="dtw")
    knn.fit(X_train_detab, y_train)
    print('The score of the KNN classifier is:',
          round(knn.score(X_test_detab, y_test), 4))
Ejemplo n.º 10
0
def set_classifier(cls, resampleId=None):
    """
    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducability. You can set up bespoke classifier in many other ways.

    :param cls: String indicating which classifier you want
    :param resampleId: classifier random seed

    :return: A classifier.

    """
    name = cls.lower()
    # Distance based
    if name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resampleId)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resampleId)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resampleId)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Dictionary based
    elif name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resampleId)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resampleId)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "weasel":
        return WEASEL(random_state=resampleId)
    elif name == "muse":
        return MUSE(random_state=resampleId)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralForest(random_state=resampleId)
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resampleId)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "drcif":
        return DrCIF(random_state=resampleId)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    elif name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    elif name == "arsenal":
        return Arsenal(random_state=resampleId)
    # Hybrid
    elif name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Ejemplo n.º 11
0
# -------------- SETUP ------------------------------------------

# Dataset path folder
DATA_PATH = os.path.join(os.path.dirname(__file__), "Datasets")

# Datasets paths
# [ [train set path, test set path, dataset name], ...]
datasets_path = [[
    "RacketSports/RacketSports_TRAIN.ts", "RacketSports/RacketSports_TEST.ts",
    "RacketSport"
]]

# Setup classifier
# [ [classifier, classifier name], ...]
classifiers = [[
    KNeighborsTimeSeriesClassifier(1, 'uniform', 'brute', 'dtw', None),
    "DTW-1NN"
],
               [
                   KNeighborsTimeSeriesClassifier(4, 'uniform', 'brute', 'dtw',
                                                  None), "DTW-4NN"
               ]]

# --------------- MAIN PROGRAM ---------------------------------

# Load data
# [ ((train_data, train_class), (test_data, test_class), dataset name), ...]
data = []
for train_path, test_path, name in datasets_path:
    data += [
        (load_from_tsfile_to_dataframe(os.path.join(DATA_PATH, train_path)),
Ejemplo n.º 12
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want.
    resample_id : int or None, default=None
        Classifier random seed.
    train_file : bool, default=False
        Whether a train file is being produced.

    Return
    ------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resample_id)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resample_id)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resample_id)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resample_id)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resample_id)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name == "hc1" or name == "hivecotev1":
        return HIVECOTEV1(random_state=resample_id)
    elif name == "hc2" or name == "hivecotev2":
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name == "stsf" or name == "supervisedtimeseriesforest":
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
Ejemplo n.º 13
0
from sklearn.metrics.cluster import contingency_matrix
import numpy as np
import os
import sktime
import time


# -------------- SETUP -----------------

# Datasets path folder
DATA_PATH = os.path.join(os.path.dirname(__file__), "Datasets")

# Classifiers
# [ [classifier, classifier name], ...]
classifiers = [
    [KNeighborsTimeSeriesClassifier(1, 'uniform', 'dtw'), "DTW-1NN"],
    [KNeighborsTimeSeriesClassifier(4, 'uniform', 'dtw'), "DTW-4NN"]
]

# Number of cores to use (-1 -> all)
nb_jobs = -1

# Split strategy
nb_split = 10
cv = KFold(n_splits=nb_split)

# --------------- MAIN PROGRAM --------------------

# Display some info
nbDatasets = 0
for  dataset in os.listdir(DATA_PATH): nbDatasets+=1
# Run the fit and predict
for i, dataset in enumerate(datasets):
    print(f'Dataset: {i + 1}/{n_datasets} {dataset}')

    # pre-allocate results
    results = np.zeros(3)

    # load data
    train_file = os.path.join(data_path, f'{dataset}/{dataset}_TRAIN.ts')
    test_file = os.path.join(data_path, f'{dataset}/{dataset}_TEST.ts')

    x_train, y_train = load_from_tsfile_to_dataframe(train_file)
    x_test, y_test = load_from_tsfile_to_dataframe(test_file)

    tsf = KNeighborsTimeSeriesClassifier()

    # fit
    try:
        s = time.time()
        tsf.fit(x_train, y_train)
        results[0] = time.time() - s

        # predict
        s = time.time()
        y_pred = tsf.predict(x_test)
        results[1] = time.time() - s

    # catch and raise user exceptions
    except (KeyboardInterrupt, SystemExit):
        raise