예제 #1
0
    def fit(self, X, y):
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            random_state=self.random_state,
            time_contract_in_mins=60,
        )
        self.stc.fit(X, y)
        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                random_state=self.random_state,
                time_contract_in_mins=60,
            ),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.stc_weight = accuracy_score(y, train_preds)**4

        self.tsf = TimeSeriesForest(random_state=self.random_state)
        self.tsf.fit(X, y)
        train_preds = cross_val_predict(
            TimeSeriesForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.tsf_weight = accuracy_score(y, train_preds)**4

        self.rise = RandomIntervalSpectralForest(
            random_state=self.random_state)
        self.fit(X, y)
        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
        )
        self.rise_weight = accuracy_score(y, train_preds)**4

        self.cboss = ContractableBOSS(random_state=self.random_state)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds)**4

        return self
예제 #2
0
def test_stc_on_unit_test_data():
    """Test of ShapeletTransformClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
        save_transformed_data=True,
    )
    stc.fit(X_train, y_train)

    # assert probabilities are the same
    probas = stc.predict_proba(X_test.iloc[indices])
    testing.assert_array_equal(probas, stc_unit_test_probas)

    # test train estimate
    train_probas = stc._get_train_probs(X_train, y_train)
    train_preds = stc.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.75
예제 #3
0
def test_contracted_stc_on_unit_test_data():
    """Test of contracted ShapeletTransformClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")

    # train contracted STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(contract_max_n_estimators=3),
        max_shapelets=20,
        time_limit_in_minutes=0.25,
        contract_max_n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
    )
    stc.fit(X_train, y_train)
예제 #4
0
def test_stc_on_basic_motions():
    """Test of ShapeletTransformClassifier on basic motions data."""
    # load basic motions data
    X_train, y_train = load_basic_motions(split="train", return_X_y=True)
    X_test, y_test = load_basic_motions(split="test", return_X_y=True)
    indices = np.random.RandomState(4).choice(len(y_train), 15, replace=False)

    # train STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=3),
        max_shapelets=20,
        n_shapelet_samples=500,
        batch_size=100,
        random_state=0,
    )
    stc.fit(X_train.iloc[indices], y_train[indices])

    # assert probabilities are the same
    probas = stc.predict_proba(X_test.iloc[indices[:10]])
    testing.assert_array_equal(probas, stc_basic_motions_probas)
예제 #5
0
def test_stc_train_estimate():
    """Test of ShapeletTransformClassifier train estimate on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train")

    # train STC
    stc = ShapeletTransformClassifier(
        estimator=RotationForest(n_estimators=2),
        max_shapelets=3,
        n_shapelet_samples=10,
        batch_size=5,
        random_state=0,
        save_transformed_data=True,
    )
    stc.fit(X_train, y_train)

    # test train estimate
    train_probas = stc._get_train_probs(X_train, y_train)
    assert train_probas.shape == (20, 2)
    train_preds = stc.classes_[np.argmax(train_probas, axis=1)]
    assert accuracy_score(y_train, train_preds) >= 0.6
예제 #6
0
def set_classifier(cls, resampleId=None):
    """
    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducability. You can set up bespoke classifier in many other ways.

    :param cls: String indicating which classifier you want
    :param resampleId: classifier random seed

    :return: A classifier.

    """
    name = cls.lower()
    # Distance based
    if name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resampleId)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resampleId)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resampleId)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble()
    elif name == "shapedtw":
        return ShapeDTW()
    # Dictionary based
    elif name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resampleId)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resampleId)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(random_state=resampleId)
    elif name == "weasel":
        return WEASEL(random_state=resampleId)
    elif name == "muse":
        return MUSE(random_state=resampleId)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralForest(random_state=resampleId)
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resampleId)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resampleId)
    elif name == "drcif":
        return DrCIF(random_state=resampleId)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            random_state=resampleId, time_contract_in_mins=1
        )
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    elif name == "rocket":
        return ROCKETClassifier(random_state=resampleId)
    elif name == "arsenal":
        return Arsenal(random_state=resampleId)
    # Hybrid
    elif name == "catch22":
        return Catch22ForestClassifier(random_state=resampleId)
    elif name == "hivecotev1":
        return HIVECOTEV1(random_state=resampleId)
    else:
        raise Exception("UNKNOWN CLASSIFIER")
예제 #7
0
    def fit(self, X, y):
        """Fit a HIVE-COTEv1.0 classifier.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        self.stc = ShapeletTransformClassifier(
            **self.stc_params,
            random_state=self.random_state,
        )
        self.stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            ShapeletTransformClassifier(
                **self.stc_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.stc_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight))  # noqa

        self.tsf = TimeSeriesForestClassifier(
            **self.tsf_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(**self.tsf_params,
                                       random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.tsf_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight))  # noqa

        self.rise = RandomIntervalSpectralForest(
            **self.rise_params,
            random_state=self.random_state,
            n_jobs=self.n_jobs,
        )
        self.rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ",
                  datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        train_preds = cross_val_predict(
            RandomIntervalSpectralForest(
                **self.rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self.n_jobs,
        )
        self.rise_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight))  # noqa

        self.cboss = ContractableBOSS(**self.cboss_params,
                                      random_state=self.random_state,
                                      n_jobs=self.n_jobs)
        self.cboss.fit(X, y)
        train_probs = self.cboss._get_train_probs(X)
        train_preds = self.cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included) ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight))  # noqa

        self._is_fitted = True
        return self
예제 #8
0
def set_classifier(cls, resample_id=None, train_file=False):
    """Construct a classifier.

    Basic way of creating the classifier to build using the default settings. This
    set up is to help with batch jobs for multiple problems to facilitate easy
    reproducibility for use with load_and_run_classification_experiment. You can pass a
    classifier object instead to run_classification_experiment.

    Parameters
    ----------
    cls : str
        String indicating which classifier you want.
    resample_id : int or None, default=None
        Classifier random seed.
    train_file : bool, default=False
        Whether a train file is being produced.

    Return
    ------
    classifier : A BaseClassifier.
        The classifier matching the input classifier name.
    """
    name = cls.lower()
    # Dictionary based
    if name == "boss" or name == "bossensemble":
        return BOSSEnsemble(random_state=resample_id)
    elif name == "cboss" or name == "contractableboss":
        return ContractableBOSS(random_state=resample_id)
    elif name == "tde" or name == "temporaldictionaryensemble":
        return TemporalDictionaryEnsemble(
            random_state=resample_id, save_train_predictions=train_file
        )
    elif name == "weasel":
        return WEASEL(random_state=resample_id)
    elif name == "muse":
        return MUSE(random_state=resample_id)
    # Distance based
    elif name == "pf" or name == "proximityforest":
        return ProximityForest(random_state=resample_id)
    elif name == "pt" or name == "proximitytree":
        return ProximityTree(random_state=resample_id)
    elif name == "ps" or name == "proximityStump":
        return ProximityStump(random_state=resample_id)
    elif name == "dtwcv" or name == "kneighborstimeseriesclassifier":
        return KNeighborsTimeSeriesClassifier(distance="dtwcv")
    elif name == "dtw" or name == "1nn-dtw":
        return KNeighborsTimeSeriesClassifier(distance="dtw")
    elif name == "msm" or name == "1nn-msm":
        return KNeighborsTimeSeriesClassifier(distance="msm")
    elif name == "ee" or name == "elasticensemble":
        return ElasticEnsemble(random_state=resample_id)
    elif name == "shapedtw":
        return ShapeDTW()
    # Feature based
    elif name == "catch22":
        return Catch22Classifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "matrixprofile":
        return MatrixProfileClassifier(random_state=resample_id)
    elif name == "signature":
        return SignatureClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
        )
    elif name == "tsfresh":
        return TSFreshClassifier(
            random_state=resample_id, estimator=RandomForestClassifier(n_estimators=500)
        )
    elif name == "tsfresh-r":
        return TSFreshClassifier(
            random_state=resample_id,
            estimator=RandomForestClassifier(n_estimators=500),
            relevant_feature_extractor=True,
        )
    # Hybrid
    elif name == "hc1" or name == "hivecotev1":
        return HIVECOTEV1(random_state=resample_id)
    elif name == "hc2" or name == "hivecotev2":
        return HIVECOTEV2(random_state=resample_id)
    # Interval based
    elif name == "rise" or name == "randomintervalspectralforest":
        return RandomIntervalSpectralEnsemble(
            random_state=resample_id, n_estimators=500
        )
    elif name == "tsf" or name == "timeseriesforestclassifier":
        return TimeSeriesForestClassifier(random_state=resample_id, n_estimators=500)
    elif name == "cif" or name == "canonicalintervalforest":
        return CanonicalIntervalForest(random_state=resample_id, n_estimators=500)
    elif name == "stsf" or name == "supervisedtimeseriesforest":
        return SupervisedTimeSeriesForest(random_state=resample_id, n_estimators=500)
    elif name == "drcif":
        return DrCIF(
            random_state=resample_id, n_estimators=500, save_transformed_data=train_file
        )
    # Kernel based
    elif name == "rocket":
        return ROCKETClassifier(random_state=resample_id)
    elif name == "arsenal":
        return Arsenal(random_state=resample_id, save_transformed_data=train_file)
    # Shapelet based
    elif name == "stc" or name == "shapelettransformclassifier":
        return ShapeletTransformClassifier(
            random_state=resample_id, save_transformed_data=train_file
        )
    elif name == "mrseql" or name == "mrseqlclassifier":
        return MrSEQLClassifier(seql_mode="fs", symrep=["sax", "sfa"])
    else:
        raise Exception("UNKNOWN CLASSIFIER")
예제 #9
0
    def _fit(self, X, y):
        """Fit HIVE-COTE 2.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # Default values from HC2 paper
        if self.stc_params is None:
            self._stc_params = {"transform_limit_in_minutes": 120}
        if self.drcif_params is None:
            self._drcif_params = {"n_estimators": 500}
        if self.arsenal_params is None:
            self._arsenal_params = {}
        if self.tde_params is None:
            self._tde_params = {}

        # If we are contracting split the contract time between each algorithm
        if self.time_limit_in_minutes > 0:
            # Leave 1/3 for train estimates
            ct = self.time_limit_in_minutes / 6
            self._stc_params["time_limit_in_minutes"] = ct
            self._drcif_params["time_limit_in_minutes"] = ct
            self._arsenal_params["time_limit_in_minutes"] = ct
            self._tde_params["time_limit_in_minutes"] = ct

        # Build STC
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find STC weight using train set estimate
        train_probs = self._stc._get_train_probs(X, y)
        train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
        self.stc_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight_))  # noqa

        # Build DrCIF
        self._drcif = DrCIF(
            **self._drcif_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._drcif.fit(X, y)

        if self.verbose > 0:
            print("DrCIF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find DrCIF weight using train set estimate
        train_probs = self._drcif._get_train_probs(X, y)
        train_preds = self._drcif.classes_[np.argmax(train_probs, axis=1)]
        self.drcif_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "DrCIF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("DrCIF weight = " + str(self.drcif_weight_))  # noqa

        # Build Arsenal
        self._arsenal = Arsenal(
            **self._arsenal_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._arsenal.fit(X, y)

        if self.verbose > 0:
            print("Arsenal ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find Arsenal weight using train set estimate
        train_probs = self._arsenal._get_train_probs(X, y)
        train_preds = self._arsenal.classes_[np.argmax(train_probs, axis=1)]
        self.arsenal_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "Arsenal train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("Arsenal weight = " + str(self.arsenal_weight_))  # noqa

        # Build TDE
        self._tde = TemporalDictionaryEnsemble(
            **self._tde_params,
            save_train_predictions=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._tde.fit(X, y)

        if self.verbose > 0:
            print("TDE ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find TDE weight using train set estimate
        train_probs = self._tde._get_train_probs(X, y, train_estimate_method="loocv")
        train_preds = self._tde.classes_[np.argmax(train_probs, axis=1)]
        self.tde_weight_ = accuracy_score(y, train_preds) ** 4

        if self.verbose > 0:
            print(  # noqa
                "TDE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TDE weight = " + str(self.tde_weight_))  # noqa

        return self
예제 #10
0
         RocketClassifier(num_kernels=500, random_state=0)
     ),
 )
 _print_array(
     "RocketClassifier - BasicMotions",
     _reproduce_classification_basic_motions(
         RocketClassifier(num_kernels=500, random_state=0)
     ),
 )
 _print_array(
     "ShapeletTransformClassifier - UnitTest",
     _reproduce_classification_unit_test(
         ShapeletTransformClassifier(
             estimator=RotationForest(n_estimators=3),
             max_shapelets=20,
             n_shapelet_samples=500,
             batch_size=100,
             random_state=0,
         )
     ),
 )
 _print_array(
     "ShapeletTransformClassifier - BasicMotions",
     _reproduce_classification_basic_motions(
         ShapeletTransformClassifier(
             estimator=RotationForest(n_estimators=3),
             max_shapelets=20,
             n_shapelet_samples=500,
             batch_size=100,
             random_state=0,
         )
예제 #11
0
    def _fit(self, X, y):
        """Fit HIVE-COTE 1.0 to training data.

        Parameters
        ----------
        X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
            The training data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self :
            Reference to self.

        Notes
        -----
        Changes state by creating a fitted model that updates attributes
        ending in "_" and sets is_fitted flag to True.
        """
        # Default values from HC1 paper
        if self.stc_params is None:
            self._stc_params = {"transform_limit_in_minutes": 120}
        if self.tsf_params is None:
            self._tsf_params = {"n_estimators": 500}
        if self.rise_params is None:
            self._rise_params = {"n_estimators": 500}
        if self.cboss_params is None:
            self._cboss_params = {}

        # Cross-validation size for TSF and RISE
        cv_size = 10
        _, counts = np.unique(y, return_counts=True)
        min_class = np.min(counts)
        if min_class < cv_size:
            cv_size = min_class

        # Build STC
        self._stc = ShapeletTransformClassifier(
            **self._stc_params,
            save_transformed_data=True,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._stc.fit(X, y)

        if self.verbose > 0:
            print("STC ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find STC weight using train set estimate
        train_probs = self._stc._get_train_probs(X, y)
        train_preds = self._stc.classes_[np.argmax(train_probs, axis=1)]
        self.stc_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "STC train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("STC weight = " + str(self.stc_weight_))  # noqa

        # Build TSF
        self._tsf = TimeSeriesForestClassifier(
            **self._tsf_params,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._tsf.fit(X, y)

        if self.verbose > 0:
            print("TSF ", datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find TSF weight using train set estimate found through CV
        train_preds = cross_val_predict(
            TimeSeriesForestClassifier(**self._tsf_params,
                                       random_state=self.random_state),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self._threads_to_use,
        )
        self.tsf_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "TSF train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("TSF weight = " + str(self.tsf_weight_))  # noqa

        # Build RISE
        self._rise = RandomIntervalSpectralEnsemble(
            **self._rise_params,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._rise.fit(X, y)

        if self.verbose > 0:
            print("RISE ",
                  datetime.now().strftime("%H:%M:%S %d/%m/%Y"))  # noqa

        # Find RISE weight using train set estimate found through CV
        train_preds = cross_val_predict(
            RandomIntervalSpectralEnsemble(
                **self._rise_params,
                random_state=self.random_state,
            ),
            X=X,
            y=y,
            cv=cv_size,
            n_jobs=self._threads_to_use,
        )
        self.rise_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "RISE train estimate ",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("RISE weight = " + str(self.rise_weight_))  # noqa

        # Build cBOSS
        self._cboss = ContractableBOSS(
            **self._cboss_params,
            random_state=self.random_state,
            n_jobs=self._threads_to_use,
        )
        self._cboss.fit(X, y)

        # Find cBOSS weight using train set estimate
        train_probs = self._cboss._get_train_probs(X, y)
        train_preds = self._cboss.classes_[np.argmax(train_probs, axis=1)]
        self.cboss_weight_ = accuracy_score(y, train_preds)**4

        if self.verbose > 0:
            print(  # noqa
                "cBOSS (estimate included)",
                datetime.now().strftime("%H:%M:%S %d/%m/%Y"),
            )
            print("cBOSS weight = " + str(self.cboss_weight_))  # noqa

        return self
예제 #12
0
     "RocketClassifier - UnitTest",
     _reproduce_classification_unit_test(
         RocketClassifier(num_kernels=100, random_state=0)),
 )
 _print_array(
     "RocketClassifier - BasicMotions",
     _reproduce_classification_basic_motions(
         RocketClassifier(num_kernels=100, random_state=0)),
 )
 _print_array(
     "ShapeletTransformClassifier - UnitTest",
     _reproduce_classification_unit_test(
         ShapeletTransformClassifier(
             estimator=RandomForestClassifier(n_estimators=5),
             n_shapelet_samples=50,
             max_shapelets=10,
             batch_size=10,
             random_state=0,
         )),
 )
 _print_array(
     "ShapeletTransformClassifier - BasicMotions",
     _reproduce_classification_basic_motions(
         ShapeletTransformClassifier(
             estimator=RandomForestClassifier(n_estimators=5),
             n_shapelet_samples=50,
             max_shapelets=10,
             batch_size=10,
             random_state=0,
         )),
 )
예제 #13
0
def build_model(dataset,
                pipeline,
                experiment,
                current_target='class',
                test_size=0.3):
    models_dir = './results/{}_{}_{}/models/'.format(dataset, pipeline,
                                                     experiment)
    reports_dir = './results/{}_{}_{}/reports/'.format(dataset, pipeline,
                                                       experiment)
    experiment_index_file = './results/{}_{}_{}/index.json'.format(
        dataset, pipeline, experiment)
    log_file = './results/{}_{}_{}/model_build.log'.format(
        dataset, pipeline, experiment)

    scoring = make_scorer(precision_score, zero_division=1, average='micro')
    os.makedirs(models_dir, exist_ok=True)
    os.makedirs(reports_dir, exist_ok=True)
    # Setup logging
    logger.setup(filename=log_file,
                 filemode='w',
                 root_level=logging.DEBUG,
                 log_level=logging.DEBUG,
                 logger='build_model')
    index_name = 'index'
    if '.' in dataset:
        splits = dataset.split(".")
        dataset = splits[0]
        index_name = splits[1]
    # Load the dataset index
    dataset_index = load_dataset(dataset,
                                 return_index=True,
                                 index_name=index_name)
    # Dynamically import the pipeline we want to use for building the model
    logger.info('Start experiment: {} using {} on {} with target {}'.format(
        experiment, pipeline, dataset, current_target))
    reports = ReportCollection(dataset, pipeline, experiment)
    for _sym, data in {'BTC': dataset_index['BTC']}.items():
        try:
            logger.info('Start processing: {}'.format(_sym))
            features = pd.read_csv(data['csv'],
                                   sep=',',
                                   encoding='utf-8',
                                   index_col='Date',
                                   parse_dates=True)
            targets = pd.read_csv(data['target_csv'],
                                  sep=',',
                                  encoding='utf-8',
                                  index_col='Date',
                                  parse_dates=True)

            # Drop columns whose values are all NaN, as well as rows with ANY nan value, then
            # replace infinity values with nan so that they can later be imputed to a finite value
            features = features.dropna(
                axis='columns', how='all').dropna().replace([np.inf, -np.inf],
                                                            np.nan)
            target = targets.loc[features.index][current_target]

            #X_train, X_test, y_train, y_test = train_test_split(features, target, shuffle=False, test_size=test_size)

            all_size = features.shape[0]
            train_size = int(all_size * (1 - test_size))
            features = detabularise(
                features[[c for c in features.columns if 'close' in c]])
            X_train = features.iloc[0:train_size]
            y_train = target.iloc[0:train_size]
            X_test = features.iloc[train_size:all_size]
            y_test = target.iloc[train_size:all_size]
            # Summarize distribution
            logger.info("Start Grid search")
            clf = ShapeletTransformClassifier(time_contract_in_mins=5)
            clf.fit(X_train, y_train)
            print('{} Score: {}'.format(_sym, clf.score(X_test, y_test)))
            pred = clf.predict(X_test)
            print(classification_report(y_test, pred))
            logger.info("End Grid search")

            logger.info("--- {} end ---".format(_sym))
        except Exception as e:
            logger.error(
                "Exception while building model pipeline: {} dataset: {} symbol: {}\nException:\n{}"
                .format(pipeline, dataset, _sym, e))
            traceback.print_exc()
    return reports