Ejemplo n.º 1
0
    def fit(self, X, y):
        """Build an ensemble of BOSS classifiers from the training set (X,
        y), through randomising over the para space to make a fixed size
        ensemble of the best.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, 1]
            Nested dataframe with univariate time-series in cells.
        y : array-like, shape = [n_instances] The class labels.

        Returns
        -------
        self : object
        """
        X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

        start_time = time.time()
        self.time_limit = self.time_limit * 60
        self.n_instances, _, self.series_length = X.shape
        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self.classifiers = []
        self.weights = []

        # Window length parameter space dependent on series length
        max_window_searches = self.series_length / 4
        max_window = int(self.series_length * self.max_win_len_prop)
        win_inc = int((max_window - self.min_window) / max_window_searches)
        if win_inc < 1:
            win_inc = 1

        possible_parameters = self._unique_parameters(max_window, win_inc)
        num_classifiers = 0
        train_time = 0
        subsample_size = int(self.n_instances * 0.7)
        lowest_acc = 1
        lowest_acc_idx = 0

        rng = check_random_state(self.random_state)

        if self.time_limit > 0:
            self.n_parameter_samples = 0

        while (train_time < self.time_limit or num_classifiers <
               self.n_parameter_samples) and len(possible_parameters) > 0:
            parameters = possible_parameters.pop(
                rng.randint(0, len(possible_parameters)))

            subsample = rng.choice(self.n_instances,
                                   size=subsample_size,
                                   replace=False)
            X_subsample = X[subsample]  # .iloc[subsample, :]
            y_subsample = y[subsample]

            boss = IndividualBOSS(*parameters,
                                  alphabet_size=self.alphabet_size,
                                  save_words=False,
                                  random_state=self.random_state)
            boss.fit(X_subsample, y_subsample)
            boss._clean()

            boss.accuracy = self._individual_train_acc(boss, y_subsample,
                                                       subsample_size,
                                                       lowest_acc)
            weight = math.pow(boss.accuracy, 4)

            if num_classifiers < self.max_ensemble_size:
                if boss.accuracy < lowest_acc:
                    lowest_acc = boss.accuracy
                    lowest_acc_idx = num_classifiers
                self.weights.append(weight)
                self.classifiers.append(boss)

            elif boss.accuracy > lowest_acc:
                self.weights[lowest_acc_idx] = weight
                self.classifiers[lowest_acc_idx] = boss
                lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

            num_classifiers += 1
            train_time = time.time() - start_time

        self.n_estimators = len(self.classifiers)
        self.weight_sum = np.sum(self.weights)

        self._is_fitted = True
        return self
Ejemplo n.º 2
0
    def _fit(self, X, y):
        """Fit a c-boss ensemble on cases (X,y), where y is the target variable.

        Build an ensemble of BOSS classifiers from the training set (X,
        y), through randomising over the para space to make a fixed size
        ensemble of the best.

        Parameters
        ----------
        X : nested pandas DataFrame of shape (n_instances, 1)
            Nested dataframe with univariate time-series in cells.
        y : array-like of shape (n_instances,)
            The class labels.

        Returns
        -------
        self : object
        """
        time_limit = self.time_limit_in_minutes * 60
        self.n_instances, _, self.series_length = X.shape
        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        for index, classVal in enumerate(self.classes_):
            self.class_dictionary[classVal] = index

        self.classifiers = []
        self.weights = []

        # Window length parameter space dependent on series length
        max_window_searches = self.series_length / 4
        max_window = int(self.series_length * self.max_win_len_prop)
        win_inc = int((max_window - self.min_window) / max_window_searches)
        if win_inc < 1:
            win_inc = 1
        if self.min_window > max_window + 1:
            raise ValueError(
                f"Error in ContractableBOSS, min_window ="
                f"{self.min_window} is bigger"
                f" than max_window ={max_window},"
                f" series length is {self.series_length}"
                f" try set min_window to be smaller than series length in "
                f"the constructor, but the classifier may not work at "
                f"all with very short series")
        possible_parameters = self._unique_parameters(max_window, win_inc)
        num_classifiers = 0
        start_time = time.time()
        train_time = 0
        subsample_size = int(self.n_instances * 0.7)
        lowest_acc = 1
        lowest_acc_idx = 0

        rng = check_random_state(self.random_state)

        if time_limit > 0:
            self.n_parameter_samples = 0

        while (train_time < time_limit or num_classifiers <
               self.n_parameter_samples) and len(possible_parameters) > 0:
            parameters = possible_parameters.pop(
                rng.randint(0, len(possible_parameters)))

            subsample = rng.choice(self.n_instances,
                                   size=subsample_size,
                                   replace=False)
            X_subsample = X[subsample]
            y_subsample = y[subsample]

            boss = IndividualBOSS(
                *parameters,
                alphabet_size=self.alphabet_size,
                save_words=False,
                random_state=self.random_state,
            )
            boss.fit(X_subsample, y_subsample)
            boss._clean()
            boss.subsample = subsample

            boss.accuracy = self._individual_train_acc(
                boss,
                y_subsample,
                subsample_size,
                0 if num_classifiers < self.max_ensemble_size else lowest_acc,
            )
            if boss.accuracy > 0:
                weight = math.pow(boss.accuracy, 4)
            else:
                weight = 0.000000001

            if num_classifiers < self.max_ensemble_size:
                if boss.accuracy < lowest_acc:
                    lowest_acc = boss.accuracy
                    lowest_acc_idx = num_classifiers
                self.weights.append(weight)
                self.classifiers.append(boss)
            elif boss.accuracy > lowest_acc:
                self.weights[lowest_acc_idx] = weight
                self.classifiers[lowest_acc_idx] = boss
                lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

            num_classifiers += 1
            train_time = time.time() - start_time

        self.n_estimators = len(self.classifiers)
        self.weight_sum = np.sum(self.weights)
        return self