def _fit(self, X, y):
    """Train the cBOSS ensemble on the labelled cases (X, y).

    Parameter sets are drawn at random, without replacement, from the BOSS
    parameter space. Every draw is fitted as an ``IndividualBOSS`` on a
    random 70% subsample of the training cases, and only the
    ``max_ensemble_size`` members with the best estimated train accuracy are
    kept, each weighted by its accuracy raised to the fourth power.

    Parameters
    ----------
    X : 3D np.array of shape = [n_instances, n_dimensions, series_length]
        The training data.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self :
        Reference to self.

    Raises
    ------
    ValueError
        If ``min_window`` is larger than the maximum window length plus one.

    Notes
    -----
    Writes the fitted state to ``n_instances_``, ``series_length_``,
    ``estimators_``, ``weights_``, ``n_estimators_`` and ``_weight_sum``.
    """
    contract_seconds = self.time_limit_in_minutes * 60
    self.n_instances_, _, self.series_length_ = X.shape

    self.estimators_ = []
    self.weights_ = []

    # The window length search space scales with the series length.
    n_window_steps = self.series_length_ / 4
    max_window = int(self.series_length_ * self.max_win_len_prop)
    win_inc = max(1, int((max_window - self.min_window) / n_window_steps))

    if self.min_window > max_window + 1:
        raise ValueError(
            f"Error in ContractableBOSS, min_window ="
            f"{self.min_window} is bigger"
            f" than max_window ={max_window}."
            f" Try set min_window to be smaller than series length in "
            f"the constructor, but the classifier may not work at "
            f"all with very short series"
        )

    candidates = self._unique_parameters(max_window, win_inc)
    n_built = 0
    start_time = time.time()
    elapsed = 0

    subsample_size = int(self.n_instances_ * 0.7)
    worst_acc = 1
    worst_idx = 0

    rng = check_random_state(self.random_state)

    # A positive contract time replaces the fixed number of parameter draws
    # with a time budget, capped at a maximum number of draws.
    if contract_seconds > 0:
        n_draws_target = 0
        contract_draw_cap = self.contract_max_n_parameter_samples
    else:
        n_draws_target = self.n_parameter_samples
        contract_draw_cap = np.inf

    while candidates:
        within_contract = (
            elapsed < contract_seconds and n_built < contract_draw_cap
        )
        if not (within_contract or n_built < n_draws_target):
            break

        # Keep the RNG call order: parameter index first, then subsample.
        chosen = candidates.pop(rng.randint(0, len(candidates)))
        subsample = rng.choice(
            self.n_instances_, size=subsample_size, replace=False
        )
        X_subsample = X[subsample]
        y_subsample = y[subsample]

        member = IndividualBOSS(
            *chosen,
            alphabet_size=self._alphabet_size,
            save_words=False,
            typed_dict=self.typed_dict,
            n_jobs=self._threads_to_use,
            random_state=self.random_state,
        )
        member.fit(X_subsample, y_subsample)
        member._clean()
        member._subsample = subsample

        ensemble_full = n_built >= self.max_ensemble_size

        # Until the ensemble is full every member is kept, so the accuracy
        # to beat is 0; afterwards it is the current worst member's accuracy.
        acc_to_beat = worst_acc if ensemble_full else 0
        member._accuracy = self._individual_train_acc(
            member, y_subsample, subsample_size, acc_to_beat
        )

        weight = (
            math.pow(member._accuracy, 4)
            if member._accuracy > 0
            else 0.000000001
        )

        if not ensemble_full:
            if member._accuracy < worst_acc:
                worst_acc = member._accuracy
                worst_idx = n_built
            self.weights_.append(weight)
            self.estimators_.append(member)
        elif member._accuracy > worst_acc:
            # Swap out the weakest member, then find the new weakest one.
            self.weights_[worst_idx] = weight
            self.estimators_[worst_idx] = member
            worst_acc, worst_idx = self._worst_ensemble_acc()

        n_built += 1
        elapsed = time.time() - start_time

    self.n_estimators_ = len(self.estimators_)
    self._weight_sum = np.sum(self.weights_)

    return self
def fit(self, X, y):
    """Build an ensemble of BOSS classifiers from the training set (X, y).

    Parameter sets are drawn at random, without replacement, from the
    parameter space. Each draw is fitted on a random 70% subsample of the
    training cases and the ``max_ensemble_size`` most accurate classifiers
    are kept, weighted by train accuracy raised to the fourth power.

    Parameters
    ----------
    X : nested pandas DataFrame of shape [n_instances, 1]
        Nested dataframe with univariate time-series in cells. It is passed
        through ``check_X_y`` with ``coerce_to_numpy=True`` before use.
    y : array-like, shape = [n_instances]
        The class labels.

    Returns
    -------
    self : object

    Notes
    -----
    ``time_limit`` is multiplied by 60 and compared against elapsed seconds,
    i.e. it is treated as minutes. When it is positive the time budget
    replaces ``n_parameter_samples`` as the stopping rule. Neither
    constructor parameter is modified by this method, so repeated calls
    behave identically.
    """
    X, y = check_X_y(X, y, enforce_univariate=True, coerce_to_numpy=True)

    start_time = time.time()
    # Local copy in seconds: writing the product back to ``self.time_limit``
    # would multiply the budget by 60 again on every call to ``fit``.
    time_limit = self.time_limit * 60
    self.n_instances, _, self.series_length = X.shape
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    for index, classVal in enumerate(self.classes_):
        self.class_dictionary[classVal] = index

    self.classifiers = []
    self.weights = []

    # Window length parameter space dependent on series length
    max_window_searches = self.series_length / 4
    max_window = int(self.series_length * self.max_win_len_prop)
    win_inc = max(
        1, int((max_window - self.min_window) / max_window_searches)
    )

    possible_parameters = self._unique_parameters(max_window, win_inc)
    num_classifiers = 0
    train_time = 0

    subsample_size = int(self.n_instances * 0.7)
    lowest_acc = 1
    lowest_acc_idx = 0

    rng = check_random_state(self.random_state)

    # With a time budget the sample count is ignored. Use a local so the
    # ``n_parameter_samples`` hyper-parameter is left untouched.
    n_parameter_samples = 0 if time_limit > 0 else self.n_parameter_samples

    while (
        train_time < time_limit or num_classifiers < n_parameter_samples
    ) and len(possible_parameters) > 0:
        parameters = possible_parameters.pop(
            rng.randint(0, len(possible_parameters))
        )

        subsample = rng.choice(
            self.n_instances, size=subsample_size, replace=False
        )
        X_subsample = X[subsample]
        y_subsample = y[subsample]

        boss = IndividualBOSS(
            *parameters,
            alphabet_size=self.alphabet_size,
            save_words=False,
            random_state=self.random_state,
        )
        boss.fit(X_subsample, y_subsample)
        boss._clean()

        ensemble_full = num_classifiers >= self.max_ensemble_size

        # While the ensemble has free slots every classifier is kept, so
        # the accuracy to beat is 0 rather than the initial ``lowest_acc``
        # of 1.
        # NOTE(review): presumably ``_individual_train_acc`` uses this value
        # to abandon evaluation early - confirm against its definition.
        boss.accuracy = self._individual_train_acc(
            boss,
            y_subsample,
            subsample_size,
            lowest_acc if ensemble_full else 0,
        )

        # ``math.pow(acc, 4)`` turns 0 into a zero weight and any negative
        # value into a positive one, so non-positive accuracies get a tiny
        # fixed weight instead.
        if boss.accuracy > 0:
            weight = math.pow(boss.accuracy, 4)
        else:
            weight = 0.000000001

        if not ensemble_full:
            if boss.accuracy < lowest_acc:
                lowest_acc = boss.accuracy
                lowest_acc_idx = num_classifiers
            self.weights.append(weight)
            self.classifiers.append(boss)
        elif boss.accuracy > lowest_acc:
            # Replace the weakest member, then locate the new weakest one.
            self.weights[lowest_acc_idx] = weight
            self.classifiers[lowest_acc_idx] = boss
            lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

        num_classifiers += 1
        train_time = time.time() - start_time

    self.n_estimators = len(self.classifiers)
    self.weight_sum = np.sum(self.weights)

    self._is_fitted = True
    return self
def _fit(self, X, y):
    """Fit a c-boss ensemble on cases (X, y), where y is the target variable.

    Build an ensemble of BOSS classifiers from the training set (X, y),
    through randomising over the parameter space to make a fixed size
    ensemble of the best. Each candidate is fitted on a random 70% subsample
    of the training cases and weighted by its train accuracy raised to the
    fourth power.

    Parameters
    ----------
    X : array-like whose ``shape`` has three entries
        The training data; the first entry of ``shape`` is used as the
        number of instances and the last as the series length.
    y : array-like of shape (n_instances,)
        The class labels.

    Returns
    -------
    self : object

    Raises
    ------
    ValueError
        If ``min_window`` is larger than the maximum window length plus one.

    Notes
    -----
    When ``time_limit_in_minutes`` is positive the time budget replaces
    ``n_parameter_samples`` as the stopping rule. The
    ``n_parameter_samples`` hyper-parameter itself is never modified, so a
    later fit without a time limit still uses the configured value.
    """
    time_limit = self.time_limit_in_minutes * 60
    self.n_instances, _, self.series_length = X.shape
    self.n_classes = np.unique(y).shape[0]
    self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
    for index, classVal in enumerate(self.classes_):
        self.class_dictionary[classVal] = index

    self.classifiers = []
    self.weights = []

    # Window length parameter space dependent on series length
    max_window_searches = self.series_length / 4
    max_window = int(self.series_length * self.max_win_len_prop)
    win_inc = max(
        1, int((max_window - self.min_window) / max_window_searches)
    )

    if self.min_window > max_window + 1:
        raise ValueError(
            f"Error in ContractableBOSS, min_window ="
            f"{self.min_window} is bigger"
            f" than max_window ={max_window},"
            f" series length is {self.series_length}"
            f" try set min_window to be smaller than series length in "
            f"the constructor, but the classifier may not work at "
            f"all with very short series"
        )

    possible_parameters = self._unique_parameters(max_window, win_inc)
    num_classifiers = 0
    start_time = time.time()
    train_time = 0

    subsample_size = int(self.n_instances * 0.7)
    lowest_acc = 1
    lowest_acc_idx = 0

    rng = check_random_state(self.random_state)

    # With a time budget the sample count is ignored. Use a local instead of
    # assigning 0 to ``self.n_parameter_samples``, which would permanently
    # overwrite the hyper-parameter.
    n_parameter_samples = 0 if time_limit > 0 else self.n_parameter_samples

    while (
        train_time < time_limit or num_classifiers < n_parameter_samples
    ) and len(possible_parameters) > 0:
        parameters = possible_parameters.pop(
            rng.randint(0, len(possible_parameters))
        )

        subsample = rng.choice(
            self.n_instances, size=subsample_size, replace=False
        )
        X_subsample = X[subsample]
        y_subsample = y[subsample]

        boss = IndividualBOSS(
            *parameters,
            alphabet_size=self.alphabet_size,
            save_words=False,
            random_state=self.random_state,
        )
        boss.fit(X_subsample, y_subsample)
        boss._clean()
        boss.subsample = subsample

        # Until the ensemble is full every classifier is kept, so the
        # accuracy to beat is 0; afterwards it is the current worst member.
        boss.accuracy = self._individual_train_acc(
            boss,
            y_subsample,
            subsample_size,
            0 if num_classifiers < self.max_ensemble_size else lowest_acc,
        )

        # Non-positive accuracies get a tiny fixed weight rather than
        # ``math.pow(acc, 4)``.
        if boss.accuracy > 0:
            weight = math.pow(boss.accuracy, 4)
        else:
            weight = 0.000000001

        if num_classifiers < self.max_ensemble_size:
            if boss.accuracy < lowest_acc:
                lowest_acc = boss.accuracy
                lowest_acc_idx = num_classifiers
            self.weights.append(weight)
            self.classifiers.append(boss)
        elif boss.accuracy > lowest_acc:
            # Replace the weakest member, then locate the new weakest one.
            self.weights[lowest_acc_idx] = weight
            self.classifiers[lowest_acc_idx] = boss
            lowest_acc, lowest_acc_idx = self._worst_ensemble_acc()

        num_classifiers += 1
        train_time = time.time() - start_time

    self.n_estimators = len(self.classifiers)
    self.weight_sum = np.sum(self.weights)

    return self