class IndividualTDE(BaseClassifier):
    """Single TDE classifier, based off the Bag of SFA Symbols (BOSS) model.

    Each series is turned into a bag of words by an ``SFA`` transformer
    (bigrams enabled, repeated words removed) and classified with a
    one-nearest-neighbour search that maximises ``histogram_intersection``
    between the query bag and the stored training bags.

    Parameters
    ----------
    window_size : int, default=10
        Forwarded to the ``SFA`` transformer.
    word_length : int, default=8
        Forwarded to the ``SFA`` transformer.
    norm : bool, default=False
        Forwarded to the ``SFA`` transformer.
    levels : int, default=1
        Forwarded to the ``SFA`` transformer.
    igb : bool, default=False
        When true the transformer uses the ``"information-gain"`` binning
        method, otherwise ``"equi-depth"``.
    alphabet_size : int, default=4
        Forwarded to the ``SFA`` transformer.
    random_state : int, RandomState instance or None, default=None
        Passed to ``check_random_state`` to drive the random tie-breaking
        in ``predict``.
    """

    def __init__(self, window_size=10, word_length=8, norm=False, levels=1,
                 igb=False, alphabet_size=4, random_state=None):
        self.window_size = window_size
        self.word_length = word_length
        self.norm = norm
        self.levels = levels
        self.igb = igb
        self.alphabet_size = alphabet_size
        self.random_state = random_state

        binning_method = "information-gain" if igb else "equi-depth"
        self.transformer = SFA(word_length=word_length,
                               alphabet_size=alphabet_size,
                               window_size=window_size,
                               norm=norm,
                               levels=levels,
                               binning_method=binning_method,
                               bigrams=True,
                               remove_repeat_words=True,
                               save_words=False)

        # State populated by fit().
        self.transformed_data = []
        self.accuracy = 0
        self.class_vals = []
        self.num_classes = 0
        self.classes_ = []
        self.class_dictionary = {}
        super(IndividualTDE, self).__init__()

    def fit(self, X, y):
        """Transform the training series into bags and store them.

        Parameters
        ----------
        X : training series, validated by ``check_X_y`` (univariate only).
        y : class labels, one per training series.

        Returns
        -------
        self
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        sfa = self.transformer.fit_transform(X, y)
        self.transformed_data = [series.to_dict() for series in sfa.iloc[:, 0]]

        self.class_vals = y
        self.num_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        # Build the label -> column mapping from scratch so that refitting
        # on different labels cannot leave stale entries behind.
        self.class_dictionary = {
            class_val: index for index, class_val in enumerate(self.classes_)
        }

        self._is_fitted = True
        return self

    def predict(self, X):
        """Predict the label of the most similar training bag per series.

        Ties in similarity are broken at random (probability 0.5 of
        switching to the later candidate) using ``random_state``.

        Returns
        -------
        numpy.ndarray
            One predicted label per input series.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        rng = check_random_state(self.random_state)

        classes = []
        test_bags = self.transformer.transform(X)
        test_bags = [series.to_dict() for series in test_bags.iloc[:, 0]]

        for test_bag in test_bags:
            best_sim = -1
            nn = None

            for n, bag in enumerate(self.transformed_data):
                sim = histogram_intersection(test_bag, bag)

                # The random draw only happens on an exact tie, keeping the
                # random stream identical to a plain left-to-right scan.
                if sim > best_sim or (sim == best_sim and rng.random() < 0.5):
                    best_sim = sim
                    nn = self.class_vals[n]

            classes.append(nn)

        return np.array(classes)

    def predict_proba(self, X):
        """Return one-hot class "probabilities" derived from ``predict``.

        Returns
        -------
        numpy.ndarray of shape (n_series, num_classes)
            Row ``i`` holds 1 in the column of the predicted class and 0
            elsewhere; columns follow ``class_dictionary``.

        Raises
        ------
        KeyError
            If a predicted label is not a known class.
        """
        preds = self.predict(X)
        # Size the output from the predictions rather than the raw,
        # unvalidated X.
        dists = np.zeros((len(preds), self.num_classes))
        for i, pred in enumerate(preds):
            # Direct lookup: ``.get`` would yield None for an unknown label,
            # which NumPy treats as ``np.newaxis`` and would silently
            # increment the whole row.
            dists[i, self.class_dictionary[pred]] += 1
        return dists

    def _train_predict(self, train_num):
        """Leave-one-out prediction for the training case at ``train_num``.

        The stored bag at ``train_num`` is compared against every other
        stored bag; the label of the most similar one is returned (first
        best wins, no random tie-breaking).
        """
        test_bag = self.transformed_data[train_num]
        best_sim = -1
        nn = None

        for n, bag in enumerate(self.transformed_data):
            if n == train_num:
                continue

            sim = histogram_intersection(test_bag, bag)

            if sim > best_sim:
                best_sim = sim
                nn = self.class_vals[n]

        return nn
class BOSSIndividual(BaseClassifier):
    """Single Bag of SFA Symbols (BOSS) classifier.

    Building block of the Bag of SFA Symbols ensemble: an implementation of
    the BOSS classifier (attributed to "Schaffer" in the original docstring;
    the citation there was truncated).

    Each series is turned into a bag of words by an ``SFA`` transformer
    (no bigrams, repeated words removed) and classified with a
    one-nearest-neighbour search that minimises ``boss_distance`` between
    the query bag and the stored training bags.

    Parameters
    ----------
    window_size : int, default=10
        Forwarded to the ``SFA`` transformer.
    word_length : int, default=8
        Forwarded to the ``SFA`` transformer.
    norm : bool, default=False
        Forwarded to the ``SFA`` transformer.
    alphabet_size : int, default=4
        Forwarded to the ``SFA`` transformer.
    save_words : bool, default=True
        Forwarded to the ``SFA`` transformer; ``_clean`` switches it off.
    random_state : int, RandomState instance or None, default=None
        Passed to ``check_random_state`` to drive the random tie-breaking
        in ``predict``.
    """

    def __init__(self,
                 window_size=10,
                 word_length=8,
                 norm=False,
                 alphabet_size=4,
                 save_words=True,
                 random_state=None
                 ):
        self.window_size = window_size
        self.word_length = word_length
        self.norm = norm
        self.alphabet_size = alphabet_size
        self.save_words = save_words
        self.random_state = random_state

        self.transformer = SFA(word_length=word_length,
                               alphabet_size=alphabet_size,
                               window_size=window_size,
                               norm=norm,
                               remove_repeat_words=True,
                               bigrams=False,
                               save_words=save_words)

        # State populated by fit() (or copied over by _shorten_bags()).
        self.transformed_data = []
        self.accuracy = 0
        self.class_vals = []
        self.num_classes = 0
        self.classes_ = []
        self.class_dictionary = {}
        super(BOSSIndividual, self).__init__()

    def fit(self, X, y):
        """Transform the training series into bags and store them.

        Parameters
        ----------
        X : training series, validated by ``check_X_y`` (univariate only).
        y : class labels, one per training series.

        Returns
        -------
        self
        """
        X, y = check_X_y(X, y, enforce_univariate=True)

        sfa = self.transformer.fit_transform(X)
        self.transformed_data = sfa.iloc[:, 0]

        self.class_vals = y
        self.num_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]
        # Build a fresh label -> column mapping. Rebinding (rather than
        # mutating) avoids stale labels on refit and avoids altering
        # classifiers created by _shorten_bags(), which share the old dict.
        self.class_dictionary = {
            class_val: index for index, class_val in enumerate(self.classes_)
        }

        self._is_fitted = True
        return self

    def predict(self, X):
        """Predict the label of the nearest training bag per series.

        Ties in distance are broken at random (probability 0.5 of switching
        to the later candidate) using ``random_state``.

        Returns
        -------
        numpy.ndarray
            One predicted label per input series.
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True)

        rng = check_random_state(self.random_state)

        classes = []
        test_bags = self.transformer.transform(X)
        test_bags = test_bags.iloc[:, 0]

        for test_bag in test_bags:
            best_dist = sys.float_info.max
            nn = None

            for n, bag in enumerate(self.transformed_data):
                # The current best is handed to boss_distance as its third
                # argument (presumably an early-abandon bound; confirm
                # against boss_distance).
                dist = boss_distance(test_bag, bag, best_dist)

                # The random draw only happens on an exact tie.
                if dist < best_dist or (dist == best_dist and
                                        rng.random() < 0.5):
                    best_dist = dist
                    nn = self.class_vals[n]

            classes.append(nn)

        return np.array(classes)

    def predict_proba(self, X):
        """Return one-hot class "probabilities" derived from ``predict``.

        Returns
        -------
        numpy.ndarray of shape (n_series, num_classes)
            Row ``i`` holds 1 in the column of the predicted class and 0
            elsewhere; columns follow ``class_dictionary``.

        Raises
        ------
        KeyError
            If a predicted label is not a known class.
        """
        preds = self.predict(X)
        # Size the output from the predictions rather than the raw,
        # unvalidated X.
        dists = np.zeros((len(preds), self.num_classes))
        for i, pred in enumerate(preds):
            # Direct lookup: ``.get`` would yield None for an unknown label,
            # which NumPy treats as ``np.newaxis`` and would silently
            # increment the whole row.
            dists[i, self.class_dictionary[pred]] += 1
        return dists

    def _train_predict(self, train_num):
        """Leave-one-out prediction for the training case at ``train_num``.

        The stored bag at ``train_num`` is compared against every other
        stored bag; the label of the closest one is returned (first best
        wins, no random tie-breaking).
        """
        test_bag = self.transformed_data[train_num]
        best_dist = sys.float_info.max
        nn = None

        for n, bag in enumerate(self.transformed_data):
            if n == train_num:
                continue

            dist = boss_distance(test_bag, bag, best_dist)

            if dist < best_dist:
                best_dist = dist
                nn = self.class_vals[n]

        return nn

    def _shorten_bags(self, word_len):
        """Create a fitted classifier for a shorter word length.

        The new classifier shares this instance's transformer and label
        information; its bags come from the transformer's own
        ``_shorten_bags(word_len)``.

        Returns
        -------
        BOSSIndividual
            A new instance marked as fitted.
        """
        new_boss = BOSSIndividual(self.window_size, word_len,
                                  self.norm, self.alphabet_size,
                                  save_words=self.save_words,
                                  random_state=self.random_state)
        # The transformer object is shared, not copied.
        new_boss.transformer = self.transformer
        sfa = self.transformer._shorten_bags(word_len)
        new_boss.transformed_data = sfa.iloc[:, 0]

        new_boss.class_vals = self.class_vals
        new_boss.num_classes = self.num_classes
        new_boss.classes_ = self.classes_
        new_boss.class_dictionary = self.class_dictionary

        new_boss._is_fitted = True

        return new_boss

    def _clean(self):
        """Drop the transformer's stored words and stop saving them."""
        self.transformer.words = None
        self.transformer.save_words = False

    def _set_word_len(self, word_len):
        """Set the word length on this classifier and its transformer."""
        self.word_length = word_len
        self.transformer.word_length = word_len