def __init__(self, whether_continuous):
    """Initialize the merged model and its two sub-models.

    :param whether_continuous: per-feature mask (or None) marking which
        features are continuous; the discrete mask is its complement.
        None means the masks will be decided later.
    """
    NaiveBayes.__init__(self)
    # One sub-model per feature family: multinomial handles the discrete
    # features, gaussian handles the continuous ones.
    self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()
    if whether_continuous is None:
        self._whether_discrete = self._whether_continuous = None
    else:
        # Force bool dtype so `~` is logical negation; on an int array
        # `~1 == -2`, which would silently corrupt the discrete mask.
        self._whether_continuous = np.asarray(whether_continuous, dtype=bool)
        self._whether_discrete = ~self._whether_continuous
def __init__(self, **kwargs):
    """Initialize the merged model and its two sub-models.

    :param kwargs: forwarded to the base class; may contain
        "whether_continuous", a per-feature mask marking continuous
        features (None / absent means the masks are decided later).
    """
    super(MergedNB, self).__init__(**kwargs)
    # Multinomial sub-model for discrete features, gaussian for continuous.
    self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()
    wc = kwargs.get("whether_continuous")
    if wc is None:
        self._whether_discrete = self._whether_continuous = None
    else:
        # Force bool dtype so `~` is logical negation; on an int array
        # `~1 == -2`, which would silently corrupt the discrete mask.
        self._whether_continuous = np.asarray(wc, dtype=bool)
        self._whether_discrete = ~self._whether_continuous
class MergedNB(NaiveBayes):
    """Naive Bayes that merges a MultinomialNB (discrete features) with a
    GaussianNB (continuous features) into one model.

    The two sub-models are trained on complementary column subsets and
    their conditional probabilities are multiplied at prediction time.
    """

    def __init__(self, whether_discrete):
        """
        :param whether_discrete: per-feature mask marking discrete features;
            the continuous mask is its complement.
        """
        NaiveBayes.__init__(self)
        # Force bool dtype so `~` is logical negation; on an int array
        # `~1 == -2`, which would silently corrupt the continuous mask.
        self._whether_discrete = np.asarray(whether_discrete, dtype=bool)
        self._whether_continuous = ~self._whether_discrete
        self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()

    def feed_data(self, x, y, sample_weights=None):
        """Split the columns of x by the discrete/continuous masks and feed
        each subset to the matching sub-model.

        The multinomial sub-model is fed first; its (quantized) labels and
        label dictionary are then reused for the gaussian sub-model so the
        two stay consistent.
        """
        x = np.array(x)
        self._multinomial.feed_data(x[:, self._whether_discrete], y, sample_weights)
        y = self._multinomial["y"]
        self.label_dic = self._multinomial.label_dic
        self._cat_counter = self._multinomial["cat_counter"]
        self._gaussian.feed_data(x[:, self._whether_continuous], y, sample_weights)
        self._gaussian.label_dic = self._multinomial.label_dic

    def feed_sample_weights(self, sample_weights=None):
        """Forward sample weights to both sub-models."""
        self._multinomial.feed_sample_weights(sample_weights)
        self._gaussian.feed_sample_weights(sample_weights)

    def _fit(self, lb):
        """Fit both sub-models and return a function that multiplies their
        conditional probabilities for a given input and target category.

        :param lb: smoothing parameter accepted for interface compatibility;
            unused here (the sub-models' own `fit` defaults apply).
        """
        self._multinomial.fit()
        self._gaussian.fit()
        discrete_func, continuous_func = self._multinomial[
            "func"], self._gaussian["func"]

        def func(input_x, tar_category):
            input_x = np.array(input_x)
            # `int`, not the removed alias `np.int` (gone since NumPy 1.24):
            # the discrete columns hold quantized category indices.
            return discrete_func(
                input_x[self._whether_discrete].astype(int),
                tar_category) * continuous_func(
                input_x[self._whether_continuous], tar_category)
        return func

    def _transfer_x(self, x):
        """Translate one raw sample in place: map each discrete value through
        the multinomial model's feature dictionaries, cast continuous values
        to float. Returns the mutated sample."""
        feat_dics = self._multinomial["feat_dics"]
        idx = 0
        for d, discrete in enumerate(self._whether_discrete):
            if discrete:
                # feat_dics is indexed by discrete-feature position only,
                # hence the separate running index.
                x[d] = feat_dics[idx][x[d]]
                idx += 1
            else:
                x[d] = float(x[d])
        return x
# NOTE(review): tail of a visualization routine — the enclosing `def` is not
# visible in this fragment; `self`, `title`, `tmp_x`, `colors`, `j`, `x_min`,
# `x_max`, `gap` and `save` come from that outer scope.
plt.title(title)
for c in range(len(self.label_dic)):
    label = self.label_dic[c]
    curve = [self._data[j][c](xx) for xx in tmp_x]
    plt.plot(tmp_x, curve, c=colors[label],
             label="class: {}".format(label))
# Pad the x-axis by 20% of the data span on each side.
pad = 0.2 * gap
plt.xlim(x_min - pad, x_max + pad)
plt.legend()
if save:
    plt.savefig("d{}".format(j + 1))
else:
    plt.show()


if __name__ == '__main__':
    import time

    # Mushroom dataset: quantize with MultinomialNB, then time a GaussianNB
    # fit/evaluation on a 6000-sample train split.
    xs, ys = DataUtil.get_dataset(
        "mushroom", "../../_Data/mushroom.txt", tar_idx=0)
    nb = MultinomialNB()
    nb.feed_data(xs, ys)
    xs, ys = nb["x"].tolist(), nb["y"].tolist()

    train_num = 6000
    x_train, x_test = xs[:train_num], xs[train_num:]
    y_train, y_test = ys[:train_num], ys[train_num:]

    learning_time = time.time()
    nb = GaussianNB()
    nb.fit(x_train, y_train)
    learning_time = time.time() - learning_time

    estimation_time = time.time()
    nb.evaluate(x_train, y_train)
    nb.evaluate(x_test, y_test)
class MergedNB(NaiveBayes):
    """Naive Bayes that merges a MultinomialNB (discrete features) with a
    GaussianNB (continuous features) into one model.

    Data is quantized once via ``DataUtil.quantize_data`` and the internal
    state of both sub-models is populated directly, so the raw input is only
    processed a single time.
    """

    MergedNBTiming = Timing()

    def __init__(self, **kwargs):
        """
        :param kwargs: forwarded to the base class; may contain
            "whether_continuous", a per-feature mask marking continuous
            features (None / absent means the masks are inferred from the
            data in :meth:`feed_data`).
        """
        super(MergedNB, self).__init__(**kwargs)
        self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()
        wc = kwargs.get("whether_continuous")
        if wc is None:
            self._whether_discrete = self._whether_continuous = None
        else:
            # Force bool dtype so `~` is logical negation; on an int array
            # `~1 == -2`, which would silently corrupt the discrete mask.
            self._whether_continuous = np.asarray(wc, dtype=bool)
            self._whether_discrete = ~self._whether_continuous

    @MergedNBTiming.timeit(level=1, prefix="[API] ")
    def feed_data(self, x, y, sample_weight=None):
        """Quantize the data and populate both sub-models' internal state.

        :param x: raw samples (features in columns).
        :param y: raw labels.
        :param sample_weight: optional per-sample weights, forwarded to both
            sub-models via :meth:`feed_sample_weight`.
        """
        if sample_weight is not None:
            sample_weight = np.asarray(sample_weight)
        x, y, wc, features, feat_dics, label_dic = DataUtil.quantize_data(
            x, y, wc=self._whether_continuous, separate=True)
        if self._whether_continuous is None:
            # Masks were not given up front; adopt the ones inferred by
            # quantize_data (assumed to be a bool array — TODO confirm).
            self._whether_continuous = wc
            self._whether_discrete = ~self._whether_continuous
        self.label_dic = label_dic
        # `separate=True` yields the discrete and continuous column blocks.
        discrete_x, continuous_x = x
        cat_counter = np.bincount(y)
        self._cat_counter = cat_counter
        # One boolean mask per category, used to group samples by label.
        labels = [y == value for value in range(len(cat_counter))]

        # --- multinomial sub-model: discrete columns only ---
        labelled_x = [discrete_x[ci].T for ci in labels]
        # NOTE(review): `_x` receives the (discrete, continuous) tuple here,
        # mirroring the original assignment — verify downstream usage.
        self._multinomial._x, self._multinomial._y = x, y
        self._multinomial._labelled_x = labelled_x
        self._multinomial._label_zip = list(zip(labels, labelled_x))
        self._multinomial._cat_counter = cat_counter
        self._multinomial._feat_dics = [
            dic for i, dic in enumerate(feat_dics)
            if self._whether_discrete[i]]
        self._multinomial._n_possibilities = [
            len(feats) for i, feats in enumerate(features)
            if self._whether_discrete[i]]
        self._multinomial.label_dic = label_dic

        # --- gaussian sub-model: continuous columns only ---
        labelled_x = [continuous_x[label].T for label in labels]
        self._gaussian._x, self._gaussian._y = continuous_x.T, y
        self._gaussian._labelled_x, self._gaussian._label_zip = labelled_x, labels
        self._gaussian._cat_counter, self._gaussian.label_dic = cat_counter, label_dic

        self.feed_sample_weight(sample_weight)

    @MergedNBTiming.timeit(level=1, prefix="[Core] ")
    def feed_sample_weight(self, sample_weight=None):
        """Forward sample weights to both sub-models."""
        self._multinomial.feed_sample_weight(sample_weight)
        self._gaussian.feed_sample_weight(sample_weight)

    @MergedNBTiming.timeit(level=1, prefix="[Core] ")
    def _fit(self, lb):
        """Fit both sub-models and return the merged decision function.

        The product of the two sub-models' outputs double-counts the prior
        (each sub-model includes it), so one factor of the prior is divided
        back out.

        :param lb: smoothing parameter used for the prior probability.
        """
        self._multinomial.fit()
        self._gaussian.fit()
        p_category = self._multinomial.get_prior_probability(lb)
        discrete_func, continuous_func = self._multinomial["func"], self._gaussian["func"]

        def func(input_x, tar_category):
            input_x = np.asarray(input_x)
            # `int`, not the removed alias `np.int` (gone since NumPy 1.24):
            # the discrete columns hold quantized category indices.
            return discrete_func(
                input_x[self._whether_discrete].astype(int), tar_category
            ) * continuous_func(
                input_x[self._whether_continuous], tar_category
            ) / p_category[tar_category]
        return func

    @MergedNBTiming.timeit(level=1, prefix="[Core] ")
    def _transfer_x(self, x):
        """Translate one raw sample in place: map each discrete value through
        the multinomial model's feature dictionaries, cast continuous values
        to float. Returns the mutated sample."""
        feat_dics = self._multinomial["feat_dics"]
        idx = 0
        for d, discrete in enumerate(self._whether_discrete):
            if discrete:
                # feat_dics is indexed by discrete-feature position only,
                # hence the separate running index.
                x[d] = feat_dics[idx][x[d]]
                idx += 1
            else:
                x[d] = float(x[d])
        return x
rs *= data[d][tar_category](xx) return rs * p_category[tar_category] return func if __name__ == '__main__': import time _data = DataUtil.get_dataset("mushroom", "../../_Data/mushroom.txt") np.random.shuffle(_data) train_num = 6000 xs = _data ys = [xx.pop(0) for xx in xs] nb = MultinomialNB() nb.feed_data(xs, ys) xs, ys = nb["x"].tolist(), nb["y"].tolist() train_x, test_x = xs[:train_num], xs[train_num:] train_y, test_y = ys[:train_num], ys[train_num:] train_num = 6000 train_data = _data[:train_num] test_data = _data[train_num:] learning_time = time.time() nb = GaussianNB() nb.fit(train_x, train_y) learning_time = time.time() - learning_time
def __init__(self, whether_discrete):
    """Initialize the merged model and its two sub-models.

    :param whether_discrete: per-feature mask marking discrete features;
        the continuous mask is its complement.
    """
    NaiveBayes.__init__(self)
    # Force bool dtype so `~` is logical negation; on an int array
    # `~1 == -2`, which would silently corrupt the continuous mask.
    self._whether_discrete = np.asarray(whether_discrete, dtype=bool)
    self._whether_continuous = ~self._whether_discrete
    # Multinomial sub-model for discrete features, gaussian for continuous.
    self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()