class MergedNB(NaiveBayes):
    """Naive Bayes for samples that mix discrete and continuous features.

    The discrete columns are delegated to a ``MultinomialNB`` and the
    continuous columns to a ``GaussianNB``; the decision function built by
    ``_fit`` is the product of the two sub-models' decision functions.
    """

    def __init__(self, whether_discrete):
        """Create the two sub-models and remember which columns are discrete.

        :param whether_discrete: one truthy/falsy flag per feature column;
            truthy marks a discrete column, falsy a continuous one.
        """
        NaiveBayes.__init__(self)
        # Force a boolean mask: `~` is only a logical NOT on bool arrays. On an
        # integer 0/1 array it is a bitwise NOT (yielding -1/-2), which would
        # silently turn both masks into fancy-index lists.
        self._whether_discrete = np.array(whether_discrete, dtype=bool)
        self._whether_continuous = ~self._whether_discrete
        self._multinomial, self._gaussian = MultinomialNB(), GaussianNB()

    def feed_data(self, x, y, sample_weights=None):
        """Split ``x`` by column type and feed each part to its sub-model.

        :param x: 2-D, array-like; columns are selected with the boolean masks
            built in ``__init__``.
        :param y: labels, passed to the multinomial sub-model as given.
        :param sample_weights: forwarded unchanged to both sub-models.
        """
        x = np.array(x)
        self._multinomial.feed_data(x[:, self._whether_discrete], y, sample_weights)
        # Reuse the labels as stored by the multinomial model so that both
        # sub-models (and this wrapper) share one label representation.
        y = self._multinomial["y"]
        self.label_dic = self._multinomial.label_dic
        self._cat_counter = self._multinomial["cat_counter"]
        self._gaussian.feed_data(x[:, self._whether_continuous], y, sample_weights)
        self._gaussian.label_dic = self._multinomial.label_dic

    def feed_sample_weights(self, sample_weights=None):
        """Forward ``sample_weights`` to both sub-models."""
        self._multinomial.feed_sample_weights(sample_weights)
        self._gaussian.feed_sample_weights(sample_weights)

    def _fit(self, lb):
        """Fit both sub-models and return the combined decision function.

        :param lb: accepted for interface compatibility; it is not used here,
            and both sub-models are fitted via ``fit()`` with no arguments.
        :return: ``func(input_x, tar_category)`` returning the product of the
            multinomial and Gaussian decision functions for one sample.
        """
        self._multinomial.fit()
        self._gaussian.fit()
        discrete_func = self._multinomial["func"]
        continuous_func = self._gaussian["func"]

        def func(input_x, tar_category):
            input_x = np.array(input_x)
            # `np.int` was removed in NumPy 1.24; the builtin `int` is the type
            # that alias always referred to, so the cast is unchanged.
            discrete_part = input_x[self._whether_discrete].astype(int)
            continuous_part = input_x[self._whether_continuous]
            # NOTE(review): if each sub-model's func already includes the class
            # prior, this product counts it twice -- confirm against the
            # sub-model implementations.
            return (discrete_func(discrete_part, tar_category)
                    * continuous_func(continuous_part, tar_category))

        return func

    def _transfer_x(self, x):
        """Convert one raw sample, in place, and return it.

        Continuous entries are cast with ``float``; discrete entries are looked
        up in the multinomial model's ``feat_dics``, which is indexed by the
        position of the feature among the discrete columns only.

        :param x: mutable sequence holding one sample; it is modified in place.
        :return: the same ``x`` object.
        """
        feat_dics = self._multinomial["feat_dics"]
        discrete_idx = 0  # position within the discrete-only column subset
        for d, discrete in enumerate(self._whether_discrete):
            if discrete:
                x[d] = feat_dics[discrete_idx][x[d]]
                discrete_idx += 1
            else:
                x[d] = float(x[d])
        return x
for c in range(len(self.label_dic)): plt.plot(tmp_x, [self._data[j][c](xx) for xx in tmp_x], c=colors[self.label_dic[c]], label="class: {}".format(self.label_dic[c])) plt.xlim(x_min-0.2*gap, x_max+0.2*gap) plt.legend() if not save: plt.show() else: plt.savefig("d{}".format(j + 1)) if __name__ == '__main__': import time xs, ys = DataUtil.get_dataset("mushroom", "../../_Data/mushroom.txt", tar_idx=0) nb = MultinomialNB() nb.feed_data(xs, ys) xs, ys = nb["x"].tolist(), nb["y"].tolist() train_num = 6000 x_train, x_test = xs[:train_num], xs[train_num:] y_train, y_test = ys[:train_num], ys[train_num:] learning_time = time.time() nb = GaussianNB() nb.fit(x_train, y_train) learning_time = time.time() - learning_time estimation_time = time.time() nb.evaluate(x_train, y_train) nb.evaluate(x_test, y_test) estimation_time = time.time() - estimation_time