def feed_data(self, x, y, sample_weight=None):
    """Preprocess the training data and cache per-class views on the model.

    Args:
        x: Training samples. ``x[0]`` must exist: its length determines how
            many entries the ``wc`` mask passed to the quantizer has.
        y: Labels aligned with ``x``.
        sample_weight: Optional per-sample weights. Converted with
            ``np.asarray`` when given; ``None`` is forwarded untouched.

    Side effects:
        Sets ``_x``, ``_y``, ``_labelled_x``, ``_label_zip``, ``_cat_counter``,
        ``_feat_dicts``, ``_n_possibilities`` and ``label_dict`` on ``self``,
        then calls ``self.feed_sample_weight(sample_weight)``.
    """
    # Convert only real weights: np.asarray(None) would yield a 0-d object
    # array, which no longer compares as ``None`` downstream.
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    # ``wc`` is an all-False mask with one entry per feature
    # (presumably "whether continuous", i.e. every feature is discrete).
    x, y, _, features, feat_dicts, label_dict = DataUtil.quantize_data(
        x, y, wc=np.array([False] * len(x[0])))

    # Use numpy's bincount to get the number of samples in each class.
    cat_counter = np.bincount(y)
    # Record the number of possible values of each feature dimension.
    n_possibilities = [len(feats) for feats in features]
    # Boolean masks selecting the samples that belong to each class.
    labels = [y == value for value in range(len(cat_counter))]
    # Use the masks to split the input by class; each slice is transposed.
    labelled_x = [x[ci].T for ci in labels]

    # Update the model's attributes.
    self._x, self._y = x, y
    self._labelled_x, self._label_zip = labelled_x, list(zip(labels, labelled_x))
    self._cat_counter, self._feat_dicts, self._n_possibilities = (
        cat_counter, feat_dicts, n_possibilities)
    self.label_dict = label_dict

    # Hand over to the sample-weight handler, which (per the original note)
    # updates the arrays that record the conditional probabilities.
    self.feed_sample_weight(sample_weight)
def feed_data(self, x, y, sample_weight=None):
    """Preprocess mixed data and distribute it to the two sub-models.

    The input is passed through ``DataUtil.quantize_data`` with
    ``separate=True``; the returned ``x`` is unpacked into a discrete part
    (given to ``self._multinomial``) and a continuous part (given to
    ``self._gaussian``).

    Args:
        x: Training samples.
        y: Labels aligned with ``x``.
        sample_weight: Optional per-sample weights; converted with
            ``np.asarray`` when not ``None``.

    Side effects:
        Updates ``label_dic``, ``_cat_counter`` and (when unset)
        ``_whether_continuous`` / ``_whether_discrete`` on ``self``, fills the
        private data attributes of both sub-models, and finally calls
        ``self.feed_sample_weight(sample_weight)``.
    """
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    quantized = DataUtil.quantize_data(
        x, y, wc=self._whether_continuous, separate=True)
    x, y, wc, features, feat_dics, label_dic = quantized

    # Adopt the mask reported by the quantizer only when none was configured.
    # NOTE(review): the derived discrete mask is assumed to belong inside
    # this guard -- confirm against the original layout.
    if self._whether_continuous is None:
        self._whether_continuous = wc
        self._whether_discrete = ~self._whether_continuous
    self.label_dic = label_dic

    discrete_x, continuous_x = x
    class_counts = np.bincount(y)
    self._cat_counter = class_counts
    # One boolean mask per class index.
    class_masks = [y == cls for cls in range(len(class_counts))]

    # --- multinomial sub-model: discrete features only ---
    mnb = self._multinomial
    discrete_by_class = [discrete_x[mask].T for mask in class_masks]
    # NOTE(review): ``_x`` receives the whole (discrete, continuous) pair,
    # not just ``discrete_x`` -- confirm this is intended.
    mnb._x = x
    mnb._y = y
    mnb._labelled_x = discrete_by_class
    mnb._label_zip = list(zip(class_masks, discrete_by_class))
    mnb._cat_counter = class_counts
    is_discrete = self._whether_discrete
    mnb._feat_dics = [
        entry for idx, entry in enumerate(feat_dics) if is_discrete[idx]
    ]
    mnb._n_possibilities = [
        len(values) for idx, values in enumerate(features) if is_discrete[idx]
    ]
    mnb.label_dic = label_dic

    # --- gaussian sub-model: continuous features only ---
    gnb = self._gaussian
    continuous_by_class = [continuous_x[mask].T for mask in class_masks]
    gnb._x = continuous_x.T
    gnb._y = y
    gnb._labelled_x = continuous_by_class
    # Unlike the multinomial model, only the masks are stored here.
    gnb._label_zip = class_masks
    gnb._cat_counter = class_counts
    gnb.label_dic = label_dic

    self.feed_sample_weight(sample_weight)
def feed_data(self, x, y, sample_weight=None):
    """Split the training data between the multinomial and gaussian models.

    ``DataUtil.quantize_data`` is called with ``separate=True`` and its first
    return value is unpacked into ``(discrete_x, continuous_x)``. Per-class
    boolean masks are built from ``y`` and used to slice both parts.

    Args:
        x: Training samples.
        y: Labels aligned with ``x``.
        sample_weight: Optional per-sample weights; ``None`` is forwarded
            as-is, anything else goes through ``np.asarray`` first.

    Side effects:
        Writes ``label_dict`` and ``_cat_counter`` on ``self`` (plus the
        continuous/discrete masks when they were unset), populates
        ``self._multinomial`` and ``self._gaussian``, then invokes
        ``self.feed_sample_weight(sample_weight)``.
    """
    weights = sample_weight
    if weights is not None:
        weights = np.asarray(weights)

    x, y, wc, features, feat_dicts, label_dict = DataUtil.quantize_data(
        x, y, wc=self._whether_continuous, separate=True
    )

    if self._whether_continuous is None:
        # No mask configured yet: take the quantizer's and derive its inverse.
        # NOTE(review): the inverse is assumed to be computed only in this
        # branch -- confirm against the original layout.
        self._whether_continuous = wc
        self._whether_discrete = ~self._whether_continuous
    self.label_dict = label_dict

    discrete_x, continuous_x = x
    counts = np.bincount(y)
    self._cat_counter = counts
    n_classes = len(counts)
    masks = [y == class_id for class_id in range(n_classes)]

    # Multinomial model gets the discrete columns, sliced per class.
    per_class_discrete = [discrete_x[m].T for m in masks]
    # NOTE(review): ``x`` here is still the (discrete_x, continuous_x) pair;
    # verify the multinomial model is meant to store the pair.
    self._multinomial._x = x
    self._multinomial._y = y
    self._multinomial._labelled_x = per_class_discrete
    self._multinomial._label_zip = list(zip(masks, per_class_discrete))
    self._multinomial._cat_counter = counts
    self._multinomial._feat_dicts = [
        mapping
        for col, mapping in enumerate(feat_dicts)
        if self._whether_discrete[col]
    ]
    self._multinomial._n_possibilities = [
        len(col_values)
        for col, col_values in enumerate(features)
        if self._whether_discrete[col]
    ]
    self._multinomial.label_dict = label_dict

    # Gaussian model gets the continuous columns; its ``_label_zip`` holds
    # only the masks, without the per-class data.
    per_class_continuous = [continuous_x[m].T for m in masks]
    self._gaussian._x = continuous_x.T
    self._gaussian._y = y
    self._gaussian._labelled_x = per_class_continuous
    self._gaussian._label_zip = masks
    self._gaussian._cat_counter = counts
    self._gaussian.label_dict = label_dict

    self.feed_sample_weight(weights)
def feed_data(self, x, y, sample_weight=None):
    """Preprocess the training data and store per-class slices on the model.

    Args:
        x: Training samples; ``len(x[0])`` sets the length of the all-False
            ``wc`` mask handed to ``DataUtil.quantize_data``.
        y: Labels aligned with ``x``.
        sample_weight: Optional per-sample weights; converted with
            ``np.asarray`` unless ``None``.

    Side effects:
        Assigns ``_x``, ``_y``, ``_labelled_x``, ``_label_zip``,
        ``_cat_counter``, ``_feat_dicts``, ``_n_possibilities`` and
        ``label_dict`` on ``self`` and then calls
        ``self.feed_sample_weight(sample_weight)``.
    """
    if sample_weight is not None:
        sample_weight = np.asarray(sample_weight)

    # One False entry per feature of the first sample.
    all_false = np.array([False] * len(x[0]))
    x, y, _, features, feat_dicts, label_dict = DataUtil.quantize_data(
        x, y, wc=all_false)

    # Count the samples in each class (the original note mentions two classes).
    class_counts = np.bincount(y)
    # Record how many distinct values each feature dimension takes.
    value_counts = [len(values) for values in features]
    # Boolean masks selecting the samples of each class.
    class_masks = [y == cls for cls in range(len(class_counts))]
    x_by_class = [x[mask].T for mask in class_masks]

    self._x = x
    self._y = y
    self._labelled_x = x_by_class
    self._label_zip = list(zip(class_masks, x_by_class))
    self._cat_counter = class_counts
    self._feat_dicts = feat_dicts
    self._n_possibilities = value_counts
    self.label_dict = label_dict

    self.feed_sample_weight(sample_weight)
def feed_data(self, x, y, sample_weight=None):
    """Run the data through the quantizer and cache class-wise views.

    Args:
        x: Training samples; the first sample's length fixes the size of the
            all-False ``wc`` mask given to ``DataUtil.quantize_data``.
        y: Labels aligned with ``x``.
        sample_weight: Optional per-sample weights. ``None`` is passed on
            unchanged; other values are wrapped with ``np.asarray``.

    Side effects:
        Stores the processed data, the per-class masks and slices, the class
        counts, the feature dictionaries, the per-feature value counts and
        the label dictionary on ``self``; ends by calling
        ``self.feed_sample_weight`` with the (possibly converted) weights.
    """
    weights = None if sample_weight is None else np.asarray(sample_weight)

    feature_count = len(x[0])
    x, y, _, features, feat_dicts, label_dict = DataUtil.quantize_data(
        x, y, wc=np.array([False] * feature_count)
    )

    counts = np.bincount(y)
    possibilities = [len(column) for column in features]

    # Build, for every class index, its boolean mask and the transposed
    # slice of ``x`` that the mask selects.
    masks = []
    slices = []
    for class_id in range(len(counts)):
        mask = y == class_id
        masks.append(mask)
        slices.append(x[mask].T)

    self._x, self._y = x, y
    self._labelled_x = slices
    self._label_zip = list(zip(masks, slices))
    self._cat_counter = counts
    self._feat_dicts = feat_dicts
    self._n_possibilities = possibilities
    self.label_dict = label_dict

    self.feed_sample_weight(weights)