def k_cross(self):
    # TODO: 未完成 — marked "unfinished" by the original author.
    # Rotates the k-fold split: the current validation fold is merged back into
    # the training pools, and a fresh fold of the same size is carved off the
    # front of the training data to become the new validation set.
    # NOTE(review): reconstruction assumes the `_size` assignments sit after
    # each `with` block; pool.map returns a materialized list, so either
    # placement behaves the same.
    self.F, self.D = 1.0, 1.0
    self.valid_predict = []
    self.train_predict = []
    # Index lists of samples still "alive" in the cascade.
    # NOTE(review): the original comment on the first line said "retained
    # validation negatives", but it indexes the TRAIN negatives — looks like a
    # copy-paste mislabel; the code itself is consistent with train negatives.
    self.remain_train_n_idx = list(range(self.train_n_DP.size()))  # retained train negatives
    self.remain_valid_p_idx = list(range(self.valid_p_DP.size()))  # retained valid positives
    self.remain_valid_n_idx = list(range(self.valid_n_DP.size()))  # retained valid negatives
    # Fold sizes mirror the current validation set sizes.
    k_p_size = self.valid_p_DP.size()
    k_n_size = self.valid_n_DP.size()
    # split_data presumably returns (first k items, remainder) per feature row
    # — TODO confirm against the data-provider implementation.
    p_data1, p_data2 = self.train_p_DP.split_data(k_p_size)
    n_data1, n_data2 = self.train_n_DP.split_data(k_n_size)
    # Positives: append the old validation fold onto the remaining train data,
    # one np.concatenate per feature row, in parallel.
    tmp_p_data = zip(p_data2, self.valid_p_DP.features)
    with poolContext(processes=8) as pool:
        self.train_p_DP.features = pool.map(np.concatenate, tmp_p_data)
    self.train_p_DP._size = len(self.train_p_DP.features[0])
    # The carved-off slice becomes the new validation positives.
    self.valid_p_DP.features = p_data1
    self.valid_p_DP._size = len(self.valid_p_DP.features[0])
    # Negatives: same rotation.
    tmp_n_data = zip(n_data2, self.valid_n_DP.features)
    with poolContext(processes=8) as pool:
        self.train_n_DP.features = pool.map(np.concatenate, tmp_n_data)
    self.train_n_DP._size = len(self.train_n_DP.features[0])
    self.valid_n_DP.features = n_data1
    self.valid_n_DP._size = len(self.valid_n_DP.features[0])
def get_features_from_images(self, images, features_extractors=None, int_images=None):
    """Extract Haar-like features from images, computing integral images on demand.

    Falls back to ``self.features_extractors`` when no extractors are given,
    and to freshly computed integral images when ``int_images`` is omitted.
    Returns one feature array per extractor.
    """
    if features_extractors is None:
        features_extractors = self.features_extractors
    if int_images is None:
        # Integral images were not supplied: build them in parallel first.
        with poolContext(processes=16) as pool:
            int_images = pool.map(to_integral_image, images)
    extract = partial(get_features, int_imgs=int_images)
    with poolContext(processes=16) as pool:
        return pool.map(extract, features_extractors)
def get_valid_data(self):
    """Build the validation matrix and labels.

    Concatenates positive and negative samples per feature row (positives
    first) and returns ``(valid_features, labels)`` where labels are 1 for
    positives and 0 for negatives.
    """
    paired_rows = zip(self.valid_p_features, self.valid_n_features)
    with poolContext(processes=16) as pool:
        valid_features = pool.map(np.concatenate, paired_rows)
    n_pos = len(self.valid_p_features[0])
    n_neg = len(self.valid_n_features[0])
    labels = np.array([1] * n_pos + [0] * n_neg)
    return valid_features, labels
def get_features(self, data_idx=None):
    """Return feature rows, optionally restricted to the samples in ``data_idx``.

    ``None`` means "all samples"; an empty index list yields an empty result.
    """
    if data_idx is None:
        # No restriction requested: hand back the full feature rows.
        return self.features
    if len(data_idx) == 0:
        return []
    # Slice every feature row down to the surviving sample indices in parallel.
    subselect = partial(get_sub_np, b=data_idx)
    with poolContext(processes=8) as pool:
        return pool.map(subselect, self.features)
def get_train_data(self):
    """Assemble the training matrix and labels from the data providers.

    Positives are taken whole; negatives are filtered to the indices still
    retained in ``self.remain_train_n_idx``. Returns ``(train_features,
    labels)`` with labels 1 for positives, 0 for negatives.
    """
    pos_rows = self.train_p_DP.get_features()
    neg_rows = self.train_n_DP.get_features(data_idx=self.remain_train_n_idx)
    with poolContext(processes=8) as pool:
        train_features = pool.map(np.concatenate, zip(pos_rows, neg_rows))
    labels = np.array([1] * len(pos_rows[0]) + [0] * len(neg_rows[0]))
    return train_features, labels
def get_valid_data(self):
    """Assemble the validation matrix and labels from the data providers.

    Both positives and negatives are filtered to the indices still retained
    in the cascade (``remain_valid_p_idx`` / ``remain_valid_n_idx``).
    Returns ``(valid_features, labels)`` with labels 1 for positives,
    0 for negatives.
    """
    pos_rows = self.valid_p_DP.get_features(data_idx=self.remain_valid_p_idx)
    neg_rows = self.valid_n_DP.get_features(data_idx=self.remain_valid_n_idx)
    with poolContext(processes=8) as pool:
        valid_features = pool.map(np.concatenate, zip(pos_rows, neg_rows))
    labels = np.array([1] * len(pos_rows[0]) + [0] * len(neg_rows[0]))
    return valid_features, labels
def update_features(self):
    """Prune samples that fail the current cascade stage.

    Validation set: drop positives predicted negative (misses) and keep only
    false-positive negatives — samples predicted 0 cannot enter the next
    AdaBoost stage. Training set: only the negatives are pruned the same way.
    (Validation samples are removed because they failed this stage, while F/D
    are still computed over everything that entered the cascade.)
    """
    pos_total = len(self.valid_p_features[0])
    neg_total = len(self.valid_n_features[0])
    assert pos_total + neg_total == len(self.valid_predict)
    # Predictions are ordered positives-first, so negatives are offset by pos_total.
    p_need_delete_idx = [i for i in range(pos_total)
                         if self.valid_predict[i] < EPS]
    n_need_delete_idx = [i for i in range(neg_total)
                         if self.valid_predict[pos_total + i] < EPS]
    with poolContext(processes=4) as pool:
        self.valid_p_features = pool.map(
            partial(np.delete, obj=p_need_delete_idx), self.valid_p_features)
    with poolContext(processes=4) as pool:
        self.valid_n_features = pool.map(
            partial(np.delete, obj=n_need_delete_idx), self.valid_n_features)
    # Training set: only negatives rejected by this stage are removed.
    pos_total = len(self.train_p_features[0])
    neg_total = len(self.train_n_features[0])
    assert pos_total + neg_total == len(self.train_predict)
    n_need_delete_idx = [i for i in range(neg_total)
                         if self.train_predict[pos_total + i] < EPS]
    with poolContext(processes=12) as pool:
        self.train_n_features = pool.map(
            partial(np.delete, obj=n_need_delete_idx), self.train_n_features)
    self.train_n_num = len(self.train_n_features[0])
def add_weak_classifier(self):
    """Run one AdaBoost round: pick the best unused weak classifier and reweight.

    Normalizes the sample weights, trains one candidate weak classifier per
    feature row in parallel, then selects the lowest-training-error candidate
    whose feature has not been used yet. The winner is recorded (feature index,
    extractor, cached features, classifier, alpha) and the sample weights are
    updated with the standard AdaBoost ``beta ** (1 - e)`` rule.

    Returns ``None``. If every feature has already been used, no classifier is
    added and the weights are left normalized but otherwise untouched.
    """
    self.weights /= np.sum(self.weights)
    # Train one weak classifier per feature row in parallel.
    with poolContext(processes=16) as pool:
        candidate_classifier = pool.map(
            partial(Weakclassifier,
                    labels=self.train_labels,
                    weights=self.weights), self.train_features)
    # Visit candidates from lowest to highest training error.
    classifier_idx = sorted(range(len(candidate_classifier)),
                            key=lambda x: candidate_classifier[x].train_error)
    for idx in classifier_idx:
        if self.used_features_idx_flag[idx] != 0:
            continue  # feature already consumed by an earlier round
        best = candidate_classifier[idx]
        self.used_features_idx.append(idx)
        self.used_features_idx_flag[idx] = 1
        self.used_features_extractors.append(self.ctx.features_extractors[idx])
        self.used_valid_features.append(self.valid_features[idx])
        self.used_train_features.append(self.train_features[idx])
        self.weakclassifiers.append(best)
        error = best.train_error
        beta = error / (1.0 - error + EPS)  # EPS guards division by zero at error == 1
        train_output = best.predict(self.train_features[idx])
        # e is 0 for correctly classified samples, 1 for mistakes:
        # correct samples get down-weighted by beta, mistakes keep weight.
        e = np.abs(train_output - self.train_labels)
        self.weights *= beta ** (1.0 - e)
        self.alpha.append(np.log(1.0 / (beta + EPS)))
        return
def gen_int_images(self):
    """Compute integral images for every image in ``self.data`` in parallel."""
    print("Generate Integral Images")
    with poolContext(processes=16) as pool:
        integral_images = pool.map(to_integral_image, self.data)
    self.int_images = integral_images
def get_haar_features_from_int_images(features_extractor, int_images):
    """Apply each Haar feature extractor to the given integral images.

    Runs ``get_features`` over ``features_extractor`` in parallel and returns
    one feature array per extractor.
    """
    extract = partial(get_features, int_imgs=int_images)
    with poolContext(processes=16) as pool:
        return pool.map(extract, features_extractor)