def add_one_new_pic(self, pic_path, label):
    """Extract the feature of a new picture and store it under ``label``.

    The feature is passed to ``self.add_one_pic``; the feature and the face
    image returned by ``self.extract_pic_feature`` are then written to the
    per-person folders below ``self.all_pic_feature_data_folder`` and
    ``self.all_pic_data_folder``, and one tab-separated line
    (source path, stored face path) is appended to the daily log file.

    Args:
        pic_path: Path of the picture to add.
        label: Person label; also used as the per-person folder name.

    Returns:
        True when the picture was stored; False when
        ``extract_pic_feature`` returned None or any step raised.
    """
    current_day = get_current_day()
    log_path = os.path.join(log_dir, current_day + '.txt')
    # The context manager closes the log on every exit path; previously the
    # handle was left open on each call.
    with open(log_path, 'a') as log_file:
        try:
            # The picture is already converted to the required size when read.
            result = self.extract_pic_feature(pic_path)
            if result is None:
                return False
            face_pic, pic_feature = result
            self.add_one_pic(pic_feature, label)

            pic_name = os.path.split(pic_path)[1]
            this_person_pic_folder = os.path.join(
                self.all_pic_data_folder, label)
            this_person_feature_folder = os.path.join(
                self.all_pic_feature_data_folder, label)
            for folder in (this_person_pic_folder,
                           this_person_feature_folder):
                if not os.path.exists(folder):
                    os.makedirs(folder)

            # Store the picture's feature directly and save the face image too.
            this_pic_feature_name = os.path.join(
                this_person_feature_folder, pic_name + '.p')
            with open(this_pic_feature_name, 'wb') as feature_file:
                msgpack_numpy.dump(pic_feature, feature_file)
            this_pic_face_name = os.path.join(
                this_person_pic_folder, pic_name + '.jpg')
            cv2.imwrite(this_pic_face_name, face_pic)

            log_file.write(
                '\t'.join(map(str, [pic_path, this_pic_face_name])) + '\n')
            return True
        except Exception:
            traceback.print_exc()
            return False
def feature_trans_pca(src_pack_file, dst_pack_file):
    """Reduce every stored feature to 128 dimensions with PCA and re-save.

    The source pack maps each person to a list of entries; for every entry
    whose second element is not None, ``entry[1][0]`` is taken as the feature
    vector. A ``PCA(n_components=128)`` is fitted on all such vectors, each
    vector is replaced in place by its projection, and the whole mapping is
    written to ``dst_pack_file``.

    Args:
        src_pack_file: Path of the msgpack file to read.
        dst_pack_file: Path of the msgpack file to write.
    """
    with open(src_pack_file, 'rb') as src:
        person_feature_dic = msgpack_numpy.load(src)

    # First pass: collect every usable feature vector to fit the PCA.
    all_data = []
    for feature_list in person_feature_dic.values():
        for entry in feature_list:
            try:
                # ``is None`` instead of ``== None``: the equality test raises
                # on ndarray values and the entry was then silently skipped.
                if entry[1] is None:
                    continue
                all_data.append(np.array(entry[1][0]))
            except Exception:
                traceback.print_exc()
    all_data = np.asarray(all_data)

    pca = PCA(n_components=128)
    pca.fit(all_data)

    # Second pass: overwrite each feature with its PCA projection.
    for feature_list in person_feature_dic.values():
        for entry in feature_list:
            try:
                if entry[1] is None:
                    continue
                this_feature = np.array(entry[1][0])
                this_feature = np.reshape(this_feature, (1, this_feature.size))
                entry[1][0] = pca.transform(this_feature)[0]
            except Exception:
                traceback.print_exc()

    with open(dst_pack_file, 'wb') as dst:
        msgpack_numpy.dump(person_feature_dic, dst)
def create_sample_list(root_folder, train_valid_sample_list_file,
                       verif_sample_list_file):
    """Build the train/valid and verification sample lists from a folder tree.

    ``root_folder`` holds one sub-folder per person. The last 2000 entries of
    ``os.listdir(root_folder)`` are held out for the face verification model;
    the pictures of the remaining persons are shuffled and split 80/20 into
    train and valid samples. Each sample is ``(picture_path, person_index)``
    where the index is the person's position inside its own group.

    Args:
        root_folder: Folder containing one sub-folder per person.
        train_valid_sample_list_file: Output path for the
            ``(train_sample_list, valid_sample_list)`` tuple.
        verif_sample_list_file: Output path for the verification samples.
    """
    person_list = os.listdir(root_folder)

    def labelled_pictures(persons):
        # One (picture_path, person_index) tuple per picture of each person.
        samples = []
        for person_index, person_name in enumerate(persons):
            person_path = os.path.join(root_folder, person_name)
            samples.extend(
                (os.path.join(person_path, pic_name), person_index)
                for pic_name in os.listdir(person_path))
        return samples

    # Hold out 2000 persons for the face verification model.
    train_valid_sample_list = labelled_pictures(person_list[:-2000])
    np.random.shuffle(train_valid_sample_list)
    train_num = int(len(train_valid_sample_list) * 0.8)
    train_sample_list = train_valid_sample_list[:train_num]
    valid_sample_list = train_valid_sample_list[train_num:]
    # ``with`` guarantees the pack is flushed and closed.
    with open(train_valid_sample_list_file, 'wb') as out:
        msgpack_numpy.dump((train_sample_list, valid_sample_list), out)

    verif_sample_list = labelled_pictures(person_list[-2000:])
    with open(verif_sample_list_file, 'wb') as out:
        msgpack_numpy.dump(verif_sample_list, out)
def create_sample_list_batch_shuffle(root_folder, train_valid_sample_list_file):
    """Build train/valid sample lists, shuffling within batches of persons.

    Persons (all but the last 2000 entries of ``os.listdir(root_folder)``)
    are processed in batches of 565. The pictures of each batch are shuffled
    and split 80/20, and the parts are appended to the global train and valid
    lists, so pictures of the same person stay close together. Each sample is
    ``(picture_path, person_index)``.

    Args:
        root_folder: Folder containing one sub-folder per person.
        train_valid_sample_list_file: Output path for the
            ``(train_sample_list, valid_sample_list)`` tuple.
    """
    person_list = os.listdir(root_folder)
    train_sample_list = []
    valid_sample_list = []

    def flush(batch):
        # Shuffle one batch and append its 80/20 split to the global lists.
        np.random.shuffle(batch)
        train_num = int(len(batch) * 0.8)
        train_sample_list.extend(batch[:train_num])
        valid_sample_list.extend(batch[train_num:])

    # A person has 29 pictures on average, giving at most 565.5 positive
    # pairs, so shuffle once every 565 persons; training reads 16385 pictures
    # at a time. Used to train pair-based models such as FaceNet.
    batch_train_valid_sample_list = []
    for person_index, person_name in enumerate(person_list[:-2000]):
        person_path = os.path.join(root_folder, person_name)
        pic_list = [(os.path.join(person_path, pic_name), person_index)
                    for pic_name in os.listdir(person_path)]
        if person_index > 0 and person_index % 565 == 0:
            flush(batch_train_valid_sample_list)
            batch_train_valid_sample_list = []
        # Always keep the current person's pictures: previously the person
        # sitting on a batch boundary was dropped from the output entirely.
        batch_train_valid_sample_list.extend(pic_list)
    flush(batch_train_valid_sample_list)

    with open(train_valid_sample_list_file, 'wb') as out:
        msgpack_numpy.dump((train_sample_list, valid_sample_list), out)
def main_distance():
    """Build element-wise absolute-difference features for every labelled pair.

    Reads ``pair_file`` (lines of ``path1 path2 label``), looks both paths up
    in the feature pack loaded from ``feature_pack_file``, and stores
    ``|feature1 - feature2|`` together with the label and the path pair in
    ``orl_verif_fc7_finetune_fc8.p``. Lines that do not have exactly three
    fields are ignored.
    """
    all_data = []
    all_label = []
    all_pic_path_list = []
    with open(feature_pack_file, 'rb') as feature_file:
        verif_path_feature_dic = msgpack_numpy.load(feature_file)

    with open(pair_file) as pairs:
        for count, line in enumerate(pairs):
            if count % 100 == 0:
                print(count)
            tmp = line.rstrip().split()
            if len(tmp) != 3:
                continue
            path1, path2 = tmp[0], tmp[1]
            label = int(tmp[2])
            # NOTE(review): a path missing from the pack yields None here and
            # the subtraction below raises TypeError.
            feature1 = verif_path_feature_dic.get(path1)
            feature2 = verif_path_feature_dic.get(path2)
            all_data.append(np.fabs(feature1 - feature2))
            all_label.append(label)
            all_pic_path_list.append((path1, path2))

    data = np.asarray(all_data)
    # Drop the middle axis; this indexes shape[2], so the stacked array must
    # be 3-D. Positional shape avoids the deprecated ``newshape`` keyword.
    data = np.reshape(data, (data.shape[0], data.shape[2]))
    label = np.asarray(all_label)
    print('%s %s' % (data.shape, label.shape))
    with open('orl_verif_fc7_finetune_fc8.p', 'wb') as out:
        msgpack_numpy.dump((data, label, all_pic_path_list), out)
def extract_triplet_feature():
    """Re-encode every stored feature with the siamese model and save them.

    Loads ``{person: [(feature, path), ...]}`` from ``feature_pack_file``,
    feeds each feature (reshaped to one row) through the sub-model
    ``model.layers[2]`` of the siamese graph, and writes the same structure
    with the new features to ``triplet_feature_pack_file``.
    """
    with open(feature_pack_file, 'rb') as feature_file:
        lfw_feature_dic = msgpack_numpy.load(feature_file)

    model_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.model'
    weight_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.weight'
    with open(model_file, 'r') as model_json:
        model = model_from_json(model_json.read())
    opt = Adam()
    model.compile(optimizer=opt, loss=['categorical_crossentropy'])
    model.load_weights(weight_file)

    # Map the input of the first layer of model.layers[2] to the output of
    # its last layer.
    branch = model.layers[2]
    get_Conv_FeatureMap = K.function(
        [branch.layers[0].get_input_at(False), K.learning_phase()],
        [branch.layers[-1].get_output_at(False)])

    new_lfw_feature_dic = {}
    for person in lfw_feature_dic:
        this_person_triplet_feature_list = []
        for feature, path in lfw_feature_dic.get(person):
            feature = np.reshape(feature, (1, feature.size))
            # The second input (0) is the value fed for K.learning_phase().
            new_feature = get_Conv_FeatureMap([feature, 0])[0].copy()
            this_person_triplet_feature_list.append((new_feature, path))
        new_lfw_feature_dic[person] = this_person_triplet_feature_list

    with open(triplet_feature_pack_file, 'wb') as out:
        msgpack_numpy.dump(new_lfw_feature_dic, out)
def create_sample_list_batch_shuffle(root_folder, train_valid_sample_list_file):
    """Create batch-shuffled train/valid sample lists and dump them to disk.

    All persons except the last 2000 entries of ``os.listdir(root_folder)``
    are grouped into consecutive batches of 565 persons. Within each batch
    the ``(picture_path, person_index)`` samples are shuffled and split
    80/20; the two parts are appended to the overall train and valid lists.

    Args:
        root_folder: Folder containing one sub-folder per person.
        train_valid_sample_list_file: Output path for the
            ``(train_sample_list, valid_sample_list)`` tuple.
    """
    person_list = os.listdir(root_folder)
    train_sample_list = []
    valid_sample_list = []

    def split_and_store(samples):
        # Shuffle in place, then route 80% to train and the rest to valid.
        np.random.shuffle(samples)
        cut = int(len(samples) * 0.8)
        train_sample_list.extend(samples[:cut])
        valid_sample_list.extend(samples[cut:])

    # A person has 29 pictures on average, giving at most 565.5 positive
    # pairs, so shuffle once every 565 persons; training reads 16385 pictures
    # at a time. Used to train pair-based models such as FaceNet.
    current_batch = []
    for person_index, person_name in enumerate(person_list[:-2000]):
        if person_index > 0 and person_index % 565 == 0:
            split_and_store(current_batch)
            current_batch = []
        person_path = os.path.join(root_folder, person_name)
        # The boundary person now opens the next batch; the earlier code
        # only extended the batch in the ``else`` branch and lost it.
        for pic_name in os.listdir(person_path):
            current_batch.append(
                (os.path.join(person_path, pic_name), person_index))
    split_and_store(current_batch)

    with open(train_valid_sample_list_file, 'wb') as out:
        msgpack_numpy.dump((train_sample_list, valid_sample_list), out)
def add_one_new_pic(self, pic_path, label):
    """Register one new picture for ``label`` and persist its feature and face.

    Calls ``self.extract_pic_feature`` and ``self.add_one_pic``, writes the
    feature (``<name>.p``) and the face image (``<name>.jpg``) into the
    per-person folders, and appends ``pic_path<TAB>face_path`` to the daily
    log file under ``log_dir``.

    Args:
        pic_path: Path of the picture to add.
        label: Person label; also used as the per-person folder name.

    Returns:
        True on success; False when no feature was extracted or an
        exception occurred (the traceback is printed).
    """
    current_day = get_current_day()
    # Opened before the ``try`` so a failure to open the log still
    # propagates; ``with`` fixes the handle that was never closed.
    with open(os.path.join(log_dir, current_day + '.txt'), 'a') as log_file:
        try:
            # The picture is already converted to the required size when read.
            result = self.extract_pic_feature(pic_path)
            if result is None:
                return False
            face_pic, pic_feature = result
            self.add_one_pic(pic_feature, label)
            pic_name = os.path.split(pic_path)[1]
            this_person_pic_folder = os.path.join(
                self.all_pic_data_folder, label)
            this_person_feature_folder = os.path.join(
                self.all_pic_feature_data_folder, label)
            if not os.path.exists(this_person_pic_folder):
                os.makedirs(this_person_pic_folder)
            if not os.path.exists(this_person_feature_folder):
                os.makedirs(this_person_feature_folder)
            # Store the picture's feature directly and save the face image too.
            this_pic_feature_name = os.path.join(
                this_person_feature_folder, pic_name + '.p')
            with open(this_pic_feature_name, 'wb') as feature_out:
                msgpack_numpy.dump(pic_feature, feature_out)
            this_pic_face_name = os.path.join(
                this_person_pic_folder, pic_name + '.jpg')
            cv2.imwrite(this_pic_face_name, face_pic)
            log_file.write(
                '\t'.join(map(str, [pic_path, this_pic_face_name])) + '\n')
            return True
        except Exception:
            traceback.print_exc()
            return False
def save_pic_feature(self, pic_path, person_name):
    """Extract the biggest face of an existing picture and store it.

    Used when an administrator adds a new picture: the feature is extracted
    and saved, together with the cropped face, into the per-person folders
    below ``self.all_pic_feature_data_folder`` and
    ``self.all_pic_data_folder``. Both outputs use the picture's own file
    name.

    Args:
        pic_path: Path of the picture file.
        person_name: Person the picture belongs to (folder name).

    Returns:
        None. Nothing is written when feature extraction returns None.
    """
    person_pic_folder_path = os.path.join(self.all_pic_data_folder,
                                          person_name)
    person_feature_folder_path = os.path.join(
        self.all_pic_feature_data_folder, person_name)
    for folder in (person_pic_folder_path, person_feature_folder_path):
        if not os.path.exists(folder):
            os.makedirs(folder)

    pic_name = os.path.split(pic_path)[-1]
    # Feature file.
    person_feature_path = os.path.join(person_feature_folder_path, pic_name)
    # Face file.
    person_pic_path = os.path.join(person_pic_folder_path, pic_name)

    with open(pic_path, 'rb') as pic_file:
        result = extract_feature_from_binary_data(pic_file)
    if result is None:
        return
    face_num, all_frames, all_feature = result
    biggest_face_index = find_big_face(all_frames)
    pic_frame = all_frames[biggest_face_index]
    pic_feature = all_feature[biggest_face_index]
    x, y, width, height = pic_frame
    # Image arrays are indexed [row, column] = [y, x]: rows span the height
    # and columns span the width. The previous slice had the two swapped,
    # which only worked for square frames.
    face_pic = cv2.imread(pic_path)[y:y + height, x:x + width, :]
    cv2.imwrite(person_pic_path, face_pic)
    with open(person_feature_path, 'wb') as feature_out:
        msgpack_numpy.dump(pic_feature, feature_out)
def create_train_valid_data(folder='/data/liubo/face/crop_face'):
    """Extract the feature of every picture under ``folder`` and pack them.

    Prepares data for training the face verification model from the existing
    pictures: walks ``folder/<person>/<picture>``, calls
    ``extract_feature_from_file`` on each file and stores
    ``{picture_path: feature}`` in ``feature_pack_file``.

    Args:
        folder: Root folder holding one sub-folder per person.
    """
    path_feature_dic = {}  # {picture_path: feature}
    for person in os.listdir(folder):
        print(person)
        person_path = os.path.join(folder, person)
        for pic_name in os.listdir(person_path):
            pic_feature_path = os.path.join(person_path, pic_name)
            path_feature_dic[pic_feature_path] = extract_feature_from_file(
                pic_feature_path)
    # ``with`` guarantees the pack is flushed and closed.
    with open(feature_pack_file, 'wb') as out:
        msgpack_numpy.dump(path_feature_dic, out)
def extract_verif_feature():
    """Extract a feature for every picture referenced in ``pair_file``.

    The first two whitespace-separated fields of each line are taken as
    picture paths. Each distinct path is passed to ``extract`` with the
    function returned by ``load_model(layer_index=-5)`` and the input shape
    ``(224, 224, 3)``; the resulting ``{pic_path: feature}`` mapping is
    written to ``feature_pack_file``.
    """
    verif_path_feature_dic = {}  # {pic_path: feature}
    path_set = set()
    with open(pair_file) as pairs:
        for line in pairs:
            tmp = line.rstrip().split()
            # Blank or short lines used to raise IndexError.
            if len(tmp) < 2:
                continue
            path_set.add(tmp[0])
            path_set.add(tmp[1])

    model, get_Conv_FeatureMap = load_model(layer_index=-5)
    for pic_path in path_set:
        fine_tune_feature = extract(pic_path, get_Conv_FeatureMap,
                                    (224, 224, 3))
        verif_path_feature_dic[pic_path] = fine_tune_feature

    with open(feature_pack_file, 'wb') as out:
        msgpack_numpy.dump(verif_path_feature_dic, out)
def extract_lfw_feature():
    """Extract a feature for every picture under ``lfw_folder`` and pack them.

    Walks ``lfw_folder/<person>/<picture>``, calls ``extract`` on each
    picture and writes ``{person: [(feature, 'person/picture'), ...]}`` to
    ``feature_pack_file``.
    """
    lfw_feature_dic = {}  # {person: [(feature, relative_path), ...]}
    for person_index, person in enumerate(os.listdir(lfw_folder)):
        print('%s %s' % (person_index, person))
        person_path = os.path.join(lfw_folder, person)
        this_person_feature_list = []
        for pic in os.listdir(person_path):
            this_feature = extract(os.path.join(person_path, pic))
            this_person_feature_list.append(
                (this_feature, os.path.join(person, pic)))
        lfw_feature_dic[person] = this_person_feature_list
    # ``with`` guarantees the pack is flushed and closed.
    with open(feature_pack_file, 'wb') as out:
        msgpack_numpy.dump(lfw_feature_dic, out)
def extract_lfw_feature():
    """Build and save the per-person feature lists of the LFW folder.

    For each person folder in ``lfw_folder`` every picture is passed to
    ``extract``; the result is stored as ``(feature, 'person/picture')``.
    The mapping ``{person: [entries]}`` is dumped to ``feature_pack_file``.
    """
    lfw_feature_dic = {}  # {person: [(feature, relative_path), ...]}
    person_list = os.listdir(lfw_folder)
    for person_index, person in enumerate(person_list):
        print('%s %s' % (person_index, person))
        person_path = os.path.join(lfw_folder, person)
        lfw_feature_dic[person] = [
            (extract(os.path.join(person_path, pic)),
             os.path.join(person, pic))
            for pic in os.listdir(person_path)
        ]
    # Close the output explicitly instead of relying on garbage collection.
    with open(feature_pack_file, 'wb') as pack_out:
        msgpack_numpy.dump(lfw_feature_dic, pack_out)
def main():
    """Extract features for every sample under ``/data/hanlin`` and pack them.

    Samples come from ``person_path_dic_trans(load_one_deep_path(folder))``
    as ``(pic_path, person_index)`` pairs. The first element of each
    ``extract`` result is kept as the feature vector. The elapsed extraction
    time is printed and ``(data, label)`` is written to ``hanlin.p``.
    """
    folder = '/data/hanlin'
    person_path_dic = load_one_deep_path(folder)
    sample_list, person_num = person_path_dic_trans(person_path_dic)
    model, get_Conv_FeatureMap = load_model(output_layer_index=18)

    data = []
    label = []
    start = time()
    for pic_path, person_index in sample_list:
        feature_vector = extract(pic_path, get_Conv_FeatureMap, pic_shape)[0]
        data.append(feature_vector)
        label.append(person_index)
    end = time()
    print(end - start)

    # ``with`` guarantees the pack is flushed and closed.
    with open('hanlin.p', 'wb') as out:
        msgpack_numpy.dump((data, label), out)
def main():
    """Run feature extraction over ``/data/hanlin`` and save ``hanlin.p``.

    Each ``(pic_path, person_index)`` sample is passed to ``extract`` with
    the function returned by ``load_model(output_layer_index=18)``; element
    0 of the result is stored as the feature and the index as the label.
    Prints the number of seconds the extraction loop took.
    """
    folder = '/data/hanlin'
    person_path_dic = load_one_deep_path(folder)
    sample_list, person_num = person_path_dic_trans(person_path_dic)
    model, get_Conv_FeatureMap = load_model(output_layer_index=18)

    data, label = [], []
    start = time()
    for pic_path, person_index in sample_list:
        data.append(extract(pic_path, get_Conv_FeatureMap, pic_shape)[0])
        label.append(person_index)
    end = time()
    print(end - start)

    # Close the output explicitly instead of leaving the handle dangling.
    with open('hanlin.p', 'wb') as pack_out:
        msgpack_numpy.dump((data, label), pack_out)
def extract_verif_feature():
    """Extract and save a feature for each picture listed in ``pair_file``.

    Collects the first two fields of every line of ``pair_file`` as picture
    paths, prints the summary of the model returned by
    ``load_model(layer_index=-5)``, extracts one feature per distinct path
    with input shape ``(224, 224, 3)`` and dumps ``{pic_path: feature}`` to
    ``feature_pack_file``.
    """
    path_set = set()
    with open(pair_file) as pairs:
        for line in pairs:
            fields = line.rstrip().split()
            # Skip blank or short lines instead of raising IndexError.
            if len(fields) < 2:
                continue
            path_set.update(fields[:2])

    model, get_Conv_FeatureMap = load_model(layer_index=-5)
    print(model.summary())

    verif_path_feature_dic = {}  # {pic_path: feature}
    for pic_path in path_set:
        verif_path_feature_dic[pic_path] = extract(
            pic_path, get_Conv_FeatureMap, (224, 224, 3))

    with open(feature_pack_file, 'wb') as pack_out:
        msgpack_numpy.dump(verif_path_feature_dic, pack_out)
def train_valid_verif_model():
    """Train and cross-validate a LinearSVC face verification model.

    For every ``path1 path2 label`` line of ``pair_file`` whose two files
    exist, the cosine similarity of the two extracted features is used as the
    sample. The samples are saved to and re-read from ``feature_pack_file``,
    evaluated with 10-fold cross-validation (accuracy and ROC AUC are printed
    per fold, then the mean accuracy), and a final model fitted on all data
    is pickled to ``verification_model_file``.
    """
    all_data = []
    all_label = []
    all_pic_path_list = []
    with open(pair_file) as pairs:
        for count, line in enumerate(pairs):
            if count % 100 == 0:
                print(count)
            tmp = line.rstrip().split()
            if len(tmp) != 3:
                continue
            path1, path2 = tmp[0], tmp[1]
            if os.path.exists(path1) and os.path.exists(path2):
                feature1 = extract_feature_from_file(path1)
                feature2 = extract_feature_from_file(path2)
                all_data.append(pw.cosine_similarity(feature1, feature2))
                all_label.append(int(tmp[2]))
                # Record the pair so the saved path list matches the samples;
                # it was previously always dumped empty.
                all_pic_path_list.append((path1, path2))

    # Close the write handle before re-reading the same file.
    with open(feature_pack_file, 'wb') as out:
        msgpack_numpy.dump((all_data, all_label, all_pic_path_list), out)
    with open(feature_pack_file, 'rb') as src:
        (all_data, all_label, all_pic_path_list) = msgpack_numpy.load(src)

    all_data = np.asarray(all_data)
    # Drop the middle axis; this indexes shape[2], so the array must be 3-D.
    data = np.reshape(all_data, (all_data.shape[0], all_data.shape[2]))
    label = np.asarray(all_label)
    print('%s %s' % (data.shape, label.shape))

    kf = KFold(len(label), n_folds=10)
    all_acc = []
    for (train, valid) in kf:
        train_data = data[train]
        valid_data = data[valid]
        train_label = label[train]
        valid_label = label[valid]
        clf = LinearSVC()
        clf.fit(train_data, train_label)
        # Predict once and reuse the result for both metrics.
        valid_predict = clf.predict(valid_data)
        acc = accuracy_score(valid_label, valid_predict)
        roc_auc = roc_auc_score(valid_label, valid_predict)
        all_acc.append(acc)
        print('%s %s' % (acc, roc_auc))
    print(np.mean(all_acc))

    clf = LinearSVC()
    clf.fit(data, label)
    # NOTE(review): debugging breakpoint kept from the original; it blocks
    # unattended runs before the model is saved.
    pdb.set_trace()
    with open(verification_model_file, 'wb') as model_out:
        cPickle.dump(clf, model_out)
def load_data(result_file, pack_file):
    """Convert a tab-separated result file into a per-person feature pack.

    Each usable line of ``result_file`` is ``<picture path>\\t<base64 of a
    msgpack-encoded feature>``. The person name is the second-to-last path
    component and the picture name the last one. Lines without exactly two
    fields are ignored; lines that fail to decode are printed and skipped.

    Args:
        result_file: Path of the text file to read.
        pack_file: Path of the msgpack file to write, containing
            ``{person_name: [(pic_name, feature), ...]}``.
    """
    person_feature_dic = {}
    with open(result_file) as results:
        for line in results:
            tmp = line.rstrip().split('\t')
            if len(tmp) != 2:
                continue
            try:
                pic_path = tmp[0].split('/')
                person_name = pic_path[-2]
                pic_name = pic_path[-1]
                feature = msgpack_numpy.loads(base64.b64decode(tmp[1]))
            except Exception:
                # Malformed path or payload: report the line and move on.
                print(tmp)
                continue
            person_feature_dic.setdefault(person_name, []).append(
                (pic_name, feature))
    with open(pack_file, 'wb') as out:
        msgpack_numpy.dump(person_feature_dic, out)
def create_lfw_pair(folder, pair_file):
    """Sample balanced positive/negative picture pairs and save them.

    One positive pair (two pictures of the same person) is drawn for every
    person with more than two pictures; the same number of negative pairs is
    drawn from consecutive persons of a shuffled person list. The combined
    list is shuffled and truncated to a multiple of 100 pairs.

    Args:
        folder: Root folder holding one sub-folder per person.
        pair_file: Output path; receives ``(pair_list, label_list)`` where
            ``pair_list`` is the flat sequence ``[p1a, p1b, p2a, p2b, ...]``
            and ``label_list`` has one boolean per pair.
    """
    def list_pictures(person):
        # A real list (not a lazy ``map``) so len() and shuffle work on
        # Python 2 and 3 alike.
        person_path = os.path.join(folder, person)
        return [os.path.join(person_path, x) for x in os.listdir(person_path)]

    person_list = os.listdir(folder)
    tmp_list = []
    # Choose the positive samples.
    for person in person_list:
        pic_list = list_pictures(person)
        # NOTE(review): ``> 2`` skips persons with exactly two pictures even
        # though two would suffice; kept as in the original.
        if len(pic_list) > 2:
            # One positive sample per person.
            np.random.shuffle(pic_list)
            tmp_list.append((pic_list[0], pic_list[1], True))

    # Choose the same number of negative samples.
    np.random.shuffle(person_list)
    person_num = len(person_list)
    positive_num = len(tmp_list)
    count = 0
    for person_index, person in enumerate(person_list):
        # Checked up front so that zero positives yields zero negatives; the
        # old post-increment equality test never fired in that case.
        if count >= positive_num:
            break
        pic_list = list_pictures(person)
        other_person = person_list[(person_index + 1) % person_num]
        other_pic_list = list_pictures(other_person)
        # An empty person folder used to raise IndexError below.
        if not pic_list or not other_pic_list:
            continue
        np.random.shuffle(pic_list)
        np.random.shuffle(other_pic_list)
        tmp_list.append((pic_list[0], other_pic_list[0], False))
        count += 1

    pair_list = []
    label_list = []
    np.random.shuffle(tmp_list)
    num = len(tmp_list)
    # Floor division keeps the slice bound an int on Python 3 as well.
    tmp_list = tmp_list[:num // 100 * 100]
    for pic_path1, pic_path2, label in tmp_list:
        pair_list.append(pic_path1)
        pair_list.append(pic_path2)
        label_list.append(label)
    print('%d %d %d' % (len(tmp_list), len(pair_list), len(label_list)))
    with open(pair_file, 'wb') as out:
        msgpack_numpy.dump((pair_list, label_list), out)
def cb(output):
    """Handle a finished prediction: reply to the client and queue training data.

    ``self``, ``index`` and ``obs`` are free variables taken from an
    enclosing scope that is not visible here.

    Args:
        output: Object exposing ``result()``; presumably a future produced
            by the predictor (confirm against the caller).
    """
    # NOTE(review): the source formatting was collapsed; it is assumed that
    # only the send is guarded by the None check and that parse_memory runs
    # for every callback. Confirm against the original layout.
    predicting_result = output.result()
    if predicting_result is not None:
        # Send the chosen action back to the client identified by index.
        self.s2c_socket.send_multipart(
            [nameClient(index), dump(predicting_result['action'])])
    training_data = self.parse_memory(index, obs, predicting_result)
    if training_data is not None:
        self.training_queue.put(training_data)
def create_sample_list(root_folder, train_valid_sample_list_file, verif_sample_list_file):
    """Write the train/valid sample lists and the verification sample list.

    The last 2000 entries of ``os.listdir(root_folder)`` are reserved for
    the face verification model. Pictures of all other persons are shuffled
    and split 80/20 into train and valid. Every sample is a
    ``(picture_path, person_index)`` tuple; indices restart at 0 for the
    verification group.

    Args:
        root_folder: Folder containing one sub-folder per person.
        train_valid_sample_list_file: Output path for
            ``(train_sample_list, valid_sample_list)``.
        verif_sample_list_file: Output path for the verification samples.
    """
    person_list = os.listdir(root_folder)

    # Hold out 2000 persons for the face verification model.
    train_valid_sample_list = []
    for person_index, person_name in enumerate(person_list[:-2000]):
        person_path = os.path.join(root_folder, person_name)
        for pic_name in os.listdir(person_path):
            train_valid_sample_list.append(
                (os.path.join(person_path, pic_name), person_index))
    np.random.shuffle(train_valid_sample_list)
    train_num = int(len(train_valid_sample_list) * 0.8)
    # Explicit close: the previous code never closed either output file.
    with open(train_valid_sample_list_file, 'wb') as train_valid_out:
        msgpack_numpy.dump((train_valid_sample_list[:train_num],
                            train_valid_sample_list[train_num:]),
                           train_valid_out)

    verif_sample_list = []
    for person_index, person_name in enumerate(person_list[-2000:]):
        person_path = os.path.join(root_folder, person_name)
        for pic_name in os.listdir(person_path):
            verif_sample_list.append(
                (os.path.join(person_path, pic_name), person_index))
    with open(verif_sample_list_file, 'wb') as verif_out:
        msgpack_numpy.dump(verif_sample_list, verif_out)
def main_max_min():
    """Collect same-person and different-person picture distances for LFW.

    Reads the LFW pair file. A 3-field line names one person: up to 10 of
    that person's pictures (after shuffling) are compared pairwise with
    ``cal_two_pic_distance`` and the minimum distance is recorded. A 4-field
    line names two persons (fields 0 and 2): one random picture of each is
    compared and the distance recorded. The two lists are written to
    ``dist_max_min.p``.
    """
    lfw_folder = '/data/liubo/face/lfw_face'
    pair_file = '/data/liubo/face/lfw_pair.txt'
    same_dist_list = []
    no_same_dist_list = []
    with open(pair_file) as pairs:
        for line in pairs:
            tmp = line.rstrip().split()
            if len(tmp) == 3:
                person = tmp[0]
                person_path = os.path.join(lfw_folder, person)
                pic_list = os.listdir(person_path)
                # Fewer than two pictures cannot form a pair. The original
                # only caught exactly one, so an empty folder crashed in
                # np.min([]).
                if len(pic_list) < 2:
                    print('error person : %s' % (person,))
                    continue
                np.random.shuffle(pic_list)
                candidates = pic_list[0:10]
                this_dist = []
                for index_i in range(len(candidates)):
                    for index_j in range(index_i + 1, len(candidates)):
                        this_dist.append(cal_two_pic_distance(
                            os.path.join(person_path, candidates[index_i]),
                            os.path.join(person_path, candidates[index_j])))
                same_dist_list.append(np.min(this_dist))
            elif len(tmp) == 4:
                person1_path = os.path.join(lfw_folder, tmp[0])
                pic1_list = os.listdir(person1_path)
                person2_path = os.path.join(lfw_folder, tmp[2])
                pic2_list = os.listdir(person2_path)
                if len(pic1_list) > 0 and len(pic2_list) > 0:
                    np.random.shuffle(pic1_list)
                    np.random.shuffle(pic2_list)
                    pic_path1 = os.path.join(person1_path, pic1_list[0])
                    pic_path2 = os.path.join(person2_path, pic2_list[0])
                    no_same_dist_list.append(
                        cal_two_pic_distance(pic_path1, pic_path2))
    with open('dist_max_min.p', 'wb') as out:
        msgpack_numpy.dump((same_dist_list, no_same_dist_list), out)
def extract_lfw_feature():
    """Extract LFW features keyed by each picture's numeric index.

    For every picture ``<...>_<index>.<ext>`` under
    ``lfw_folder/<person>/`` the integer after the last underscore of the
    file stem is parsed, the feature is extracted with ``extract``, and
    ``{person: {index: (feature, pic_path)}}`` is written to
    ``feature_pack_file``.
    """
    lfw_feature_dic = {}  # {person: {index: (feature, pic_path)}}
    for person_index, person in enumerate(os.listdir(lfw_folder)):
        print('%s %s' % (person_index, person))
        person_path = os.path.join(lfw_folder, person)
        this_person_feature_dic = {}
        for pic in os.listdir(person_path):
            try:
                pic_path = os.path.join(person_path, pic)
                index = int(pic.split('.')[0].split('_')[-1])
                this_feature = extract(pic_path)
                this_person_feature_dic[index] = (this_feature, pic_path)
            except Exception:
                traceback.print_exc()
                # NOTE(review): debugging breakpoint kept from the original;
                # it halts unattended runs on the first bad picture.
                pdb.set_trace()
        lfw_feature_dic[person] = this_person_feature_dic
    # ``with`` guarantees the pack is flushed and closed.
    with open(feature_pack_file, 'wb') as out:
        msgpack_numpy.dump(lfw_feature_dic, out)
def create_lfw_pair(folder, pair_file):
    """Create a balanced, shuffled list of picture pairs and dump it.

    Positives: for each person with more than two pictures, two of them
    (after shuffling) form a ``True`` pair. Negatives: persons are shuffled
    and each is paired with its successor in the list (wrapping around),
    one random picture each, until as many ``False`` pairs as positives
    exist. The result is shuffled and cut to a multiple of 100 pairs.

    Args:
        folder: Root folder holding one sub-folder per person.
        pair_file: Output path for ``(pair_list, label_list)``; the pair
            list is flat, two consecutive paths per label.
    """
    person_list = os.listdir(folder)
    tmp_list = []
    # Choose the positive samples.
    for person in person_list:
        person_path = os.path.join(folder, person)
        # List comprehension rather than ``map``: the result must support
        # len() and in-place shuffling on Python 3 too.
        pic_list = [os.path.join(person_path, x)
                    for x in os.listdir(person_path)]
        # NOTE(review): requires more than two pictures although two would
        # suffice; threshold kept unchanged.
        if len(pic_list) > 2:
            # One positive sample per person.
            np.random.shuffle(pic_list)
            tmp_list.append((pic_list[0], pic_list[1], True))

    # Choose the same number of negative samples.
    np.random.shuffle(person_list)
    person_num = len(person_list)
    positive_num = len(tmp_list)
    count = 0
    for person_index, person in enumerate(person_list):
        # Stop before adding: with zero positives the old
        # ``count == positive_num`` test after the increment never matched.
        if count >= positive_num:
            break
        this_person_path = os.path.join(folder, person)
        pic_list = [os.path.join(this_person_path, x)
                    for x in os.listdir(this_person_path)]
        other_person = person_list[(person_index + 1) % person_num]
        other_person_path = os.path.join(folder, other_person)
        other_pic_list = [os.path.join(other_person_path, x)
                          for x in os.listdir(other_person_path)]
        # Skip empty folders instead of failing with IndexError.
        if not pic_list or not other_pic_list:
            continue
        np.random.shuffle(pic_list)
        np.random.shuffle(other_pic_list)
        tmp_list.append((pic_list[0], other_pic_list[0], False))
        count += 1

    np.random.shuffle(tmp_list)
    num = len(tmp_list)
    # ``//`` keeps integer semantics on Python 3.
    tmp_list = tmp_list[:num // 100 * 100]
    pair_list = []
    label_list = []
    for element in tmp_list:
        pic_path1, pic_path2, label = element
        pair_list.extend((pic_path1, pic_path2))
        label_list.append(label)
    print('%d %d %d' % (len(tmp_list), len(pair_list), len(label_list)))
    with open(pair_file, 'wb') as pair_out:
        msgpack_numpy.dump((pair_list, label_list), pair_out)
def feature_trans_autoencoder(src_pack_file, dst_pack_file):
    """Re-encode every stored feature with the autoencoder and re-save.

    Loads the autoencoder from its JSON/weight files, builds a function from
    the input of its first layer to the output of its second-to-last layer,
    and replaces ``entry[1][0]`` of every entry whose second element is not
    None with that function's output for the feature.

    Args:
        src_pack_file: Path of the msgpack file to read
            (``{person: [entry, ...]}``).
        dst_pack_file: Path of the msgpack file to write.
    """
    weight_file = '/data/liubo/face/annotate_face_model/skyeye_face_autoencoder.weight'
    model_file = '/data/liubo/face/annotate_face_model/skyeye_face_autoencoder.model'
    with open(model_file, 'r') as model_json:
        autoencoder = model_from_json(model_json.read())
    autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
    autoencoder.load_weights(weight_file)
    get_Conv_FeatureMap = K.function(
        [autoencoder.layers[0].get_input_at(False), K.learning_phase()],
        [autoencoder.layers[-2].get_output_at(False)])

    with open(src_pack_file, 'rb') as src:
        person_feature_dic = msgpack_numpy.load(src)

    for feature_list in person_feature_dic.values():
        for entry in feature_list:
            try:
                # ``is None`` instead of ``== None``: the equality test raises
                # on ndarray values and the entry was then silently skipped.
                if entry[1] is None:
                    continue
                this_feature = np.array(entry[1][0])
                this_feature = np.reshape(this_feature, (1, this_feature.size))
                # The second input (0) is the value fed for K.learning_phase().
                entry[1][0] = get_Conv_FeatureMap([this_feature, 0])[0][0]
            except Exception:
                traceback.print_exc()

    with open(dst_pack_file, 'wb') as dst:
        msgpack_numpy.dump(person_feature_dic, dst)
def get_all_img_feature():
    """Copy every annotated picture and store its feature next to it.

    For each ``/tmp/annotate/<person>/<picture>`` the picture is copied to
    ``/data/liubo/face/research_self/<person>/`` and element 0 of
    ``extract_feature_from_file``'s result is saved as ``<picture>.p`` under
    ``/data/liubo/face/research_feature_self/<person>/``. Pictures that
    raise are reported with a traceback and skipped.
    """
    folder = '/tmp/annotate'
    result_pic_folder = '/data/liubo/face/research_self'
    result_feature_folder = '/data/liubo/face/research_feature_self'
    for person in os.listdir(folder):
        person_path = os.path.join(folder, person)
        result_person_pic_folder = os.path.join(result_pic_folder, person)
        if not os.path.exists(result_person_pic_folder):
            os.makedirs(result_person_pic_folder)
        person_feature_folder = os.path.join(result_feature_folder, person)
        if not os.path.exists(person_feature_folder):
            os.makedirs(person_feature_folder)
        for pic in os.listdir(person_path):
            try:
                pic_path = os.path.join(person_path, pic)
                feature = np.asarray(extract_feature_from_file(pic_path)[0])
                shutil.copy(pic_path,
                            os.path.join(result_person_pic_folder, pic))
                feature_path = os.path.join(person_feature_folder, pic + '.p')
                # One handle per picture: close each as soon as it is written.
                with open(feature_path, 'wb') as out:
                    msgpack_numpy.dump(feature, out)
            except Exception:
                traceback.print_exc()
                continue
def save_pic_feature(self, pic_path, person_name):
    """Save the biggest face of ``pic_path`` and its feature for a person.

    Generates the feature of an already existing file and stores it in the
    per-person feature folder; used when an administrator adds a new picture.
    The cropped face is written to the per-person picture folder. Both
    outputs use the picture's own file name.

    Args:
        pic_path: Path of the picture file.
        person_name: Person the picture belongs to (folder name).

    Returns:
        None. Returns early, writing nothing, when
        ``extract_feature_from_binary_data`` returns None.
    """
    person_pic_folder_path = os.path.join(self.all_pic_data_folder, person_name)
    person_feature_folder_path = os.path.join(self.all_pic_feature_data_folder, person_name)
    if not os.path.exists(person_pic_folder_path):
        os.makedirs(person_pic_folder_path)
    if not os.path.exists(person_feature_folder_path):
        os.makedirs(person_feature_folder_path)
    pic_name = os.path.split(pic_path)[-1]
    # Feature file.
    person_feature_path = os.path.join(person_feature_folder_path, pic_name)
    # Face file.
    person_pic_path = os.path.join(person_pic_folder_path, pic_name)
    # Close the picture handle once the extractor has consumed it.
    with open(pic_path, 'rb') as binary_pic:
        result = extract_feature_from_binary_data(binary_pic)
    if result is None:
        return
    face_num, all_frames, all_feature = result
    biggest_face_index = find_big_face(all_frames)
    x, y, width, height = all_frames[biggest_face_index]
    pic_feature = all_feature[biggest_face_index]
    # Rows of the image array run along y (height) and columns along x
    # (width); the original slice used width for rows and height for columns.
    face_pic = cv2.imread(pic_path)[y:y + height, x:x + width, :]
    cv2.imwrite(person_pic_path, face_pic)
    with open(person_feature_path, 'wb') as feature_out:
        msgpack_numpy.dump(pic_feature, feature_out)
def extract_triplet_feature():
    """Transform all packed features through the siamese sub-model.

    Reads ``{person: [(feature, path), ...]}`` from ``feature_pack_file``,
    evaluates each feature (as a single row) with a function built from
    ``model.layers[2]`` (input of its first layer to output of its last
    layer) and dumps the transformed mapping to
    ``triplet_feature_pack_file``.
    """
    with open(feature_pack_file, 'rb') as pack_in:
        lfw_feature_dic = msgpack_numpy.load(pack_in)
    new_lfw_feature_dic = {}

    model_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.model'
    weight_file = '/data/liubo/face/vgg_face_model/annotate_siamese_graph.weight'
    # Read the architecture JSON through a context manager so the handle is
    # released immediately.
    with open(model_file, 'r') as architecture:
        model = model_from_json(architecture.read())
    model.compile(optimizer=Adam(), loss=['categorical_crossentropy'])
    model.load_weights(weight_file)

    sub_model = model.layers[2]
    get_Conv_FeatureMap = K.function(
        [sub_model.layers[0].get_input_at(False), K.learning_phase()],
        [sub_model.layers[-1].get_output_at(False)])

    for person, this_person_feature_list in lfw_feature_dic.items():
        # The second input (0) is the value fed for K.learning_phase().
        new_lfw_feature_dic[person] = [
            (get_Conv_FeatureMap(
                [np.reshape(feature, (1, feature.size)), 0])[0].copy(), path)
            for feature, path in this_person_feature_list
        ]

    with open(triplet_feature_pack_file, 'wb') as pack_out:
        msgpack_numpy.dump(new_lfw_feature_dic, pack_out)
def run(self):
    """Client loop: push each observation to the server and apply its action.

    Creates the environment, connects a PUSH socket to ``self.c2s`` and a
    DEALER socket to ``self.s2c`` (both tagged with ``self.identity``), then
    loops forever: the current state is sent as ``(self.index, obs)``; when
    the state is not None, an action is received and applied.
    """
    self.player = Environment(self.index * 113)
    ctx = zmq.Context()

    # Client-to-server channel.
    push_socket = self.c2s_socket = ctx.socket(zmq.PUSH)
    push_socket.setsockopt(zmq.IDENTITY, self.identity)
    push_socket.connect(self.c2s)

    # Server-to-client channel.
    dealer_socket = self.s2c_socket = ctx.socket(zmq.DEALER)
    dealer_socket.setsockopt(zmq.IDENTITY, self.identity)
    dealer_socket.connect(self.s2c)

    while True:
        obs = self.player.current_state()
        message = dump((self.index, obs))
        self.c2s_socket.send(message, copy=False)
        if obs is None:
            # No reply is awaited for a None observation.
            continue
        reply = self.s2c_socket.recv(copy=False)
        self.player.action(load(reply.bytes))
def run(self):
    """Client loop: report frame and last-step stats, then apply the reply.

    Prints the process id, creates the environment, connects a PUSH socket
    to ``self.c2s`` and a DEALER socket to ``self.s2c`` (both tagged with
    ``self.identity``), then loops forever sending
    ``(identity, [frame], rew, isover, frag, kdr)`` and applying the action
    received in return. ``reset_stat`` is called whenever the step reports
    the episode as over.
    """
    print("My pid is :%d\n" % os.getpid())
    self.player = Environment(self.index * 113)
    ctx = zmq.Context()

    # Client-to-server channel.
    push_socket = self.c2s_socket = ctx.socket(zmq.PUSH)
    push_socket.setsockopt(zmq.IDENTITY, self.identity)
    push_socket.connect(self.c2s)

    # Server-to-client channel.
    dealer_socket = self.s2c_socket = ctx.socket(zmq.DEALER)
    dealer_socket.setsockopt(zmq.IDENTITY, self.identity)
    dealer_socket.connect(self.s2c)

    # Stats of the previous step; rew is the last action's reward.
    step_stats = (None, False, 0, 0)
    while True:
        frame = self.player.current_state()
        rew, isover, frag, kdr = step_stats
        packet = dump((self.identity, [frame], rew, isover, frag, kdr))
        self.c2s_socket.send(packet, copy=False)
        reply = self.s2c_socket.recv(copy=False)
        action = load(reply.bytes)
        step_stats = self.player.action(action)
        rew, isover, frag, kdr = step_stats
        if isover:
            self.player.reset_stat()
# Build a small FaceScrub dataset: the first 20 person folders, every picture
# resized to 150x150, labelled with the person's position in the listing,
# then split with train_test_split and packed to FaceScrub.p.
root_folder = '/data/liubo/face/all_pic_data/FaceScrub'
person_list = os.listdir(root_folder)
all_data = []
all_label = []
index = 0
for person in person_list[:20]:
    print('%s %s' % (person, index))
    pic_list = os.listdir(os.path.join(root_folder, person))
    for pic in pic_list:
        try:
            pic_path = os.path.join(root_folder, person, pic)
            im = cv2.resize(cv2.imread(pic_path), (150, 150))
            all_data.append(im)
            all_label.append(index)
        except Exception:
            # Unreadable pictures are reported and skipped.
            traceback.print_exc()
            continue
    index += 1
all_data = np.asarray(all_data)
all_label = np.asarray(all_label)
train_data, test_data, train_label, test_label = train_test_split(all_data, all_label)
print('%s %s %s %s' % (train_data.shape, train_label.shape,
                       test_data.shape, test_label.shape))
# ``with`` guarantees the pack is flushed and closed.
with open('/data/liubo/face/all_pic_data/FaceScrub.p', 'wb') as out:
    msgpack_numpy.dump((train_data, test_data, train_label, test_label), out)
def find_max_min():
    """Collect hard pairs: dissimilar same-person and similar cross-person.

    Loads ``{person: [(feature, path), ...]}`` from ``feature_pack_file`` and
    scores picture pairs with cosine similarity. Two heaps hold at most about
    ``pair_threshold`` scores each, with companion dicts mapping a score to
    the list of path pairs that produced it. Both heaps and both dicts are
    written to ``new_pair_pack_file``.
    """
    # Within one person look for the lowest similarity; across different
    # persons look for the highest similarity.
    # NOTE(review): the input file handle is never closed.
    lfw_feature_dic = msgpack_numpy.load(open(feature_pack_file, 'rb'))
    person_list = lfw_feature_dic.keys()
    same_person_score = []
    same_person_score_pair_dic = {
    }  # {score:[(path1,path2), ...,(path1,path2)]}
    no_same_person_score = []
    no_same_person_score_pair_dic = {
    }  # {score:[(path1,path2), ...,(path1,path2)]}
    heapq.heapify(same_person_score)
    pair_threshold = 3000
    for person_index, person in enumerate(person_list):
        start = time()
        path_feature_list = lfw_feature_dic.get(person)
        # Enumerate all possible pairs of this person --- the smaller the
        # score the better (the least similar pictures of the same person).
        # Each time the largest score is dropped and a smaller one added, so
        # the score is negated on insert and the heap top is then the
        # originally largest score.
        length = len(path_feature_list)
        for index_i in range(length):
            # NOTE(review): index_j starts at index_i, so each picture is
            # also paired with itself.
            for index_j in range(index_i, length):
                feature1, path1 = path_feature_list[index_i]
                feature2, path2 = path_feature_list[index_j]
                feature1 = np.reshape(feature1,
                                      newshape=(1, feature1.shape[0]))
                feature2 = np.reshape(feature2,
                                      newshape=(1, feature2.shape[0]))
                this_score = 0 - pw.cosine_similarity(feature1, feature2)[0][0]
                if len(same_person_score) > pair_threshold:
                    top_item = same_person_score[0]
                    # NOTE(review): heapq is a min-heap, so top_item is the
                    # smallest negated similarity; this test admits a pair
                    # only when it is MORE similar than every kept pair,
                    # which looks opposite to the comment below -- confirm.
                    if this_score < top_item:
                        # More dissimilar: add it.
                        heapq.heappop(same_person_score)
                        heapq.heappush(same_person_score, this_score)
                        # Remove the old pair and add the current pair (one
                        # score may correspond to several pairs).
                        if top_item in same_person_score_pair_dic:
                            same_person_score_pair_dic.pop(top_item)
                        pair_list = same_person_score_pair_dic.get(
                            this_score, [])
                        pair_list.append((path1, path2))
                        same_person_score_pair_dic[this_score] = pair_list
                else:
                    heapq.heappush(same_person_score, this_score)
                    pair_list = same_person_score_pair_dic.get(this_score, [])
                    pair_list.append((path1, path2))
                    same_person_score_pair_dic[this_score] = pair_list
        # Enumerate all possible pairs made of two different persons.
        for other_person_index, other_person in enumerate(
                person_list[person_index + 1:], start=person_index + 1):
            other_path_feature_list = lfw_feature_dic.get(other_person)
            if other_person == person:
                continue
            other_length = len(other_path_feature_list)
            for index_i in range(length):
                for index_j in range(other_length):
                    feature1, path1 = path_feature_list[index_i]
                    feature2, path2 = other_path_feature_list[index_j]
                    feature1 = np.reshape(feature1,
                                          newshape=(1, feature1.shape[0]))
                    feature2 = np.reshape(feature2,
                                          newshape=(1, feature2.shape[0]))
                    this_score = pw.cosine_similarity(feature1, feature2)[0][0]
                    if len(no_same_person_score) > pair_threshold:
                        top_item = no_same_person_score[0]
                        # NOTE(review): top_item is the smallest kept
                        # similarity; this test admits a pair only when it
                        # is LESS similar than every kept pair, which looks
                        # opposite to the comment below -- confirm.
                        if this_score < top_item:
                            # More similar: add it.
                            heapq.heappop(no_same_person_score)
                            heapq.heappush(no_same_person_score, this_score)
                            # Remove the old pair and add the current pair
                            # (one score may correspond to several pairs).
                            if top_item in no_same_person_score_pair_dic:
                                no_same_person_score_pair_dic.pop(top_item)
                            pair_list = no_same_person_score_pair_dic.get(
                                this_score, [])
                            pair_list.append((path1, path2))
                            no_same_person_score_pair_dic[
                                this_score] = pair_list
                    else:
                        heapq.heappush(no_same_person_score, this_score)
                        pair_list = no_same_person_score_pair_dic.get(
                            this_score, [])
                        pair_list.append((path1, path2))
                        no_same_person_score_pair_dic[this_score] = pair_list
        end = time()
        print person_index, person, (end - start), length
    # NOTE(review): the output file handle is never closed explicitly.
    msgpack_numpy.dump((same_person_score_pair_dic, same_person_score,
                        no_same_person_score_pair_dic, no_same_person_score),
                       open(new_pair_pack_file, 'wb'))
def train_valid_verif_model():
    """Train and cross-validate a LinearSVC on pair cosine similarities.

    Reads "path1 path2 label" lines from pair_file, looks both paths up in the
    features loaded from feature_pack_file, and uses the cosine similarity of
    each pair as the single input feature.  Runs 10-fold cross validation
    (per-sample results go to research_verif_result.txt), then fits a final
    model on all data and pickles it to verification_model_file.
    """
    all_data = []
    all_label = []
    all_pic_path_list = []
    with open(feature_pack_file, 'rb') as feature_file:
        path_feature_dic = msgpack_numpy.load(feature_file)
    with open(pair_file) as pair_lines:
        for count, line in enumerate(pair_lines):
            if count % 100 == 0:
                print(count)
            tmp = line.rstrip().split()
            if len(tmp) != 3:
                continue
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            if path1 not in path_feature_dic or path2 not in path_feature_dic:
                # No exception is active here, so report the pair itself.
                print('missing feature : %s %s' % (path1, path2))
                continue
            try:
                feature1 = np.asarray(path_feature_dic.get(path1))
                feature2 = np.asarray(path_feature_dic.get(path2))
                predicts = pw.cosine_similarity(feature1, feature2)
                all_data.append(predicts)
                all_label.append(label)
                all_pic_path_list.append((path1, path2))
            except Exception:
                traceback.print_exc()
    # NOTE(review): this writes the pair data back to feature_pack_file, the
    # same file the features were read from above - confirm that overwriting
    # the input is intended.
    with open(feature_pack_file, 'wb') as pack_file:
        msgpack_numpy.dump((all_data, all_label, all_pic_path_list), pack_file)
    with open(feature_pack_file, 'rb') as pack_file:
        (all_data, all_label,
         all_pic_path_list) = msgpack_numpy.load(pack_file)
    all_data = np.asarray(all_data)
    # Each similarity is a (1, 1) matrix; flatten to (n_samples, 1).
    all_data = np.reshape(all_data, (all_data.shape[0], all_data.shape[2]))
    all_label = np.asarray(all_label)
    all_pic_path_list = np.asarray(all_pic_path_list)
    print('%s %s' % (all_data.shape, all_label.shape))
    all_acc = []
    # The split()-based KFold takes n_splits; n_folds belongs to the older
    # iterator-style class and is rejected here.
    kf = KFold(n_splits=10)
    with open('research_verif_result.txt', 'w') as f:
        for train, valid in kf.split(all_data, all_label, all_pic_path_list):
            train_data = all_data[train]
            valid_data = all_data[valid]
            train_label = all_label[train]
            valid_label = all_label[valid]
            valid_path_list = all_pic_path_list[valid]
            clf = LinearSVC()
            clf.fit(train_data, train_label)
            acc = accuracy_score(valid_label, clf.predict(valid_data))
            for pair, sample, sample_label in zip(valid_path_list, valid_data,
                                                  valid_label):
                f.write(os.path.split(pair[0])[1] + '\t' +
                        os.path.split(pair[1])[1] + '\t' + str(sample[0]) +
                        '\t' + str(sample_label) + '\n')
            all_acc.append(acc)
            print(acc)
        print('mean_acc : %s' % np.mean(all_acc))
    clf = LinearSVC()
    clf.fit(all_data, all_label)
    with open(verification_model_file, 'wb') as model_file:
        cPickle.dump(clf, model_file)
def _load_lfw_input(pic_path):
    """Read one picture, resize it and return it as a (1, 3, 78, 62) batch."""
    resized = imresize(imread(pic_path), size=(78, 62, 3))
    return np.transpose(np.reshape(resized, (1, 78, 62, 3)), (0, 3, 1, 2))


def _pair_feature(pic_path1, pic_path2):
    """Return the concatenated conv features of two pictures as one list."""
    im1 = _load_lfw_input(pic_path1)
    im2 = _load_lfw_input(pic_path2)
    im1_feature = get_Conv_FeatureMap([im1, 0])[0]
    im2_feature = get_Conv_FeatureMap([im2, 0])[0]
    return list(im1_feature[0]) + list(im2_feature[0])


def main_feature():
    """Build a pair data set from the LFW pair list and dump it to disk.

    A 3-field line names one person: two randomly chosen pictures of that
    person form a sample with label 0.  A 4-field line names two persons
    (fields 0 and 2): one random picture of each forms a sample with label 1.
    The (data, label) lists are written to 'lfw_data_label.p'.
    """
    lfw_folder = '/data/liubo/face/lfw_face'
    pair_file = '/data/liubo/face/lfw_pair.txt'
    data = []
    label = []
    with open(pair_file) as pair_lines:
        for line in pair_lines:
            tmp = line.rstrip().split()
            if len(tmp) == 3:
                person = tmp[0]
                person_path = os.path.join(lfw_folder, person)
                pic_list = os.listdir(person_path)
                # Two distinct pictures are needed; an empty folder must be
                # skipped as well, not only a single-picture one.
                if len(pic_list) < 2:
                    print('error person : %s' % person)
                    continue
                np.random.shuffle(pic_list)
                data.append(
                    _pair_feature(os.path.join(person_path, pic_list[0]),
                                  os.path.join(person_path, pic_list[1])))
                label.append(0)
            elif len(tmp) == 4:
                person1_path = os.path.join(lfw_folder, tmp[0])
                pic1_list = os.listdir(person1_path)
                person2_path = os.path.join(lfw_folder, tmp[2])
                pic2_list = os.listdir(person2_path)
                if pic1_list and pic2_list:
                    np.random.shuffle(pic1_list)
                    np.random.shuffle(pic2_list)
                    data.append(
                        _pair_feature(
                            os.path.join(person1_path, pic1_list[0]),
                            os.path.join(person2_path, pic2_list[0])))
                    label.append(1)
    # msgpack output is binary, so the file must be opened in 'wb' mode.
    with open('lfw_data_label.p', 'wb') as data_file:
        msgpack_numpy.dump((data, label), data_file)
def train_valid_verif_model():
    """Train and cross-validate a LinearSVC on pair cosine similarities.

    Reads "path1 path2 label" lines from pair_file, looks both paths up in the
    features loaded from feature_pack_file, and uses the cosine similarity of
    each pair as the single input feature.  Runs 10-fold cross validation
    (per-sample results go to research_verif_result.txt), then fits a final
    model on all data and pickles it to verification_model_file.
    """
    all_data = []
    all_label = []
    all_pic_path_list = []
    with open(feature_pack_file, 'rb') as feature_file:
        path_feature_dic = msgpack_numpy.load(feature_file)
    with open(pair_file) as pair_lines:
        for count, line in enumerate(pair_lines):
            if count % 100 == 0:
                print(count)
            tmp = line.rstrip().split()
            if len(tmp) != 3:
                continue
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            if path1 not in path_feature_dic or path2 not in path_feature_dic:
                # No exception is active here, so report the pair itself.
                print('missing feature : %s %s' % (path1, path2))
                continue
            try:
                feature1 = np.asarray(path_feature_dic.get(path1))
                feature2 = np.asarray(path_feature_dic.get(path2))
                predicts = pw.cosine_similarity(feature1, feature2)
                all_data.append(predicts)
                all_label.append(label)
                all_pic_path_list.append((path1, path2))
            except Exception:
                traceback.print_exc()
    # NOTE(review): this writes the pair data back to feature_pack_file, the
    # same file the features were read from above - confirm that overwriting
    # the input is intended.
    with open(feature_pack_file, 'wb') as pack_file:
        msgpack_numpy.dump((all_data, all_label, all_pic_path_list), pack_file)
    with open(feature_pack_file, 'rb') as pack_file:
        (all_data, all_label,
         all_pic_path_list) = msgpack_numpy.load(pack_file)
    all_data = np.asarray(all_data)
    # Each similarity is a (1, 1) matrix; flatten to (n_samples, 1).
    all_data = np.reshape(all_data, (all_data.shape[0], all_data.shape[2]))
    all_label = np.asarray(all_label)
    all_pic_path_list = np.asarray(all_pic_path_list)
    print('%s %s' % (all_data.shape, all_label.shape))
    all_acc = []
    # The split()-based KFold takes n_splits; n_folds belongs to the older
    # iterator-style class and is rejected here.
    kf = KFold(n_splits=10)
    with open('research_verif_result.txt', 'w') as f:
        for train, valid in kf.split(all_data, all_label, all_pic_path_list):
            train_data = all_data[train]
            valid_data = all_data[valid]
            train_label = all_label[train]
            valid_label = all_label[valid]
            valid_path_list = all_pic_path_list[valid]
            clf = LinearSVC()
            clf.fit(train_data, train_label)
            acc = accuracy_score(valid_label, clf.predict(valid_data))
            for pair, sample, sample_label in zip(valid_path_list, valid_data,
                                                  valid_label):
                f.write(os.path.split(pair[0])[1] + '\t' +
                        os.path.split(pair[1])[1] + '\t' + str(sample[0]) +
                        '\t' + str(sample_label) + '\n')
            all_acc.append(acc)
            print(acc)
        print('mean_acc : %s' % np.mean(all_acc))
    clf = LinearSVC()
    clf.fit(all_data, all_label)
    with open(verification_model_file, 'wb') as model_file:
        cPickle.dump(clf, model_file)
def recognize_online_cluster(self, image, image_id):
    """Recognize one face picture and update the online clustering state.

    The picture is filtered by size and sharpness, stored under tmp_face_dir,
    and its feature is compared with the recently seen features and with the
    neighbours returned by the LSH Forest.  self.evaluate_result() then
    decides whether it is the same picture, surely an existing person, surely
    a new person, or uncertain; depending on that the face and its feature
    are saved and/or added to the index.

    :param image: base64-encoded, zlib-compressed encoded image data.
    :param image_id: identifier used for log lines and output file names.
    :return: a 4-tuple (label, score, feature_str, need_save).  Pictures
        rejected before recognition, and save failures, return
        (self.unknown, 1.0, feature_str, <bool>); recognized pictures return
        the label and str() of the first entry's score, the feature string
        and the save flag; unexpected errors return
        (self.unknown, '100.0', str(feature_str), 'False').
    """
    start = time.time()
    need_add = False
    need_save = False
    feature_str = ''
    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The with-block closes the log on every exit path, including the early
    # returns and the error returns below.
    with open(log_path, 'a') as log_file:

        def log(*fields):
            # One tab-separated log line per call.
            log_file.write('\t'.join(map(str, fields)) + '\n')

        log("receive image", image_id, time.time())
        try:
            image = base64.b64decode(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.frombuffer(image, dtype=np.uint8), 1)
            log('shape :', im.shape[0], im.shape[1])
            # Size filter
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                log('stat recognize_time :', (time.time() - start),
                    'small_size')
                return self.unknown, 1.0, feature_str, need_save
            # Sharpness filter
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                log('stat recognize_time :', (time.time() - start),
                    'blur_filter', blur_var)
                return self.unknown, 1.0, feature_str, need_save
            # Keep a copy of the received picture
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id + '.jpg')
            cv2.imwrite(img_file, im)
        except Exception:
            traceback.print_exc()
            return self.unknown, 1.0, feature_str, need_save
        try:
            # Steps: nearest pictures ; prob ; online clustering ; LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                log('stat not_find_face', 'time :', (time.time() - start))
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result
            try:
                # nearest_sim_list has the same format as dist_label_list so
                # the two can be merged and evaluated together.
                nearest_sim_list = self.cal_nearest_sim(
                    current_feature=im_feature)
            except Exception:
                traceback.print_exc()
                nearest_sim_list = []
            log('nearest_sim_list :', [str(item) for item in nearest_sim_list])
            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            log('extract_feature_time :', (time.time() - start))
            # Nearest pictures according to the LSH Forest
            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]
            log('dist_label_list :', [str(item) for item in dist_label_list])
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
            else:
                # Turn the distances into a decision
                this_id, this_label = self.evaluate_result(dist_label_list)
            # The newest picture always joins self.nearest, whatever the
            # decision was.
            self.nearest.append((this_label, im_feature))
            log('self.nearest :', [str(item) for item in self.nearest])
            # Online clustering: new person or one of the known persons
            folder_name = None
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_save = True
                folder_name = this_label + self.must_same_str
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
                self.current_new_person_id += 1
                folder_name = this_label
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                # Uncertain pictures are saved but not added to the index.
                folder_name = this_label + self.maybe_same_str
                need_save = True
            else:
                log('error para :', this_id)
            if need_save:
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, folder_name)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder, folder_name)
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the feature and the face picture side by side
                    this_pic_feature_name = os.path.join(
                        this_person_feature_folder, image_id + '.p')
                    with open(this_pic_feature_name, 'wb') as feature_file:
                        msgpack_numpy.dump(im_feature, feature_file)
                    this_pic_face_name = os.path.join(this_person_pic_folder,
                                                      image_id + '.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except Exception:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False
            # Add to the LSH Forest (partial_fit)
            if need_add:
                self.add_one_pic(im_feature, this_label)
            if this_id in (self.same_pic_id, self.must_be_not_same_id,
                           self.must_be_same_id):
                end = time.time()
                log('stat recognize_time :', (end - start), 'this_id :',
                    self.trans_dic.get(this_id))
                need_save = True
                return this_label.replace(self.must_same_str, ''), str(
                    dist_label_list[0][0]), str(feature_str), str(need_save)
            # Uncertain area: do not reveal a name
            end = time.time()
            log('stat gray_area :', (end - start))
            return self.unknown, str(
                dist_label_list[0][0]), str(feature_str), str(False)
        except Exception:
            traceback.print_exc()
            return self.unknown, str(100.0), str(feature_str), str(False)
def save_data(extract_func, data_path):
    """Run extract_func and dump its (feature, label) result to data_path.

    :param extract_func: callable without arguments returning a
        (feature, label) pair.
    :param data_path: path of the msgpack file to write (opened in 'wb' mode).
    """
    feature, label = extract_func()
    # The with-block closes the file even when the dump fails.
    with open(data_path, 'wb') as data_file:
        msgpack_numpy.dump((feature, label), data_file)
def main(args):
    """Compute embeddings for the LFW pair images and print the accuracy.

    Loads the model found in args.model_dir, runs the images listed in
    args.lfw_pairs through it in batches of args.lfw_batch_size, dumps
    (paths, emb_array, actual_issame) to 'lfw_feature.p' and evaluates the
    embeddings with lfw.evaluate().

    :param args: object with the attributes lfw_pairs, lfw_dir, lfw_file_ext,
        model_dir, lfw_batch_size, seed and lfw_nrof_folds.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
            # Get the paths for the corresponding images
            paths, actual_issame = lfw.get_paths(
                os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)
            # Load the model; the directory is expanded once so that both
            # facenet calls receive the same path.
            model_dir = os.path.expanduser(args.model_dir)
            print('Model directory: %s' % args.model_dir)
            meta_file, ckpt_file = facenet.get_model_filenames(model_dir)
            print('Metagraph file: %s' % meta_file)
            print('Checkpoint file: %s' % ckpt_file)
            facenet.load_model(model_dir, meta_file, ckpt_file)
            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            image_size = images_placeholder.get_shape()[1].value
            embedding_size = embeddings.get_shape()[1]
            # Run forward pass to calculate embeddings
            print('Runnning forward pass on LFW images')
            batch_size = args.lfw_batch_size
            nrof_images = len(paths)
            nrof_batches = int(math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            print('nrof_batches :{}'.format(nrof_batches))
            all_time = 0
            for i in range(nrof_batches):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {images_placeholder: images}
                # Only the forward pass is timed, not the image loading.
                start = time()
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)
                end = time()
                all_time += (end - start)
                print('index: {} time: {}'.format(i, (end - start)))
            print('all_time :', all_time)
            with open('lfw_feature.p', 'wb') as feature_file:
                msgpack_numpy.dump((paths, emb_array, actual_issame),
                                   feature_file)
            tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
                emb_array,
                args.seed,
                actual_issame,
                nrof_folds=args.lfw_nrof_folds)
            print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy),
                                              np.std(accuracy)))
def recognize_online_cluster(self, image, image_id):
    """Recognize one face picture and update the online clustering state.

    The picture is filtered by size and sharpness, stored under tmp_face_dir,
    and its feature is compared with the recently seen features and with the
    neighbours returned by the LSH Forest.  self.evaluate_result() then
    decides whether it is the same picture, surely an existing person, surely
    a new person, or uncertain; depending on that the face and its feature
    are saved and/or added to the index.

    :param image: base64-encoded, zlib-compressed encoded image data.
    :param image_id: identifier used for log lines and output file names.
    :return: a 4-tuple (label, score, feature_str, need_save).  Pictures
        rejected before recognition, and save failures, return
        (self.unknown, 1.0, feature_str, <bool>); recognized pictures return
        the label and str() of the first entry's score, the feature string
        and the save flag; unexpected errors return
        (self.unknown, '100.0', str(feature_str), 'False').
    """
    start = time.time()
    need_add = False
    need_save = False
    feature_str = ''
    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The with-block closes the log on every exit path, including the early
    # returns and the error returns below.
    with open(log_path, 'a') as log_file:

        def log(*fields):
            # One tab-separated log line per call.
            log_file.write('\t'.join(map(str, fields)) + '\n')

        log("receive image", image_id, time.time())
        try:
            image = base64.b64decode(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.frombuffer(image, dtype=np.uint8), 1)
            log('shape :', im.shape[0], im.shape[1])
            # Size filter
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                log('stat recognize_time :', (time.time() - start),
                    'small_size')
                return self.unknown, 1.0, feature_str, need_save
            # Sharpness filter
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                log('stat recognize_time :', (time.time() - start),
                    'blur_filter', blur_var)
                return self.unknown, 1.0, feature_str, need_save
            # Keep a copy of the received picture
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id + '.jpg')
            cv2.imwrite(img_file, im)
        except Exception:
            traceback.print_exc()
            return self.unknown, 1.0, feature_str, need_save
        try:
            # Steps: nearest pictures ; prob ; online clustering ; LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                log('stat not_find_face', 'time :', (time.time() - start))
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result
            try:
                # nearest_sim_list has the same format as dist_label_list so
                # the two can be merged and evaluated together.
                nearest_sim_list = self.cal_nearest_sim(
                    current_feature=im_feature)
            except Exception:
                traceback.print_exc()
                nearest_sim_list = []
            log('nearest_sim_list :', [str(item) for item in nearest_sim_list])
            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            log('extract_feature_time :', (time.time() - start))
            # Nearest pictures according to the LSH Forest
            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]
            log('dist_label_list :', [str(item) for item in dist_label_list])
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
            else:
                # Turn the distances into a decision
                this_id, this_label = self.evaluate_result(dist_label_list)
            # The newest picture always joins self.nearest, whatever the
            # decision was.
            self.nearest.append((this_label, im_feature))
            log('self.nearest :', [str(item) for item in self.nearest])
            # Online clustering: new person or one of the known persons
            folder_name = None
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_save = True
                folder_name = this_label + self.must_same_str
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
                self.current_new_person_id += 1
                folder_name = this_label
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                # Uncertain pictures are saved but not added to the index.
                folder_name = this_label + self.maybe_same_str
                need_save = True
            else:
                log('error para :', this_id)
            if need_save:
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, folder_name)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder, folder_name)
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the feature and the face picture side by side
                    this_pic_feature_name = os.path.join(
                        this_person_feature_folder, image_id + '.p')
                    with open(this_pic_feature_name, 'wb') as feature_file:
                        msgpack_numpy.dump(im_feature, feature_file)
                    this_pic_face_name = os.path.join(this_person_pic_folder,
                                                      image_id + '.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except Exception:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False
            # Add to the LSH Forest (partial_fit)
            if need_add:
                self.add_one_pic(im_feature, this_label)
            if this_id in (self.same_pic_id, self.must_be_not_same_id,
                           self.must_be_same_id):
                end = time.time()
                log('stat recognize_time :', (end - start), 'this_id :',
                    self.trans_dic.get(this_id))
                need_save = True
                return this_label.replace(self.must_same_str, ''), str(
                    dist_label_list[0][0]), str(feature_str), str(need_save)
            # Uncertain area: do not reveal a name
            end = time.time()
            log('stat gray_area :', (end - start))
            return self.unknown, str(
                dist_label_list[0][0]), str(feature_str), str(False)
        except Exception:
            traceback.print_exc()
            return self.unknown, str(100.0), str(feature_str), str(False)
def main(args):
    """Compute embeddings for the LFW pair images and print the accuracy.

    Loads the model found in args.model_dir, runs the images listed in
    args.lfw_pairs through it in batches of args.lfw_batch_size, dumps
    (paths, emb_array, actual_issame) to 'lfw_feature.p' and evaluates the
    embeddings with lfw.evaluate().

    :param args: object with the attributes lfw_pairs, lfw_dir, lfw_file_ext,
        model_dir, lfw_batch_size, seed and lfw_nrof_folds.
    """
    with tf.Graph().as_default():
        with tf.Session() as sess:
            # Read the file containing the pairs used for testing
            pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs))
            # Get the paths for the corresponding images
            paths, actual_issame = lfw.get_paths(
                os.path.expanduser(args.lfw_dir), pairs, args.lfw_file_ext)
            # Load the model; the directory is expanded once so that both
            # facenet calls receive the same path.
            model_dir = os.path.expanduser(args.model_dir)
            print('Model directory: %s' % args.model_dir)
            meta_file, ckpt_file = facenet.get_model_filenames(model_dir)
            print('Metagraph file: %s' % meta_file)
            print('Checkpoint file: %s' % ckpt_file)
            facenet.load_model(model_dir, meta_file, ckpt_file)
            # Get input and output tensors
            graph = tf.get_default_graph()
            images_placeholder = graph.get_tensor_by_name("input:0")
            embeddings = graph.get_tensor_by_name("embeddings:0")
            image_size = images_placeholder.get_shape()[1].value
            embedding_size = embeddings.get_shape()[1]
            # Run forward pass to calculate embeddings
            print('Runnning forward pass on LFW images')
            batch_size = args.lfw_batch_size
            nrof_images = len(paths)
            nrof_batches = int(math.ceil(1.0 * nrof_images / batch_size))
            emb_array = np.zeros((nrof_images, embedding_size))
            print('nrof_batches :{}'.format(nrof_batches))
            all_time = 0
            for i in range(nrof_batches):
                start_index = i * batch_size
                end_index = min((i + 1) * batch_size, nrof_images)
                paths_batch = paths[start_index:end_index]
                images = facenet.load_data(paths_batch, False, False,
                                           image_size)
                feed_dict = {images_placeholder: images}
                # Only the forward pass is timed, not the image loading.
                start = time()
                emb_array[start_index:end_index, :] = sess.run(
                    embeddings, feed_dict=feed_dict)
                end = time()
                all_time += (end - start)
                print('index: {} time: {}'.format(i, (end - start)))
            print('all_time :', all_time)
            with open('lfw_feature.p', 'wb') as feature_file:
                msgpack_numpy.dump((paths, emb_array, actual_issame),
                                   feature_file)
            tpr, fpr, accuracy, val, val_std, far = lfw.evaluate(
                emb_array,
                args.seed,
                actual_issame,
                nrof_folds=args.lfw_nrof_folds)
            print('Accuracy: %1.3f+-%1.3f' % (np.mean(accuracy),
                                              np.std(accuracy)))
all_num = 0 for root_dir, dir_list, pic_list in os.walk(folder): if len(pic_list) >= 10: pic_num_list.append((root_dir, len(pic_list))) all_num += len(pic_list) print len(pic_num_list), all_num current_person_index += 1 if __name__ == '__main__': pass # stat(folder='/data/liubo/face/baihe/person_mtcnn_160') # # new_sample_list = create_sample_list('/data/liubo/face/baihe/person_dlib_face') train_sample_list, valid_sample_list = train_test_split(new_sample_list, test_size=0.1) print len(train_sample_list), len(valid_sample_list) msgpack_numpy.dump((train_sample_list, valid_sample_list), open('/data/liubo/face/baihe/person_dlib_face_sample_list_30.p', 'wb')) # train_sample_list, valid_sample_list = create_train_valid_list('/data/liubo/face/baihe/baihe_person_face_align') # msgpack_numpy.dump((train_sample_list, valid_sample_list), # open('/data/liubo/face/baihe/face_align_train_valid_sample_list_filter.p', 'wb'))
# Compute the distance between every unordered pair of queries and store the
# results both as a flat list and as a {(query_a, query_b): distance} dict.
# NOTE(review): shanghai_1130_query_keyword_dic, word2vec_model, day and start
# are not defined in this fragment - they must be bound before it runs.
# NOTE(review): query_list is indexed below, which requires keys() to return
# a list - confirm the interpreter version.
query_list = shanghai_1130_query_keyword_dic.keys()
all_dist = []
start1 = time()
# # Used to export the data for computation on hadoop
# new_dic = {}
# for query in query_list:
#     query_search_list = shanghai_1130_query_keyword_dic.get(query)
#     keyword_list_vector, keyword_list_freq = create_wmd_data(query_search_list, word2vec_model)
#     new_dic[query[0]] = (query_search_list, keyword_list_vector, keyword_list_freq)
# cPickle.dump(new_dic, open('beijing_1130_query_keyword_dic.p', 'wb'))
# Compute directly
query_dist_dic = {}
for index1, query1 in enumerate(query_list):
    for index2, query2 in enumerate(query_list):
        # Only the upper triangle: each unordered pair is handled once and a
        # query is never compared with itself.
        if index2 > index1:
            keyword_list1 = shanghai_1130_query_keyword_dic[query_list[index1]]
            keyword_list2 = shanghai_1130_query_keyword_dic[query_list[index2]]
            query_distance = cal_query_distance(keyword_list1, keyword_list2, word2vec_model)
            all_dist.append(query_distance)
            # The dict is keyed by the first element of each query key.
            query_dist_dic[(query_list[index1][0], query_list[index2][0])] = query_distance
            # print index1, index2, query_distance
    # Time spent on this row of the pair matrix; the timer restarts per row.
    print index1, (time() - start1)
    start1 = time()
print len(all_dist)
# NOTE(review): the input dict is named "shanghai" while both output files are
# named "beijing" - confirm which city the data belongs to.
msgpack_numpy.dump((query_list, all_dist),
                   open('/data/liubo/hotspot/query_search/all_query_dist_beijing_{}.p'.format(day), 'wb'))
cPickle.dump(query_dist_dic,
             open('/data/liubo/hotspot/query_search/beijing_query_dist_dic_{}.p'.format(day), 'wb'))
end = time()
print 'time :', (end - start)
def cb(output):
    """Handle a finished prediction for the client identified by `ident`.

    `output` must provide result(), returning an (action, value) pair.  The
    serialized action is queued on self.send_queue and the transition is
    appended to that client's history in self.client.
    """
    chosen_action, predicted_value = output.result()
    reply = [ident, dump(chosen_action)]
    self.send_queue.put(reply)
    # Look the history up before building the record, so a missing client
    # fails at the same point as a direct chained call would.
    history = self.client[ident]
    history.append(
        TransitionExperience(frame, chosen_action, None, predicted_value))
def train_valid_verif_model():
    """Train and cross-validate a LinearSVC on pair cosine similarities.

    Reads "path1 path2 label" lines from pair_file and looks both paths up in
    the features loaded from 'research_feature.p'.  Pairs with a feature
    shorter than 100 values are skipped.  The cosine similarity of each pair
    is the single input feature.  Runs 10-fold cross validation, printing
    accuracy and ROC AUC per fold, then fits a final model on all data and
    pickles it to verification_model_file.
    """
    all_data = []
    all_label = []
    all_pic_path_list = []
    with open('research_feature.p', 'rb') as feature_file:
        path_feature_dic = msgpack.load(feature_file)
    not_in = 0
    not_in_pair = {}
    with open(pair_file) as pair_lines:
        for count, line in enumerate(pair_lines):
            if count % 100 == 0:
                print(count)
            tmp = line.rstrip().split()
            if len(tmp) != 3:
                continue
            path1 = tmp[0]
            path2 = tmp[1]
            label = int(tmp[2])
            if path1 not in path_feature_dic or path2 not in path_feature_dic:
                # No exception is active here, so report the pair itself.
                print('missing feature : %s %s' % (path1, path2))
                continue
            try:
                feature1 = np.asarray(path_feature_dic.get(path1))
                feature2 = np.asarray(path_feature_dic.get(path2))
                if len(feature1) < 100 or len(feature2) < 100:
                    print('%s %s' % (path1, path2))
                    not_in += 1
                    not_in_pair[(path1, path2)] = 1
                    continue
                feature1 = np.reshape(feature1, (1, feature1.shape[0]))
                feature2 = np.reshape(feature2, (1, feature2.shape[0]))
                predicts = pw.cosine_similarity(feature1, feature2)
                all_data.append(predicts)
                all_label.append(label)
                all_pic_path_list.append((path1, path2))
            except Exception:
                traceback.print_exc()
    with open(feature_pack_file, 'wb') as pack_file:
        msgpack_numpy.dump((all_data, all_label, all_pic_path_list), pack_file)
    with open(feature_pack_file, 'rb') as pack_file:
        (all_data, all_label,
         all_pic_path_list) = msgpack_numpy.load(pack_file)
    all_data = np.asarray(all_data)
    # Each similarity is a (1, 1) matrix; flatten to (n_samples, 1).
    all_data = np.reshape(all_data, (all_data.shape[0], all_data.shape[2]))
    all_label = np.asarray(all_label)
    print('%s %s' % (all_data.shape, all_label.shape))
    kf = KFold(len(all_label), n_folds=10)
    all_acc = []
    for (train, valid) in kf:
        train_data = all_data[train]
        valid_data = all_data[valid]
        train_label = all_label[train]
        valid_label = all_label[valid]
        clf = LinearSVC()
        clf.fit(train_data, train_label)
        acc = accuracy_score(valid_label, clf.predict(valid_data))
        # ROC AUC needs continuous scores; hard 0/1 predictions collapse the
        # curve to a single operating point.
        roc_auc = roc_auc_score(valid_label,
                                clf.decision_function(valid_data))
        all_acc.append(acc)
        print('%s %s' % (acc, roc_auc))
    print('mean_acc : %s' % np.mean(all_acc))
    clf = LinearSVC()
    clf.fit(all_data, all_label)
    with open(verification_model_file, 'wb') as model_file:
        cPickle.dump(clf, model_file)
def _keep_top_score(score_heap, score_pair_dic, score, pair, capacity):
    """Offer one scored pair to a bounded min-heap that retains the largest scores.

    :param score_heap: list maintained as a heapq min-heap of scores.
    :param score_pair_dic: {score: [(path1, path2), ...]} kept in step with the heap.
    :param score: score of the candidate pair (larger means "more wanted").
    :param pair: (path1, path2) tuple the score belongs to.
    :param capacity: eviction starts once the heap holds more than this many
        scores, so the heap settles at capacity + 1 entries.
    """
    if len(score_heap) > capacity:
        weakest = score_heap[0]
        if score <= weakest:
            # Not better than the worst retained score: nothing to do.
            return
        heapq.heapreplace(score_heap, score)
        # Drop exactly one pair for the evicted score so that the dictionary
        # keeps one pair per heap entry even when scores tie.
        evicted_pairs = score_pair_dic.get(weakest)
        if evicted_pairs:
            evicted_pairs.pop()
            if not evicted_pairs:
                del score_pair_dic[weakest]
    else:
        heapq.heappush(score_heap, score)
    score_pair_dic.setdefault(score, []).append(pair)


def find_max_min():
    """Mine hard verification pairs from the features in feature_pack_file.

    Among pictures of the same person the least similar pairs are kept; among
    pictures of different persons the most similar pairs are kept.  The result
    (same_person_score_pair_dic, same_person_score,
    no_same_person_score_pair_dic, no_same_person_score) is dumped to
    new_pair_pack_file.  Same-person scores are stored negated (-cosine), so in
    both heaps a larger stored score means a harder pair.
    """
    with open(feature_pack_file, 'rb') as feature_file:
        lfw_feature_dic = msgpack_numpy.load(feature_file)
    # Materialise the keys so the list can be sliced below.
    person_list = list(lfw_feature_dic.keys())
    same_person_score = []
    same_person_score_pair_dic = {}  # {score: [(path1, path2), ...]}
    no_same_person_score = []
    no_same_person_score_pair_dic = {}  # {score: [(path1, path2), ...]}
    pair_threshold = 3000
    for person_index, person in enumerate(person_list):
        start = time()
        path_feature_list = lfw_feature_dic.get(person)
        length = len(path_feature_list)
        for index_i in range(length):
            feature1, path1 = path_feature_list[index_i]
            feature1 = np.reshape(feature1, (1, feature1.shape[0]))
            # Same person: start at index_i + 1 so a picture is never paired
            # with itself (that pair always has similarity 1).
            for index_j in range(index_i + 1, length):
                feature2, path2 = path_feature_list[index_j]
                feature2 = np.reshape(feature2, (1, feature2.shape[0]))
                # Negated so that the least similar pairs get the largest score.
                this_score = 0 - pw.cosine_similarity(feature1, feature2)[0][0]
                _keep_top_score(same_person_score,
                                same_person_score_pair_dic, this_score,
                                (path1, path2), pair_threshold)
            # Different persons: only persons after the current one, so every
            # cross-person pair is visited once.
            for other_person in person_list[person_index + 1:]:
                for feature2, path2 in lfw_feature_dic.get(other_person):
                    feature2 = np.reshape(feature2, (1, feature2.shape[0]))
                    this_score = pw.cosine_similarity(feature1, feature2)[0][0]
                    _keep_top_score(no_same_person_score,
                                    no_same_person_score_pair_dic, this_score,
                                    (path1, path2), pair_threshold)
        end = time()
        print('%s %s %s %s' % (person_index, person, (end - start), length))
    with open(new_pair_pack_file, 'wb') as pair_file_out:
        msgpack_numpy.dump(
            (same_person_score_pair_dic, same_person_score,
             no_same_person_score_pair_dic, no_same_person_score),
            pair_file_out)