def _first_level_segment_upload(segm_i):
    """Upload the rows of ``sp_mz_int_buf`` that fall into first-level segment ``segm_i``.

    The segment spans from the lower bound of its first sub-segment to the
    upper bound of its last sub-segment, as read from ``ds_segments_bounds``.

    Args:
        segm_i: Index into ``ds_segments_bounds``.

    Returns:
        Whatever ``storage.put_cobject`` returns for the serialized slice.
    """
    bounds = ds_segments_bounds[segm_i]
    lower_mz = bounds[0, 0]
    upper_mz = bounds[-1, 1]
    # mz expected to be in column 1; searchsorted assumes that column is
    # already in ascending order.
    mz_column = sp_mz_int_buf[:, 1]
    first_row, last_row = np.searchsorted(mz_column, (lower_mz, upper_mz))
    payload = msgpack.dumps(sp_mz_int_buf[first_row:last_row])
    return storage.put_cobject(payload)
def upload_chunk(ch_i, storage):
    """Collect the spectra of chunk ``ch_i`` into one mz-sorted array and upload it.

    Each row of the uploaded array is ``(spectrum index, mz, intensity)``.

    Args:
        ch_i: Index into ``chunks`` selecting the spectrum ids to process.
        storage: Storage client used to read the pickled reader and to
            store the resulting chunk.

    Returns:
        The object returned by ``storage.put_cobject`` for the chunk.
    """
    chunk_sp_inds = chunks[ch_i]

    # Get imzml_reader from COS because it's too big to include via pywren captured vars
    # NOTE(review): pickle.loads can execute arbitrary code; this is only
    # safe while the stored object is written exclusively by trusted code.
    imzml_reader = pickle.loads(
        read_cloud_object_with_retry(storage, imzml_cobject))

    n_peaks = sum(imzml_reader.mzLengths[sp_i] for sp_i in chunk_sp_inds)
    sp_mz_int_buf = np.zeros((n_peaks, 3), dtype=imzml_reader.mzPrecision)

    row_start = 0
    for sp_i, mzs, ints in get_spectra(ibd_path, imzml_reader, chunk_sp_inds):
        row_end = row_start + len(mzs)
        sp_mz_int_buf[row_start:row_end, 0] = sp_id_to_idx[sp_i]
        sp_mz_int_buf[row_start:row_end, 1] = mzs
        sp_mz_int_buf[row_start:row_end, 2] = ints
        row_start = row_end

    # Sort all rows by mz (column 1); the index array is a temporary and is
    # released as soon as the expression has been evaluated.
    sp_mz_int_buf = sp_mz_int_buf[np.argsort(sp_mz_int_buf[:, 1])]

    chunk = msgpack.dumps(sp_mz_int_buf)
    # len() is the payload size actually sent; sys.getsizeof would also
    # count the interpreter's per-object overhead.
    size_mb = len(chunk) / 1024 ** 2
    logger.info('Uploading spectra chunk %s - %.2f MB', ch_i, size_mb)
    chunk_cobject = storage.put_cobject(chunk)
    logger.info('Spectra chunk %s finished', ch_i)
    return chunk_cobject
def merge_spectra_chunk_segments(segm_cobjects, id, storage):
    """Merge the per-chunk pieces of one first-level segment and split it into dataset segments.

    All pieces are downloaded in parallel, concatenated, sorted by mz
    (column 1) and then cut along the bounds in ``ds_segments_bounds[id]``.
    Every sub-segment is uploaded as its own cloud object.

    Args:
        segm_cobjects: Cloud objects holding the pieces of this segment.
        id: Index of the first-level segment in ``ds_segments_bounds``.
        storage: Storage client used for downloads and uploads.

    Returns:
        ``(segms_len, segms_cobjects)``: the row count of every sub-segment
        and the matching objects returned by ``storage.put_cobject``.
    """
    print(f'Merging segment {id} spectra chunks')

    def _merge(ch_i):
        return read_cloud_object_with_retry(
            storage, segm_cobjects[ch_i], msgpack.load)

    with ThreadPoolExecutor(max_workers=128) as pool:
        segm = list(pool.map(_merge, range(len(segm_cobjects))))

    segm = np.concatenate(segm)

    # Alternative in-place sorting (slower) :
    # segm.view(f'{ds_segm_dtype},{ds_segm_dtype},{ds_segm_dtype}').sort(order=['f1'], axis=0)
    segm = segm[segm[:, 1].argsort()]

    bounds_list = ds_segments_bounds[id]
    # Number of dataset segments produced by all preceding first-level
    # segments. It does not depend on the loop variable, so compute it once.
    base_id = sum(len(bounds) for bounds in ds_segments_bounds[:id])

    segms_len = []
    segms_cobjects = []
    for segm_j, (lower, upper) in enumerate(bounds_list):
        # mz expected to be in column 1
        segm_start, segm_end = np.searchsorted(segm[:, 1], (lower, upper))
        sub_segm = segm[segm_start:segm_end]
        segms_len.append(len(sub_segm))
        segm_i = base_id + segm_j
        print(f'Storing dataset segment {segm_i}')
        segms_cobjects.append(storage.put_cobject(msgpack.dumps(sub_segm)))

    return segms_len, segms_cobjects
def store_formula_to_id_chunk(ch_i, ibm_cos):
    """Build and upload one chunk of the formula -> index dictionary.

    The formula segments belonging to chunk ``ch_i`` are downloaded in
    parallel, each turned into a ``{formula: index}`` mapping, and the
    merged mapping is stored as a single msgpack object.

    Args:
        ch_i: Index of the formula_to_id chunk to produce.
        ibm_cos: Object-storage client used for reads and the final write.
    """
    print(f'Storing formula_to_id dictionary chunk {ch_i}')

    segments_per_chunk = N_FORMULAS_SEGMENTS // N_FORMULA_TO_ID
    first_segment = segments_per_chunk * ch_i
    stop_segment = segments_per_chunk * (ch_i + 1)
    keys = [
        f'{formulas_chunks_prefix}/{formulas_chunk}.msgpack'
        for formulas_chunk in range(first_segment, stop_segment)
    ]

    def _get(key):
        formulas = read_object_with_retry(ibm_cos, bucket, key,
                                          pd.read_msgpack)
        return dict(zip(formulas.formula, formulas.index))

    with ThreadPoolExecutor(max_workers=128) as pool:
        partial_maps = list(pool.map(_get, keys))

    # Later segments win when the same formula appears more than once.
    formula_to_id = {}
    for partial_map in partial_maps:
        formula_to_id.update(partial_map)

    object_key = f'{formula_to_id_chunks_prefix}/{ch_i}.msgpack'
    ibm_cos.put_object(Bucket=bucket,
                       Key=object_key,
                       Body=msgpack.dumps(formula_to_id))
def merge_spectra_chunk_segments(segm_i, ibm_cos):
    """Merge the stored pieces of first-level segment ``segm_i`` and split it into dataset segments.

    The pieces under ``{ds_segments_prefix}/chunk/{segm_i}/`` are
    downloaded in parallel, concatenated and sorted by mz (column 1). The
    temporary pieces are then removed and the merged array is cut along
    ``ds_segments_bounds[segm_i]``; each part is stored as
    ``{ds_segments_prefix}/{id}.msgpack``.

    Args:
        segm_i: Index of the first-level segment in ``ds_segments_bounds``.
        ibm_cos: Object-storage client used for listing, reads and writes.

    Returns:
        List with the row count of every stored sub-segment.
    """
    print(f'Merging segment {segm_i} spectra chunks')

    chunk_prefix = f'{ds_segments_prefix}/chunk/{segm_i}/'
    keys = list_keys(bucket, chunk_prefix, ibm_cos)

    def _merge(key):
        return read_object_with_retry(ibm_cos, bucket, key, msgpack.load)

    with ThreadPoolExecutor(max_workers=128) as pool:
        segm = list(pool.map(_merge, keys))

    segm = np.concatenate(segm)

    # Alternative in-place sorting (slower) :
    # segm.view(f'{segm_dtype},{segm_dtype},{segm_dtype}').sort(order=['f1'], axis=0)
    segm = segm[segm[:, 1].argsort()]

    # NOTE(review): the temporary pieces are deleted before the merged
    # sub-segments are written, so a failed upload below cannot be retried
    # from storage. Order kept as in the original.
    clean_from_cos(None, bucket, chunk_prefix, ibm_cos)

    bounds_list = ds_segments_bounds[segm_i]
    # Number of dataset segments produced by all preceding first-level
    # segments. It does not depend on the loop variable, so compute it once.
    base_id = sum(len(bounds) for bounds in ds_segments_bounds[:segm_i])

    segms_len = []
    for segm_j, (lower, upper) in enumerate(bounds_list):
        # mz expected to be in column 1
        segm_start, segm_end = np.searchsorted(segm[:, 1], (lower, upper))
        sub_segm = segm[segm_start:segm_end]
        segms_len.append(len(sub_segm))
        ds_segm_id = base_id + segm_j
        print(f'Storing dataset segment {ds_segm_id}')
        ibm_cos.put_object(Bucket=bucket,
                           Key=f'{ds_segments_prefix}/{ds_segm_id}.msgpack',
                           Body=msgpack.dumps(sub_segm))

    return segms_len
def convert_to_bytes(obj):
    """
    Serialize ``obj`` with ``msgpack_numpy`` using the binary type.

    Returns:
        The value produced by ``msgpack_numpy.dumps`` (a bytes-like object).
    """
    packed = msgpack_numpy.dumps(obj, use_bin_type=True)
    return packed
def _first_level_segment_upload(segm_i):
    """Store the rows of ``sp_mz_int_buf`` that fall into first-level segment ``segm_i``.

    The segment spans from the lower bound of its first sub-segment to the
    upper bound of its last sub-segment, as read from ``ds_segments_bounds``.
    Nothing is returned; the slice is written to object storage.

    Args:
        segm_i: Index into ``ds_segments_bounds``.
    """
    bounds = ds_segments_bounds[segm_i]
    lower_mz = bounds[0, 0]
    upper_mz = bounds[-1, 1]
    # mz expected to be in column 1; searchsorted assumes that column is
    # already in ascending order.
    mz_column = sp_mz_int_buf[:, 1]
    first_row, last_row = np.searchsorted(mz_column, (lower_mz, upper_mz))
    body = msgpack.dumps(sp_mz_int_buf[first_row:last_row])
    # NOTE(review): `id` is not defined in this function. It resolves to a
    # variable of the enclosing scope, or to the builtin `id` if none
    # exists. Confirm against the enclosing function.
    ibm_cos.put_object(Bucket=bucket,
                       Key=f'{ds_segments_prefix}/chunk/{segm_i}/{id}.msgpack',
                       Body=body)
def check_face_img(face_img):
    '''
    Ask the pose service whether a face is frontal enough to be recognised.

    The service answers with pose_predict: [[pitch, yaw, roll]]
    (pitch: nodding up/down; yaw: turning left/right; roll: tilting sideways).
    The face passes when the absolute value of each angle is below its
    module-level threshold. Every decision except a non-200 response is
    appended to the log file of the current day.

    :param face_img: matrix (array) of the face image
    :return: whether to run recognition (False: do not recognise)
    '''
    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path, including the
    # non-200 branch and a failing HTTP request.
    with open(log_path, 'a') as log_file:

        def _log(*fields):
            log_file.write('\t'.join(map(str, fields)) + '\n')

        request = {
            "request_type": 'check_pose',
            "face_img_str": base64.b64encode(msgpack_numpy.dumps(face_img)),
            "image_id": str(time.time()),
        }
        # Network errors propagate to the caller, as in the original.
        result = requests.post(angle_url, data=request)
        try:
            if result.status_code != 200:
                return False

            pose_predict = json.loads(result.content)["pose_predict"]
            if not pose_predict:
                # loading failed
                _log('pose filter request')
                return False

            pose_predict = msgpack_numpy.loads(base64.b64decode(pose_predict))
            # Identity test: `== None` on a numpy array compares elementwise.
            if pose_predict is None:
                _log('pose filter detect')
                return False

            pitch, yaw, roll = pose_predict[0]
            if (math.fabs(pitch) < pitch_threshold
                    and math.fabs(yaw) < yaw_threshold
                    and math.fabs(roll) < roll_threshold):
                _log('pose not filter', str(pose_predict[0]))
                return True

            _log('pose filter threshold', str(pose_predict[0]))
            return False
        except Exception:
            traceback.print_exc()
            return False
def _upload_chunk(ch_i, sp_mz_int_buf):
    """Serialize one spectra chunk and store it in the dataset bucket.

    Args:
        ch_i: Chunk index; used in the object key and in log messages.
        sp_mz_int_buf: The chunk data to serialize with ``msgpack.dumps``.

    Returns:
        The object key the chunk was stored under.
    """
    chunk = msgpack.dumps(sp_mz_int_buf)
    key = f'{input_data["ds_chunks"]}/{ch_i}.msgpack'
    # len() is the payload size actually sent; sys.getsizeof would also
    # count the interpreter's per-object overhead.
    size_mb = len(chunk) / 1024 ** 2
    logger.info('Uploading spectra chunk %s - %.2f MB', ch_i, size_mb)
    cos_client.put_object(Bucket=config["storage"]["ds_bucket"],
                          Key=key,
                          Body=chunk)
    logger.info('Spectra chunk %s finished', ch_i)
    return key
def check_face_img(self, face_img, image_id):
    '''
    Pose check: ask the angle service whether this face should be recognised.

    The face passes when the absolute pitch, yaw and roll returned by the
    service are each below the matching threshold on ``self``. Rejections
    are appended to the log file of the current day; a pass is not logged.

    :param face_img: matrix (array) of the face image
    :param image_id: id of the image; sent with the request and logged
    :return: whether to run recognition (False: do not recognise)
    '''
    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path, including a
    # failing request.
    with open(log_path, 'a') as log_file:

        def _log(reason):
            log_file.write('\t'.join(map(str, [image_id, reason])) + '\n')

        request = {
            "request_type": 'check_pose',
            "face_img_str": base64.b64encode(msgpack_numpy.dumps(face_img)),
            "image_id": image_id,
        }
        url = "http://%s:%d/" % (check_ip, check_port)
        # Errors raised by the request propagate, as in the original.
        result = image_request(request, url)
        try:
            pose_predict = json.loads(result)["pose_predict"]
            if not pose_predict:
                # loading failed
                _log('pose filter request')
                return False

            pose_predict = msgpack_numpy.loads(base64.b64decode(pose_predict))
            # Identity test: `== None` on a numpy array compares elementwise.
            if pose_predict is None:
                _log('pose filter detect')
                return False

            pitch, yaw, roll = pose_predict[0]
            if (math.fabs(pitch) < self.pitch_threshold
                    and math.fabs(yaw) < self.yaw_threshold
                    and math.fabs(roll) < self.roll_threshold):
                return True

            _log('pose filter threshold')
            return False
        except Exception:
            traceback.print_exc()
            return False
def get_all_img_feature(feature_result_file='/tmp/annotate_light_cnn_feature.p',
                        folder='/tmp/all_images'):
    '''
    Extract a feature for every picture under ``folder`` and dump them to a file.

    ``folder`` is expected to contain one sub-directory per person. For
    every picture one line is written: the base64 text of the
    msgpack_numpy-serialized tuple ``(feature, person_path)``.

    :param feature_result_file: output file; overwritten if it exists
    :param folder: root directory holding the per-person directories
    '''
    # `with` guarantees the file is flushed and closed even if feature
    # extraction raises half-way through.
    with open(feature_result_file, 'w') as f:
        for person in os.listdir(folder):
            person_path = os.path.join(folder, person)
            for pic in os.listdir(person_path):
                pic_path = os.path.join(person_path, pic)
                feature = np.asarray(extract_feature_from_file(pic_path)[0])
                # NOTE(review): concatenating the b64encode result with
                # '\n' relies on Python 2 str semantics.
                record = base64.b64encode(
                    msgpack_numpy.dumps((feature, person_path)))
                f.write(record + '\n')
def emit(self, topic, data):
    """ Publish ``data`` under ``topic`` and mirror it to the local queues.

    A ``str`` topic is encoded to bytes first. 'results' messages are put
    on the recorder queue; 'status' and 'progress' messages are put on the
    monitor queue together with the decoded topic name.
    """
    topic_bytes = topic.encode() if isinstance(topic, str) else topic

    log.debug("Emitting message: %s %s" % (topic_bytes, data))

    try:
        self.publisher.send_multipart([topic_bytes, dumps(data)])
    except (NameError, AttributeError):
        pass  # No dumps defined

    if topic_bytes == b'results':
        self.recorder_queue.put(data)
    elif topic_bytes in (b'status', b'progress'):
        self.monitor_queue.put((topic_bytes.decode(), data))
def get_pic_feature(pic_folder, result_file):
    '''
    Extract the feature of every picture in ``pic_folder`` into ``result_file``.

    One line is written per picture: the gbk-decoded path, a tab, and the
    base64 text of the msgpack_numpy-serialized feature. A picture that
    fails is reported with a traceback and skipped.

    :param pic_folder: directory containing the pictures
    :param result_file: output file; overwritten if it exists
    '''
    # `with` closes the output even if listing the folder fails.
    with open(result_file, 'w') as f_result:
        for pic in os.listdir(pic_folder):
            try:
                pic_path = os.path.join(pic_folder, pic)
                feature = extract_feature_from_file(pic_path)
                # NOTE(review): under Python 2 this writes a unicode value
                # to a byte file, which fails for non-ASCII paths and
                # therefore skips them. Confirm this is intended.
                f_result.write(
                    pic_path.decode('gbk') + '\t' +
                    base64.b64encode(msgpack_numpy.dumps(feature)) + '\n')
                print(pic_path.decode('gbk'))
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the run.
                traceback.print_exc()
                continue
def extract_all_feature(folder, result_file, extract_func):
    '''
    Run ``extract_func`` on every picture under ``folder`` and write a TSV file.

    ``folder`` is expected to contain one sub-directory per person. The
    output starts with a ``pic_path<TAB>feature`` header line, followed by
    one line per picture holding its path and the base64 text of the
    msgpack_numpy-serialized feature.

    :param folder: root directory holding the per-person directories
    :param result_file: output file; overwritten if it exists
    :param extract_func: callable taking a picture path and returning its feature
    '''
    # `with` guarantees the file is flushed and closed even if
    # extract_func raises half-way through.
    with open(result_file, 'w') as f:
        f.write('pic_path' + '\t' + 'feature' + '\n')
        for person in os.listdir(folder):
            print(person)
            person_path = os.path.join(folder, person)
            for pic in os.listdir(person_path):
                pic_path = os.path.join(person_path, pic)
                feature = extract_func(pic_path)
                feature_str = base64.b64encode(msgpack_numpy.dumps(feature))
                # NOTE(review): str + b64encode result relies on Python 2
                # str semantics.
                f.write(pic_path + '\t' + feature_str + '\n')
def get_all_img_feature(feature_result_file='/tmp/annotate_light_cnn_feature.p',
                        folder='/tmp/all_images'):
    '''
    Extract a feature for every picture under ``folder`` and dump them to a file.

    ``folder`` is expected to contain one sub-directory per person. For
    every picture one line is written: the base64 text of the
    msgpack_numpy-serialized tuple ``(feature, person_path)``.

    :param feature_result_file: output file; overwritten if it exists
    :param folder: root directory holding the per-person directories
    '''
    # `with` guarantees the file is flushed and closed even if feature
    # extraction raises half-way through.
    with open(feature_result_file, 'w') as f:
        for person in os.listdir(folder):
            person_path = os.path.join(folder, person)
            for pic in os.listdir(person_path):
                pic_path = os.path.join(person_path, pic)
                feature = np.asarray(extract_feature_from_file(pic_path)[0])
                # NOTE(review): concatenating the b64encode result with
                # '\n' relies on Python 2 str semantics.
                record = base64.b64encode(
                    msgpack_numpy.dumps((feature, person_path)))
                f.write(record + '\n')
def get_pic_feature(pic_folder, result_file):
    '''
    Extract the feature of every picture in ``pic_folder`` into ``result_file``.

    One line is written per picture: the gbk-decoded path, a tab, and the
    base64 text of the msgpack_numpy-serialized feature. A picture that
    fails is reported with a traceback and skipped.

    :param pic_folder: directory containing the pictures
    :param result_file: output file; overwritten if it exists
    '''
    # `with` closes the output even if listing the folder fails.
    with open(result_file, 'w') as f_result:
        for pic in os.listdir(pic_folder):
            try:
                pic_path = os.path.join(pic_folder, pic)
                feature = extract_feature_from_file(pic_path)
                # NOTE(review): under Python 2 this writes a unicode value
                # to a byte file, which fails for non-ASCII paths and
                # therefore skips them. Confirm this is intended.
                f_result.write(
                    pic_path.decode('gbk') + '\t' +
                    base64.b64encode(msgpack_numpy.dumps(feature)) + '\n')
                print(pic_path.decode('gbk'))
            except Exception:
                # Exception (not a bare except) so Ctrl-C still stops the run.
                traceback.print_exc()
                continue
def store_formula_to_id(storage, bucket, formula_to_id_chunks_prefix,
                        formulas_df):
    """Split ``formulas_df`` into evenly sized chunks and store each as msgpack.

    The chunk count is ``ceil(len(formulas_df) * 200 / chunk size in bytes)``.
    The factor 200 is presumably an estimated size per entry (TODO confirm).
    Each stored dictionary is the ``formula`` column of the slice converted
    with ``Series.to_dict()``.

    Args:
        storage: Client exposing ``put_object(Bucket=, Key=, Body=)``.
        bucket: Target bucket name.
        formula_to_id_chunks_prefix: Key prefix for the stored chunks.
        formulas_df: DataFrame with a ``formula`` column.

    Returns:
        The number of chunks written.
    """
    num_formulas = len(formulas_df)
    chunk_size_bytes = FORMULA_TO_ID_CHUNK_MB * 1024**2
    n_formula_to_id = int(math.ceil(num_formulas * 200 / chunk_size_bytes))

    for ch_i in range(n_formula_to_id):
        print(f'Storing formula_to_id dictionary chunk {ch_i}')
        # Integer arithmetic keeps the slices contiguous and covering all rows.
        first_row = num_formulas * ch_i // n_formula_to_id
        stop_row = num_formulas * (ch_i + 1) // n_formula_to_id
        rows = formulas_df.iloc[first_row:stop_row]
        formula_to_id = rows.formula.to_dict()
        object_key = f'{formula_to_id_chunks_prefix}/{ch_i}.msgpack'
        storage.put_object(Bucket=bucket,
                           Key=object_key,
                           Body=msgpack.dumps(formula_to_id))

    return n_formula_to_id
def valid_one_pic_pose(face_img, image_id):
    '''
    POST one face image to the pose-check server and discard the answer.

    The response body is read but not used; on an HTTP error the status
    code is printed. Nothing is returned.

    :param face_img: face image; serialized with msgpack_numpy + base64
    :param image_id: id sent along with the request
    '''
    request = {
        "request_type": 'check_pose',
        "face_img_str": base64.b64encode(msgpack_numpy.dumps(face_img)),
        "image_id": image_id,
    }
    requestPOST = urllib2.Request(data=urllib.urlencode(request),
                                  url="http://10.160.164.26:%d/" % check_port)
    requestPOST.get_method = lambda: "POST"
    try:
        response = urllib2.urlopen(requestPOST)
        try:
            # Drain the body so the server's reply is fully consumed.
            response.read()
        finally:
            # Release the connection; the original never closed it.
            response.close()
    except urllib2.HTTPError as e:
        print(e.code)
def post(self):
    '''
    Handle a POST request; only ``request_type == 'check_pose'`` is served.

    The face image is decoded from the ``face_img_str`` body argument
    (base64 + msgpack_numpy), passed to
    ``angle_calculate_server.calculate_angle`` and the result is written
    back as JSON: ``{"pose_predict": <base64 of msgpack_numpy bytes>}``.
    On any error a traceback is printed and nothing is written.
    '''
    request_type = self.get_body_argument('request_type')
    if request_type != 'check_pose':
        return
    try:
        image_id = self.get_body_argument("image_id")
        face_img_str = self.get_body_argument("face_img_str")
        print("receive image %s %s" % (image_id, time.time()))
        face_img = msgpack_numpy.loads(base64.b64decode(face_img_str))
        start = time.time()
        pose_predict = angle_calculate_server.calculate_angle(
            face_img, image_id)
        end = time.time()
        pose_predict = base64.b64encode(msgpack_numpy.dumps(pose_predict))
        print('pose predict time : %s' % (end - start))
        self.write(json.dumps({"pose_predict": pose_predict}))
    except Exception:
        # Exception (not a bare except) so SystemExit / KeyboardInterrupt
        # can still stop the server.
        # NOTE(review): the client receives an empty body in this case.
        traceback.print_exc()
        return
def check_face_img(self, face_img, image_id):
    '''
    Pose check: ask the angle service whether this face should be recognised.

    The face passes when the absolute pitch, yaw and roll returned by the
    service are each below the matching threshold on ``self``. Rejections
    are appended to the log file of the current day; a pass is not logged.

    :param face_img: matrix (array) of the face image
    :param image_id: id of the image; sent with the request and logged
    :return: whether to run recognition (False: do not recognise)
    '''
    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path, including a
    # failing request.
    with open(log_path, 'a') as log_file:

        def _log(reason):
            log_file.write('\t'.join(map(str, [image_id, reason])) + '\n')

        request = {
            "request_type": 'check_pose',
            "face_img_str": base64.b64encode(msgpack_numpy.dumps(face_img)),
            "image_id": image_id,
        }
        url = "http://%s:%d/" % (check_ip, check_port)
        # Errors raised by the request propagate, as in the original.
        result = image_request(request, url)
        try:
            pose_predict = json.loads(result)["pose_predict"]
            if not pose_predict:
                # loading failed
                _log('pose filter request')
                return False

            pose_predict = msgpack_numpy.loads(base64.b64decode(pose_predict))
            # Identity test: `== None` on a numpy array compares elementwise.
            if pose_predict is None:
                _log('pose filter detect')
                return False

            pitch, yaw, roll = pose_predict[0]
            if (math.fabs(pitch) < self.pitch_threshold
                    and math.fabs(yaw) < self.yaw_threshold
                    and math.fabs(roll) < self.roll_threshold):
                return True

            _log('pose filter threshold')
            return False
        except Exception:
            traceback.print_exc()
            return False
def check_face_img(face_img):
    '''
    Ask the pose service whether a face is frontal enough to be recognised.

    The service answers with pose_predict: [[pitch, yaw, roll]]
    (pitch: nodding up/down; yaw: turning left/right; roll: tilting sideways).
    The face passes when the absolute value of each angle is below its
    module-level threshold. Every decision except a non-200 response is
    appended to the log file of the current day.

    :param face_img: matrix (array) of the face image
    :return: whether to run recognition (False: do not recognise)
    '''
    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path, including the
    # non-200 branch and a failing HTTP request.
    with open(log_path, 'a') as log_file:

        def _log(*fields):
            log_file.write('\t'.join(map(str, fields)) + '\n')

        request = {
            "request_type": 'check_pose',
            "face_img_str": base64.b64encode(msgpack_numpy.dumps(face_img)),
            "image_id": str(time.time()),
        }
        # Network errors propagate to the caller, as in the original.
        result = requests.post(angle_url, data=request)
        try:
            if result.status_code != 200:
                return False

            pose_predict = json.loads(result.content)["pose_predict"]
            if not pose_predict:
                # loading failed
                _log('pose filter request')
                return False

            pose_predict = msgpack_numpy.loads(base64.b64decode(pose_predict))
            # Identity test: `== None` on a numpy array compares elementwise.
            if pose_predict is None:
                _log('pose filter detect')
                return False

            pitch, yaw, roll = pose_predict[0]
            if (math.fabs(pitch) < pitch_threshold
                    and math.fabs(yaw) < yaw_threshold
                    and math.fabs(roll) < roll_threshold):
                _log('pose not filter', str(pose_predict[0]))
                return True

            _log('pose filter threshold', str(pose_predict[0]))
            return False
        except Exception:
            traceback.print_exc()
            return False
def valid_one_pic_pose(face_img, image_id):
    '''
    POST one face image to the pose-check server and discard the answer.

    The response body is read but not used; on an HTTP error the status
    code is printed. Nothing is returned.

    :param face_img: face image; serialized with msgpack_numpy + base64
    :param image_id: id sent along with the request
    '''
    request = {
        "request_type": 'check_pose',
        "face_img_str": base64.b64encode(msgpack_numpy.dumps(face_img)),
        "image_id": image_id,
    }
    requestPOST = urllib2.Request(data=urllib.urlencode(request),
                                  url="http://10.160.164.26:%d/" % check_port)
    requestPOST.get_method = lambda: "POST"
    try:
        response = urllib2.urlopen(requestPOST)
        try:
            # Drain the body so the server's reply is fully consumed.
            response.read()
        finally:
            # Release the connection; the original never closed it.
            response.close()
    except urllib2.HTTPError as e:
        print(e.code)
def post(self):
    '''
    Handle a POST request; only ``request_type == 'check_pose'`` is served.

    The face image is decoded from the ``face_img_str`` body argument
    (base64 + msgpack_numpy), passed to
    ``angle_calculate_server.calculate_angle`` and the result is written
    back as JSON: ``{"pose_predict": <base64 of msgpack_numpy bytes>}``.
    On any error a traceback is printed and nothing is written.
    '''
    request_type = self.get_body_argument('request_type')
    if request_type != 'check_pose':
        return
    try:
        image_id = self.get_body_argument("image_id")
        face_img_str = self.get_body_argument("face_img_str")
        print("receive image %s %s" % (image_id, time.time()))
        face_img = msgpack_numpy.loads(base64.b64decode(face_img_str))
        start = time.time()
        pose_predict = angle_calculate_server.calculate_angle(
            face_img, image_id)
        end = time.time()
        pose_predict = base64.b64encode(msgpack_numpy.dumps(pose_predict))
        print('pose predict time : %s' % (end - start))
        self.write(json.dumps({"pose_predict": pose_predict}))
    except Exception:
        # Exception (not a bare except) so SystemExit / KeyboardInterrupt
        # can still stop the server.
        # NOTE(review): the client receives an empty body in this case.
        traceback.print_exc()
        return
def recognize_one_feature(self, im_feature, image_id):
    '''
    Determine the label of a face from its feature.

    Flow: look up similar recently seen faces, find the nearest stored
    faces with the LSH Forest, merge and filter both lists, then let
    ``evaluate_result`` pick an id/label. Progress is appended to the log
    file of the current day.

    :param im_feature: feature of the face; also returned to the caller
        as base64(msgpack_numpy) text
    :param image_id: file name of the full image + face_id (index of the
        face), to make the face easy to locate; it is also parsed with
        ``float()`` as the time passed to ``cal_nearest_sim``
    :return: tuple ``(label, score, feature_str, need_save)``; score and
        need_save are strings
    '''
    start = time.time()
    feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))

    def _unrecognised():
        return self.unknown, str(self.max_dist_threshold), feature_str, str(False)

    log_path = os.path.join(self.log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path; the original
    # leaked the handle on the 'error para' return.
    with open(log_path, 'a') as log_file:

        def _log(*fields):
            log_file.write('\t'.join(map(str, fields)) + '\n')

        _log(get_current_time(), "receive image", image_id)
        try:
            # Flow: find nearest images ; compute prob ; online clustering
            try:
                nearest_sim_list = self.cal_nearest_sim(
                    current_time=float(image_id), current_feature=im_feature)
            except Exception:
                traceback.print_exc()
                nearest_sim_list = []

            # Find the nearest images: the LSH Forest returns the closest
            # candidates, then the distance to each one is evaluated.
            dist_label_list = self.find_k_neighbors_with_lsh(im_feature)
            dist_label_list.extend(nearest_sim_list)
            dist_label_list = self.filter_result(dist_label_list)
            dist_label_list.sort(key=lambda x: x[0], reverse=True)

            # NOTE(review): the None checks below can never be true here,
            # because .sort() above already raises on None (which ends in
            # the outer handler). Kept unchanged pending confirmation of
            # the intended behaviour.
            if dist_label_list is None:
                # New persons are not tracked; everything below the
                # threshold is reported as new_person.
                this_id = self.must_be_not_same_id
                this_label = 'new_person'
            else:
                # Compute prob from the distances.
                this_id, this_label = self.evaluate_result(dist_label_list)

            if dist_label_list is not None and len(dist_label_list) > 0:
                _log(get_current_time(), 'dist_label_list :',
                     [str(item) for item in dist_label_list])

            # need_save decides whether the caller stores the result.
            # Nothing is added to the LSH Forest in the current version.
            if this_id in (self.same_pic_id, self.must_be_same_id):
                need_save = True
            elif this_id == self.must_be_not_same_id:
                # The current version does not add new persons; everything
                # unrecognised is returned as new_person.
                this_label = 'new_person'
                need_save = True
            elif this_id == self.maybe_same_id:
                need_save = False
            else:
                _log(get_current_time(), 'error para :', this_id)
                return _unrecognised()

            self.nearest.append((this_label, im_feature, image_id))

            # Statistics, e.g. to count how many images fall in the gray area.
            _log('stat', 'recognize_id', self.trans_dic[this_id],
                 'recog time :', (time.time() - start))

            if this_id in (self.same_pic_id, self.must_be_not_same_id,
                           self.must_be_same_id):
                if this_label is None or dist_label_list is None:
                    # The database may hold nobody yet; this_label is None then.
                    return _unrecognised()
                return (this_label.replace(self.must_same_str, ''),
                        str(dist_label_list[0][0]), feature_str,
                        str(need_save))

            # Gray area: do not show a name.
            return (self.unknown, str(dist_label_list[0][0]), feature_str,
                    str(need_save))
        except Exception:
            traceback.print_exc()
            return _unrecognised()
def recognize_online_cluster(self, image, image_id):
    '''
    Recognise one uploaded face picture and cluster it online.

    The picture is decoded, filtered by size and sharpness and saved.
    Its feature is then compared with recently seen faces and with the
    LSH Forest, and ``evaluate_result`` decides whether it is a new
    person, certainly a known person, or only possibly a known person.

    :param image: base64 text of the zlib-compressed encoded picture
    :param image_id: id of the picture; used for file names and logging
    :return: tuple ``(label, score, feature_str, need_save)``. The early
        exits return the float 1.0 and a bool; the final paths return
        strings (kept as in the original, callers may rely on it).
    '''
    start = time.time()
    need_add = False
    need_save = False
    feature_str = ''

    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path; the original
    # leaked the handle when saving the picture failed.
    with open(log_path, 'a') as log_file:

        def _log(*fields):
            log_file.write('\t'.join(map(str, fields)) + '\n')

        _log("receive image", image_id, time.time())

        try:
            image = base64.decodestring(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.fromstring(image, dtype=np.uint8), 1)
            _log('shape :', im.shape[0], im.shape[1])

            # Picture size filter
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                _log('stat recognize_time :', (time.time() - start), 'small_size')
                return self.unknown, 1.0, feature_str, need_save

            # Sharpness filter
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                _log('stat recognize_time :', (time.time() - start),
                     'blur_filter', blur_var)
                return self.unknown, 1.0, feature_str, need_save

            # Save the received picture
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id + '.jpg')
            cv2.imwrite(img_file, im)
        except Exception:
            traceback.print_exc()
            return self.unknown, 1.0, feature_str, need_save

        try:
            # Flow: find nearest images ; compute prob ; online clustering ;
            # add to the LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                _log('stat not_find_face', 'time :', (time.time() - start))
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result

            try:
                # nearest_sim_list has the same format as dist_label_list,
                # so both can be merged and evaluated together.
                nearest_sim_list = self.cal_nearest_sim(
                    current_feature=im_feature)
            except Exception:
                traceback.print_exc()
                nearest_sim_list = []
            _log('nearest_sim_list :', [str(item) for item in nearest_sim_list])

            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            _log('extract_feature_time :', (time.time() - start))

            # Find the nearest images with the LSH Forest, then append them
            # after the (sorted) recently seen faces.
            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]
            _log('dist_label_list :', [str(item) for item in dist_label_list])

            # NOTE(review): a slice is never None, so the first branch is
            # unreachable. Kept unchanged pending confirmation of intent.
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(self.current_new_person_id)
            else:
                # Compute prob from the distances.
                this_id, this_label = self.evaluate_result(dist_label_list)

            # Regardless of the probability, remember the newest picture.
            self.nearest.append((this_label, im_feature))
            _log('self.nearest :', [str(item) for item in self.nearest])

            # Online clustering: decide from the result whether this is a
            # new person or belongs to an existing one.
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.must_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.must_same_str)
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(self.current_new_person_id)
                self.current_new_person_id += 1
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder, this_label)
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.maybe_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.maybe_same_str)
                need_add = False  # gray-area results are not added
                need_save = True
            else:
                _log('error para :', this_id)

            if need_save:
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the feature of the picture and the face picture.
                    this_pic_feature_name = os.path.join(
                        this_person_feature_folder, image_id + '.p')
                    # `with` flushes and closes the feature file; the
                    # original left the handle open.
                    with open(this_pic_feature_name, 'wb') as feature_file:
                        msgpack_numpy.dump(im_feature, feature_file)
                    this_pic_face_name = os.path.join(
                        this_person_pic_folder, image_id + '.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except Exception:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False

            # Add to the LSH Forest
            if need_add:
                self.add_one_pic(im_feature, this_label)

            if this_id in (self.same_pic_id, self.must_be_not_same_id,
                           self.must_be_same_id):
                _log('stat recognize_time :', (time.time() - start),
                     'this_id :', self.trans_dic.get(this_id))
                return (this_label.replace(self.must_same_str, ''),
                        str(dist_label_list[0][0]), str(feature_str),
                        str(True))

            # Gray area: do not show a name.
            _log('stat gray_area :', (time.time() - start))
            return (self.unknown, str(dist_label_list[0][0]),
                    str(feature_str), str(False))
        except Exception:
            traceback.print_exc()
            return self.unknown, str(100.0), str(feature_str), str(False)
def recognize_online_cluster(self, image, image_id):
    '''
    Recognise one uploaded face picture and cluster it online.

    The picture is decoded, filtered by size and sharpness and saved.
    Its feature is then compared with recently seen faces and with the
    LSH Forest, and ``evaluate_result`` decides whether it is a new
    person, certainly a known person, or only possibly a known person.

    :param image: base64 text of the zlib-compressed encoded picture
    :param image_id: id of the picture; used for file names and logging
    :return: tuple ``(label, score, feature_str, need_save)``. The early
        exits return the float 1.0 and a bool; the final paths return
        strings (kept as in the original, callers may rely on it).
    '''
    start = time.time()
    need_add = False
    need_save = False
    feature_str = ''

    log_path = os.path.join(log_dir, get_current_day() + '.txt')
    # The context manager closes the log on every exit path; the original
    # leaked the handle when saving the picture failed.
    with open(log_path, 'a') as log_file:

        def _log(*fields):
            log_file.write('\t'.join(map(str, fields)) + '\n')

        _log("receive image", image_id, time.time())

        try:
            image = base64.decodestring(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.fromstring(image, dtype=np.uint8), 1)
            _log('shape :', im.shape[0], im.shape[1])

            # Picture size filter
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                _log('stat recognize_time :', (time.time() - start), 'small_size')
                return self.unknown, 1.0, feature_str, need_save

            # Sharpness filter
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                _log('stat recognize_time :', (time.time() - start),
                     'blur_filter', blur_var)
                return self.unknown, 1.0, feature_str, need_save

            # Save the received picture
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id + '.jpg')
            cv2.imwrite(img_file, im)
        except Exception:
            traceback.print_exc()
            return self.unknown, 1.0, feature_str, need_save

        try:
            # Flow: find nearest images ; compute prob ; online clustering ;
            # add to the LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                _log('stat not_find_face', 'time :', (time.time() - start))
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result

            try:
                # nearest_sim_list has the same format as dist_label_list,
                # so both can be merged and evaluated together.
                nearest_sim_list = self.cal_nearest_sim(
                    current_feature=im_feature)
            except Exception:
                traceback.print_exc()
                nearest_sim_list = []
            _log('nearest_sim_list :', [str(item) for item in nearest_sim_list])

            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            _log('extract_feature_time :', (time.time() - start))

            # Find the nearest images with the LSH Forest, then append them
            # after the (sorted) recently seen faces.
            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]
            _log('dist_label_list :', [str(item) for item in dist_label_list])

            # NOTE(review): a slice is never None, so the first branch is
            # unreachable. Kept unchanged pending confirmation of intent.
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(self.current_new_person_id)
            else:
                # Compute prob from the distances.
                this_id, this_label = self.evaluate_result(dist_label_list)

            # Regardless of the probability, remember the newest picture.
            self.nearest.append((this_label, im_feature))
            _log('self.nearest :', [str(item) for item in self.nearest])

            # Online clustering: decide from the result whether this is a
            # new person or belongs to an existing one.
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.must_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.must_same_str)
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(self.current_new_person_id)
                self.current_new_person_id += 1
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder, this_label)
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.maybe_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.maybe_same_str)
                need_add = False  # gray-area results are not added
                need_save = True
            else:
                _log('error para :', this_id)

            if need_save:
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the feature of the picture and the face picture.
                    this_pic_feature_name = os.path.join(
                        this_person_feature_folder, image_id + '.p')
                    # `with` flushes and closes the feature file; the
                    # original left the handle open.
                    with open(this_pic_feature_name, 'wb') as feature_file:
                        msgpack_numpy.dump(im_feature, feature_file)
                    this_pic_face_name = os.path.join(
                        this_person_pic_folder, image_id + '.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except Exception:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False

            # Add to the LSH Forest
            if need_add:
                self.add_one_pic(im_feature, this_label)

            if this_id in (self.same_pic_id, self.must_be_not_same_id,
                           self.must_be_same_id):
                _log('stat recognize_time :', (time.time() - start),
                     'this_id :', self.trans_dic.get(this_id))
                return (this_label.replace(self.must_same_str, ''),
                        str(dist_label_list[0][0]), str(feature_str),
                        str(True))

            # Gray area: do not show a name.
            _log('stat gray_area :', (time.time() - start))
            return (self.unknown, str(dist_label_list[0][0]),
                    str(feature_str), str(False))
        except Exception:
            traceback.print_exc()
            return self.unknown, str(100.0), str(feature_str), str(False)