Code Example #1
 def _first_level_segment_upload(segm_i):
     l = ds_segments_bounds[segm_i][0, 0]
     r = ds_segments_bounds[segm_i][-1, 1]
     segm_start, segm_end = np.searchsorted(
         sp_mz_int_buf[:, 1], (l, r))  # mz expected to be in column 1
     segm = sp_mz_int_buf[segm_start:segm_end]
     return storage.put_cobject(msgpack.dumps(segm))
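For context, here is a minimal self-contained sketch (made-up values, plain NumPy) of the np.searchsorted pattern used above: because the buffer is sorted by its m/z column, searchsorted returns the start and end row indices of the segment's m/z window, so each segment comes out as one contiguous slice.

import numpy as np

# hypothetical buffer: columns are (spectrum index, mz, intensity), already sorted by mz
buf = np.array([[0, 100.1, 5.0],
                [1, 100.9, 2.0],
                [0, 101.5, 7.0],
                [2, 103.2, 1.0]])

l, r = 100.5, 102.0                              # segment bounds in m/z
start, end = np.searchsorted(buf[:, 1], (l, r))  # row range with l <= mz < r
segment = buf[start:end]                         # rows 1 and 2 in this example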
Code Example #2
    def upload_chunk(ch_i, storage):
        chunk_sp_inds = chunks[ch_i]
        # Get imzml_reader from COS because it's too big to include via pywren captured vars
        imzml_reader = pickle.loads(
            read_cloud_object_with_retry(storage, imzml_cobject))
        n_spectra = sum(imzml_reader.mzLengths[sp_i] for sp_i in chunk_sp_inds)
        sp_mz_int_buf = np.zeros((n_spectra, 3),
                                 dtype=imzml_reader.mzPrecision)

        chunk_start = 0
        for sp_i, mzs, ints in get_spectra(ibd_path, imzml_reader,
                                           chunk_sp_inds):
            chunk_end = chunk_start + len(mzs)
            sp_mz_int_buf[chunk_start:chunk_end, 0] = sp_id_to_idx[sp_i]
            sp_mz_int_buf[chunk_start:chunk_end, 1] = mzs
            sp_mz_int_buf[chunk_start:chunk_end, 2] = ints
            chunk_start = chunk_end

        by_mz = np.argsort(sp_mz_int_buf[:, 1])
        sp_mz_int_buf = sp_mz_int_buf[by_mz]
        del by_mz

        chunk = msgpack.dumps(sp_mz_int_buf)
        size_mb = sys.getsizeof(chunk) / 1024 ** 2
        logger.info(f'Uploading spectra chunk {ch_i} - {size_mb:.2f} MB')
        chunk_cobject = storage.put_cobject(chunk)
        logger.info(f'Spectra chunk {ch_i} finished')
        return chunk_cobject
Code Example #3
    def merge_spectra_chunk_segments(segm_cobjects, id, storage):
        print(f'Merging segment {id} spectra chunks')

        def _merge(ch_i):
            segm_spectra_chunk = read_cloud_object_with_retry(
                storage, segm_cobjects[ch_i], msgpack.load)
            return segm_spectra_chunk

        with ThreadPoolExecutor(max_workers=128) as pool:
            segm = list(pool.map(_merge, range(len(segm_cobjects))))

        segm = np.concatenate(segm)

        # Alternative in-place sorting (slower) :
        # segm.view(f'{ds_segm_dtype},{ds_segm_dtype},{ds_segm_dtype}').sort(order=['f1'], axis=0)
        segm = segm[segm[:, 1].argsort()]

        bounds_list = ds_segments_bounds[id]

        segms_len = []
        segms_cobjects = []
        for segm_j in range(len(bounds_list)):
            l, r = bounds_list[segm_j]
            segm_start, segm_end = np.searchsorted(
                segm[:, 1], (l, r))  # mz expected to be in column 1
            sub_segm = segm[segm_start:segm_end]
            segms_len.append(len(sub_segm))
            base_id = sum([len(bounds) for bounds in ds_segments_bounds[:id]])
            segm_i = base_id + segm_j
            print(f'Storing dataset segment {segm_i}')
            segms_cobjects.append(storage.put_cobject(msgpack.dumps(sub_segm)))

        return segms_len, segms_cobjects
Code Example #4
    def store_formula_to_id_chunk(ch_i, ibm_cos):
        print(f'Storing formula_to_id dictionary chunk {ch_i}')
        start_id = (N_FORMULAS_SEGMENTS // N_FORMULA_TO_ID) * ch_i
        end_id = (N_FORMULAS_SEGMENTS // N_FORMULA_TO_ID) * (ch_i + 1)
        keys = [
            f'{formulas_chunks_prefix}/{formulas_chunk}.msgpack'
            for formulas_chunk in range(start_id, end_id)
        ]

        def _get(key):
            formula_chunk = read_object_with_retry(ibm_cos, bucket, key,
                                                   pd.read_msgpack)
            formula_to_id_chunk = dict(
                zip(formula_chunk.formula, formula_chunk.index))
            return formula_to_id_chunk

        with ThreadPoolExecutor(max_workers=128) as pool:
            results = list(pool.map(_get, keys))

        formula_to_id = {}
        for chunk_dict in results:
            formula_to_id.update(chunk_dict)

        ibm_cos.put_object(Bucket=bucket,
                           Key=f'{formula_to_id_chunks_prefix}/{ch_i}.msgpack',
                           Body=msgpack.dumps(formula_to_id))
Code Example #5
    def merge_spectra_chunk_segments(segm_i, ibm_cos):
        print(f'Merging segment {segm_i} spectra chunks')

        keys = list_keys(bucket, f'{ds_segments_prefix}/chunk/{segm_i}/', ibm_cos)

        def _merge(key):
            segm_spectra_chunk = read_object_with_retry(ibm_cos, bucket, key, msgpack.load)
            return segm_spectra_chunk

        with ThreadPoolExecutor(max_workers=128) as pool:
            segm = list(pool.map(_merge, keys))

        segm = np.concatenate(segm)

        # Alternative in-place sorting (slower) :
        # segm.view(f'{segm_dtype},{segm_dtype},{segm_dtype}').sort(order=['f1'], axis=0)
        segm = segm[segm[:, 1].argsort()]

        clean_from_cos(None, bucket, f'{ds_segments_prefix}/chunk/{segm_i}/', ibm_cos)
        bounds_list = ds_segments_bounds[segm_i]

        segms_len = []
        for segm_j in range(len(bounds_list)):
            l, r = bounds_list[segm_j]
            segm_start, segm_end = np.searchsorted(segm[:, 1], (l, r))  # mz expected to be in column 1
            sub_segm = segm[segm_start:segm_end]
            segms_len.append(len(sub_segm))
            base_id = sum([len(bounds) for bounds in ds_segments_bounds[:segm_i]])
            id = base_id + segm_j
            print(f'Storing dataset segment {id}')
            ibm_cos.put_object(Bucket=bucket,
                               Key=f'{ds_segments_prefix}/{id}.msgpack',
                               Body=msgpack.dumps(sub_segm))

        return segms_len
Code Example #6
def convert_to_bytes(obj):
    """
    Serialize an object.

    Returns:
        Implementation-dependent bytes-like object.
    """
    return msgpack_numpy.dumps(obj, use_bin_type=True)
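A matching deserialization helper is the natural counterpart; this is a minimal sketch using msgpack_numpy.loads (the name convert_from_bytes is assumed here, not taken from the source):

def convert_from_bytes(data):
    """
    Deserialize bytes produced by convert_to_bytes.
    """
    # raw=False makes str values round-trip as str rather than bytes
    return msgpack_numpy.loads(data, raw=False)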
Code Example #7
 def _first_level_segment_upload(segm_i):
     l = ds_segments_bounds[segm_i][0, 0]
     r = ds_segments_bounds[segm_i][-1, 1]
     segm_start, segm_end = np.searchsorted(sp_mz_int_buf[:, 1], (l, r))  # mz expected to be in column 1
     segm = sp_mz_int_buf[segm_start:segm_end]
     ibm_cos.put_object(Bucket=bucket,
                        Key=f'{ds_segments_prefix}/chunk/{segm_i}/{id}.msgpack',
                        Body=msgpack.dumps(segm))
Code Example #8
def check_face_img(face_img):
    # pose_predict (head pose): [[pitch, yaw, roll]]
    '''
    :param face_img: image matrix of the face
    :return: whether to run recognition (False: skip recognition)
    '''

    current_day = get_current_day()
    log_file = open(os.path.join(log_dir, current_day + '.txt'), 'a')

    face_img_str = base64.b64encode(msgpack_numpy.dumps(face_img))
    request = {
        "request_type": 'check_pose',
        "face_img_str": face_img_str,
        "image_id": str(time.time())
    }
    result = requests.post(angle_url, data=request)

    try:
        if result.status_code == 200:
            pose_predict = json.loads(result.content)["pose_predict"]
            if not pose_predict:  # load failed
                log_file.write('\t'.join(map(str, ['pose filter request'])) +
                               '\n')
                log_file.close()
                return False
            else:
                pose_predict = msgpack_numpy.loads(
                    base64.b64decode(pose_predict))
                if pose_predict is None:
                    log_file.write(
                        '\t'.join(map(str, ['pose filter detect'])) + '\n')
                    log_file.close()
                    return False
                pitch, yaw, roll = pose_predict[0]
                if math.fabs(pitch) < pitch_threshold and math.fabs(
                        yaw) < yaw_threshold and math.fabs(
                            roll) < roll_threshold:
                    log_file.write('\t'.join(
                        map(str, ['pose not filter',
                                  str(pose_predict[0])])) + '\n')
                    log_file.close()
                    return True
                else:
                    log_file.write('\t'.join(
                        map(str,
                            ['pose filter threshold',
                             str(pose_predict[0])])) + '\n')
                    log_file.close()
                    return False
        else:
            return False
    except:
        traceback.print_exc()
        log_file.close()
        return False
Code Example #9
 def _upload_chunk(ch_i, sp_mz_int_buf):
     chunk = msgpack.dumps(sp_mz_int_buf)
     key = f'{input_data["ds_chunks"]}/{ch_i}.msgpack'
     size_mb = sys.getsizeof(chunk) / 1024 ** 2
     logger.info(f'Uploading spectra chunk {ch_i} - {size_mb:.2f} MB')
     cos_client.put_object(Bucket=config["storage"]["ds_bucket"],
                           Key=key,
                           Body=chunk)
     logger.info(f'Spectra chunk {ch_i} finished')
     return key
Code Example #10
    def check_face_img(self, face_img, image_id):
        # compute the head pose angles
        '''
        :param face_img: image matrix of the face
        :param image_id: image id
        :return: whether to run recognition (False: skip recognition)
        '''
        # pose check

        current_day = get_current_day()
        log_file = open(os.path.join(log_dir, current_day + '.txt'), 'a')

        face_img_str = base64.b64encode(msgpack_numpy.dumps(face_img))
        request = {
            "request_type": 'check_pose',
            "face_img_str": face_img_str,
            "image_id": image_id,
        }
        url = "http://%s:%d/" % (check_ip, check_port)
        result = image_request(request, url)
        try:
            pose_predict = json.loads(result)["pose_predict"]
            if not pose_predict:  # load failed
                log_file.write(
                    '\t'.join(map(str, [image_id, 'pose filter request'])) +
                    '\n')
                log_file.close()
                return False
            else:
                pose_predict = msgpack_numpy.loads(
                    base64.b64decode(pose_predict))
                if pose_predict is None:
                    log_file.write(
                        '\t'.join(map(str, [image_id, 'pose filter detect'])) +
                        '\n')
                    log_file.close()
                    return False
                pitch, yaw, roll = pose_predict[0]
                if math.fabs(pitch) < self.pitch_threshold and \
                        math.fabs(yaw) < self.yaw_threshold and \
                        math.fabs(roll) < self.roll_threshold:
                    log_file.close()
                    return True
                else:
                    log_file.write('\t'.join(
                        map(str, [image_id, 'pose filter threshold'])) + '\n')
                    log_file.close()
                    return False
        except:
            traceback.print_exc()
            log_file.close()
            return False
Code Example #11
def get_all_img_feature():
    feature_result_file = '/tmp/annotate_light_cnn_feature.p'
    f = open(feature_result_file, 'w')
    folder = '/tmp/all_images'
    person_list = os.listdir(folder)
    for person in person_list:
        person_path = os.path.join(folder, person)
        pic_list = os.listdir(person_path)
        for pic in pic_list:
            pic_path = os.path.join(person_path, pic)
            feature = np.asarray(extract_feature_from_file(pic_path)[0])
            f.write(base64.b64encode(msgpack_numpy.dumps((feature, person_path)))+'\n')
    f.close()
Code Example #12
 def emit(self, topic, data):
     """ Emits data of some topic over TCP """
     if isinstance(topic, str):
         topic = topic.encode()
     log.debug("Emitting message: %s %s" % (topic, data))
     try:
         self.publisher.send_multipart([topic, dumps(data)])
     except (NameError, AttributeError):
         pass  # No dumps defined
     if topic == b'results':
         self.recorder_queue.put(data)
     elif topic == b'status' or topic == b'progress':
         self.monitor_queue.put((topic.decode(), data))
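For reference, a hedged sketch of what a receiving end for this emitter could look like, assuming the publisher is a ZeroMQ PUB socket and dumps/loads are msgpack's (the endpoint and topic below are placeholders, not from the source):

import msgpack
import zmq

ctx = zmq.Context.instance()
sub = ctx.socket(zmq.SUB)
sub.connect('tcp://localhost:5555')          # placeholder endpoint
sub.setsockopt(zmq.SUBSCRIBE, b'results')    # subscribe to one topic

topic, payload = sub.recv_multipart()        # mirrors send_multipart([topic, dumps(data)])
data = msgpack.loads(payload, raw=False)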
Code Example #13
 def emit(self, topic, data):
     """ Emits data of some topic over TCP """
     if isinstance(topic, str):
         topic = topic.encode()
     log.debug("Emitting message: %s %s" % (topic, data))
     try:
         self.publisher.send_multipart([topic, dumps(data)])
     except (NameError, AttributeError):
         pass # No dumps defined
     if topic == b'results':
         self.recorder_queue.put(data)
     elif topic == b'status' or topic == b'progress':
         self.monitor_queue.put((topic.decode(), data))
Code Example #14
def get_pic_feature(pic_folder, result_file):
    f_result = open(result_file, 'w')
    pic_list = os.listdir(pic_folder)
    for pic in pic_list:
        try:
            pic_path = os.path.join(pic_folder, pic)
            feature = extract_feature_from_file(pic_path)
            # print feature, feature.shape
            f_result.write(pic_path.decode('gbk')+'\t'+base64.b64encode(msgpack_numpy.dumps(feature))+'\n')
            print pic_path.decode('gbk')
        except:
            traceback.print_exc()
            continue
    f_result.close()
Code Example #15
def extract_all_feature(folder, result_file, extract_func):
    f = open(result_file, 'w')
    f.write('pic_path'+'\t'+'feature'+'\n')
    person_list = os.listdir(folder)
    for person in person_list:
        print person
        person_path = os.path.join(folder, person)
        pic_list = os.listdir(person_path)
        for pic in pic_list:
            pic_path = os.path.join(person_path, pic)
            # feature = extract_feature_from_file(pic_path)
            feature = extract_func(pic_path)
            feature_str = base64.b64encode(msgpack_numpy.dumps(feature))
            f.write(pic_path+'\t'+feature_str+'\n')
    f.close()
Code Example #16
def get_all_img_feature():
    feature_result_file = '/tmp/annotate_light_cnn_feature.p'
    f = open(feature_result_file, 'w')
    folder = '/tmp/all_images'
    person_list = os.listdir(folder)
    for person in person_list:
        person_path = os.path.join(folder, person)
        pic_list = os.listdir(person_path)
        for pic in pic_list:
            pic_path = os.path.join(person_path, pic)
            feature = np.asarray(extract_feature_from_file(pic_path)[0])
            f.write(
                base64.b64encode(msgpack_numpy.dumps((feature, person_path))) +
                '\n')
    f.close()
Code Example #17
def get_pic_feature(pic_folder, result_file):
    f_result = open(result_file, 'w')
    pic_list = os.listdir(pic_folder)
    for pic in pic_list:
        try:
            pic_path = os.path.join(pic_folder, pic)
            feature = extract_feature_from_file(pic_path)
            # print feature, feature.shape
            f_result.write(
                pic_path.decode('gbk') + '\t' +
                base64.b64encode(msgpack_numpy.dumps(feature)) + '\n')
            print pic_path.decode('gbk')
        except:
            traceback.print_exc()
            continue
    f_result.close()
Code Example #18
def store_formula_to_id(storage, bucket, formula_to_id_chunks_prefix,
                        formulas_df):
    num_formulas = len(formulas_df)
    n_formula_to_id = int(
        math.ceil(num_formulas * 200 / (FORMULA_TO_ID_CHUNK_MB * 1024**2)))
    for ch_i in range(n_formula_to_id):
        print(f'Storing formula_to_id dictionary chunk {ch_i}')
        start_idx = num_formulas * ch_i // n_formula_to_id
        end_idx = num_formulas * (ch_i + 1) // n_formula_to_id

        formula_to_id = formulas_df.iloc[start_idx:end_idx].formula.to_dict()

        storage.put_object(Bucket=bucket,
                           Key=f'{formula_to_id_chunks_prefix}/{ch_i}.msgpack',
                           Body=msgpack.dumps(formula_to_id))
    return n_formula_to_id
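The chunk count above appears to assume roughly 200 bytes per formula entry. A quick worked example of that sizing (the numbers are illustrative, not from the source):

import math

FORMULA_TO_ID_CHUNK_MB = 512    # assumed per-chunk budget
num_formulas = 3_000_000        # assumed dataset size
n_chunks = int(math.ceil(num_formulas * 200 / (FORMULA_TO_ID_CHUNK_MB * 1024 ** 2)))
# 3,000,000 entries * ~200 bytes ≈ 572 MB, so n_chunks == 2 with a 512 MB budget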
Code Example #19
def valid_one_pic_pose(face_img, image_id):

    face_img_str = base64.b64encode(msgpack_numpy.dumps(face_img))

    request = {
        "request_type": 'check_pose',
        "face_img_str": face_img_str,
        "image_id": image_id,
    }

    requestPOST = urllib2.Request(data=urllib.urlencode(request),
                                  url="http://10.160.164.26:%d/" % check_port)
    requestPOST.get_method = lambda: "POST"
    try:
        s = urllib2.urlopen(requestPOST).read()
    except urllib2.HTTPError, e:
        print e.code
Code Example #20
 def post(self):
     request_type = self.get_body_argument('request_type')
     if request_type == 'check_pose':
         try:
             image_id = self.get_body_argument("image_id")
             face_img_str = self.get_body_argument("face_img_str")
             print "receive image", image_id, time.time()
             face_img = msgpack_numpy.loads(base64.b64decode(face_img_str))
             start = time.time()
             pose_predict = angle_calculate_server.calculate_angle(face_img, image_id)
             end = time.time()
             pose_predict = base64.b64encode(msgpack_numpy.dumps(pose_predict))
             print 'pose predict time :', (end - start)
             self.write(json.dumps({"pose_predict": pose_predict}))
         except:
             traceback.print_exc()
             return
Code Example #21
    def check_face_img(self, face_img, image_id):
        # compute the head pose angles
        '''
        :param face_img: image matrix of the face
        :param image_id: image id
        :return: whether to run recognition (False: skip recognition)
        '''
        # pose check

        current_day = get_current_day()
        log_file = open(os.path.join(log_dir, current_day+'.txt'), 'a')

        face_img_str = base64.b64encode(msgpack_numpy.dumps(face_img))
        request = {
            "request_type": 'check_pose',
            "face_img_str": face_img_str,
            "image_id": image_id,
        }
        url = "http://%s:%d/" % (check_ip, check_port)
        result = image_request(request, url)
        try:
            pose_predict = json.loads(result)["pose_predict"]
            if not pose_predict:  # load failed
                log_file.write('\t'.join(map(str, [image_id, 'pose filter request'])) + '\n')
                log_file.close()
                return False
            else:
                pose_predict = msgpack_numpy.loads(base64.b64decode(pose_predict))
                if pose_predict is None:
                    log_file.write('\t'.join(map(str, [image_id, 'pose filter detect'])) + '\n')
                    log_file.close()
                    return False
                pitch, yaw, roll = pose_predict[0]
                if math.fabs(pitch) < self.pitch_threshold and \
                        math.fabs(yaw) < self.yaw_threshold and \
                        math.fabs(roll) < self.roll_threshold:
                    log_file.close()
                    return True
                else:
                    log_file.write('\t'.join(map(str, [image_id, 'pose filter threshold'])) + '\n')
                    log_file.close()
                    return False
        except:
            traceback.print_exc()
            log_file.close()
            return False
Code Example #22
def check_face_img(face_img):
    # pose_predict (head pose): [[pitch, yaw, roll]]
    '''
    :param face_img: image matrix of the face
    :return: whether to run recognition (False: skip recognition)
    '''

    current_day = get_current_day()
    log_file = open(os.path.join(log_dir, current_day + '.txt'), 'a')

    face_img_str = base64.b64encode(msgpack_numpy.dumps(face_img))
    request = {"request_type": 'check_pose', "face_img_str": face_img_str, "image_id": str(time.time())}
    result = requests.post(angle_url, data=request)

    try:
        if result.status_code == 200:
            pose_predict = json.loads(result.content)["pose_predict"]
            if not pose_predict:  # load failed
                log_file.write('\t'.join(map(str, ['pose filter request'])) + '\n')
                log_file.close()
                return False
            else:
                pose_predict = msgpack_numpy.loads(base64.b64decode(pose_predict))
                if pose_predict is None:
                    log_file.write('\t'.join(map(str, ['pose filter detect'])) + '\n')
                    log_file.close()
                    return False
                pitch, yaw, roll = pose_predict[0]
                if math.fabs(pitch) < pitch_threshold and math.fabs(yaw) < yaw_threshold and math.fabs(roll) < roll_threshold:
                    log_file.write('\t'.join(map(str, ['pose not filter', str(pose_predict[0])])) + '\n')
                    log_file.close()
                    return True
                else:
                    log_file.write('\t'.join(map(str, ['pose filter threshold', str(pose_predict[0])])) + '\n')
                    log_file.close()
                    return False
        else:
            return False
    except:
        traceback.print_exc()
        log_file.close()
        return False
Code Example #23
def valid_one_pic_pose(face_img, image_id):

    face_img_str = base64.b64encode(msgpack_numpy.dumps(face_img))

    request = {
        "request_type": 'check_pose',
        "face_img_str": face_img_str,
        "image_id": image_id,
    }

    requestPOST = urllib2.Request(
        data=urllib.urlencode(request),
        url="http://10.160.164.26:%d/" % check_port
    )
    requestPOST.get_method = lambda: "POST"
    try:
        s = urllib2.urlopen(requestPOST).read()
    except urllib2.HTTPError, e:
        print e.code
Code Example #24
 def post(self):
     request_type = self.get_body_argument('request_type')
     if request_type == 'check_pose':
         try:
             image_id = self.get_body_argument("image_id")
             face_img_str = self.get_body_argument("face_img_str")
             print "receive image", image_id, time.time()
             face_img = msgpack_numpy.loads(base64.b64decode(face_img_str))
             start = time.time()
             pose_predict = angle_calculate_server.calculate_angle(
                 face_img, image_id)
             end = time.time()
             pose_predict = base64.b64encode(
                 msgpack_numpy.dumps(pose_predict))
             print 'pose predict time :', (end - start)
             self.write(json.dumps({"pose_predict": pose_predict}))
         except:
             traceback.print_exc()
             return
Code Example #25
    def recognize_one_feature(self, im_feature, image_id):
        '''
            Determine the label from the feature.
            :param image_id: file name of the full image + face_id (which face in the image) --- makes it easy to locate
        '''
        start = time.time()
        feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
        # im_feature = msgpack_numpy.loads(base64.b64decode(feature_str))
        current_day = get_current_day()
        log_file = open(os.path.join(self.log_dir, current_day + '.txt'), 'a')
        current_time = get_current_time()
        log_file.write('\t'.join(map(str, [current_time, "receive image", image_id])) + '\n')
        try:
            # Pipeline: find the nearest images; compute prob; online clustering; add to the LSH Forest
            try:
                current_time = float(image_id)
                nearest_sim_list = self.cal_nearest_sim(current_time=current_time, current_feature=im_feature)
                # print 'current_time :', current_time, 'nearest_sim_list :', nearest_sim_list
            except:
                traceback.print_exc()
                nearest_sim_list = []
            # Find the nearest images --- use the LSH Forest to get the 10 closest images, then compute the distance to each
            dist_label_list = self.find_k_neighbors_with_lsh(im_feature)
            dist_label_list.extend(nearest_sim_list)
            dist_label_list = self.filter_result(dist_label_list)
            dist_label_list.sort(key=lambda x: x[0], reverse=True)

            # Evaluate
            if dist_label_list is None:
                # Do not handle the new_person case separately; everything below the threshold is treated as new_person
                this_id = self.must_be_not_same_id
                this_label = 'new_person'
                # this_id = self.must_be_not_same_id
                # this_label = self.new_person_str + str(self.current_new_person_id)
            else:
                # Compute prob --- derive prob from the distance
                this_id, this_label = self.evaluate_result(dist_label_list)
            # Online clustering --- based on dist, decide whether to add a new person or merge into an existing one
            if dist_label_list is not None and len(dist_label_list) > 0:
                current_time = get_current_time()
                log_file.write('\t'.join(map(str, [current_time, 'dist_label_list :', map(str, dist_label_list)])) + '\n')
            # need_add decides whether to add to the LSHForest; need_save decides whether to store in the database
            if this_id == self.same_pic_id:
                need_add = False
                need_save = True
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
            elif this_id == self.must_be_not_same_id:
                # The current version does not add new people; anything unrecognized is returned as new_person
                this_label = 'new_person'
                need_save = True
                need_add = False
                # this_label = self.new_person_str + str(self.current_new_person_id)
                # self.current_new_person_id += 1
                # need_add = True
                # need_save = True
            elif this_id == self.maybe_same_id:
                need_add = False
                need_save = False
            else:
                current_time = get_current_time()
                log_file.write('\t'.join(map(str, [current_time, 'error para :', this_id])) + '\n')
                return self.unknown, str(self.max_dist_threshold), feature_str, str(False)
            self.nearest.append((this_label, im_feature, image_id))
            # new_person entries are no longer added for now
            # # Add to the LSH Forest --- partial_fit
            # if need_add:
            #     # Only add new people's images to the LSHForest and save them to a file
            #     self.add_one_pic(im_feature, this_label)
            #     write_start = time.time()
            #     tmp_file = open(self.all_feature_label_file, 'a')
            #     tmp_file.write(base64.b64encode(msgpack_numpy.dumps((im_feature, this_label)))+'\n')
            #     tmp_file.close()
            #     print 'write time :', (time.time() - write_start)
            #     # A file name can be generated from the label and image_id to decide whether to store the file [it can be stored on both the server and locally]
            # Count how many images fall into the gray area
            log_file.write('\t'.join(map(str, ['stat', 'recognize_id', self.trans_dic[this_id], 'recog time :', (time.time() - start)])) + '\n')
            log_file.close()
            if this_id == self.same_pic_id or this_id == self.must_be_not_same_id or this_id == self.must_be_same_id:
                if this_label is None or dist_label_list is None:
                    # The database may contain no one at all, in which case this_label = None
                    return self.unknown, str(self.max_dist_threshold), feature_str, str(False)
                else:
                    return this_label.replace(self.must_same_str, ''), str(dist_label_list[0][0]), feature_str, str(need_save)
            else:
                # Gray area: do not show the person's name
                # return this_label.replace(self.maybe_same_str, ''), tr(dist_label_list[0][0]), str(has_save_num), str(need_add)
                return self.unknown, str(dist_label_list[0][0]), feature_str, str(need_save)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, str(self.max_dist_threshold), feature_str, str(False)
Code Example #26
    def recognize_online_cluster(self, image, image_id):
        '''
            :param image: recognize the received image and add it to the LSH Forest; compute proba from the distance
                            (different distances map to different accuracies; the threshold is derived from the existing dist values),
                            then compare with the preset threshold to decide whether this is a newly seen person, a confirmed known person, or an uncertain match to a known person
            :return:
        '''
        start = time.time()
        need_add = False
        need_save = False
        current_day = get_current_day()
        log_file = open(os.path.join(log_dir, current_day+'.txt'), 'a')
        log_file.write('\t'.join(map(str, ["receive image", image_id, time.time()])) + '\n')
        feature_str = ''
        try:
            image = base64.decodestring(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.fromstring(image, dtype=np.uint8), 1)
            log_file.write('\t'.join(map(str, ['shape :', im.shape[0], im.shape[1]])) + '\n')
            # Filter by image size
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                log_file.write('\t'.join(map(str, ['stat recognize_time :', (time.time() - start), 'small_size'])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            # Filter by image sharpness (blur)
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                log_file.write('\t'.join(map(str, ['stat recognize_time :', (time.time() - start), 'blur_filter', blur_var])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            #  Save the received image
            # img_file = '/tmp/research_face/%s.jpg' %image_id
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id+'.jpg')
            cv2.imwrite(img_file, im)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, 1.0, feature_str, need_save
        try:
            # Pipeline: find the nearest images; compute prob; online clustering; add to the LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                log_file.write('\t'.join(map(str, ['stat not_find_face', 'time :', (time.time() - start)]))+'\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result

            try:
                # nearest_sim_list has the same format as dist_label_list, so the two lists can be merged and evaluated together (no need to account for time)
                # after a name is recognized, put the name and the feature into self.nearest
                nearest_sim_list = self.cal_nearest_sim(current_feature=im_feature)
            except:
                traceback.print_exc()
                nearest_sim_list = []
            log_file.write('\t'.join(map(str, ['nearest_sim_list :', map(str, nearest_sim_list)])) + '\n')
            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            log_file.write('\t'.join(map(str, ['extract_feature_time :', (time.time() - start)]))+'\n')
            # Find the nearest images --- use the LSH Forest to get the 10 closest images, then compute the distance to each

            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]

            # Evaluate
            log_file.write('\t'.join(map(str, ['dist_label_list :', map(str, dist_label_list)])) + '\n')
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(self.current_new_person_id)
            else:
                # Compute prob --- derive prob from the distance
                this_id, this_label = self.evaluate_result(dist_label_list)
            # Regardless of the probability, always add the latest image to self.nearest
            self.nearest.append((this_label, im_feature))
            log_file.write('\t'.join(map(str, ['self.nearest :', map(str, self.nearest)])) + '\n')
            # Online clustering --- based on dist, decide whether to add a new person or merge into an existing one
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
                this_person_pic_folder = os.path.join(self.all_pic_data_folder, this_label+self.must_same_str)
                this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, this_label+self.must_same_str)
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(self.current_new_person_id)
                self.current_new_person_id += 1
                this_person_pic_folder = os.path.join(self.all_pic_data_folder, this_label)
                this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, this_label)
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                this_person_pic_folder = os.path.join(self.all_pic_data_folder, this_label + self.maybe_same_str)
                this_person_feature_folder = os.path.join(self.all_pic_feature_data_folder, this_label + self.maybe_same_str)
                need_add = False # do not add when prob falls in the gray area; add in all other cases
                need_save = True
            else:
                log_file.write('\t'.join(map(str, ['error para :', this_id]))+'\n')
            if need_save:
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the image's feature directly and save the image file as well
                    this_pic_feature_name = os.path.join(this_person_feature_folder, image_id+'.p')
                    msgpack_numpy.dump(im_feature, open(this_pic_feature_name, 'wb'))
                    this_pic_face_name = os.path.join(this_person_pic_folder, image_id+'.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False
            # Add to the LSH Forest --- partial_fit
            if need_add:
                self.add_one_pic(im_feature, this_label)
                # A file name can be generated from the label and image_id to decide whether to store the file [it can be stored on both the server and locally]
            if this_id == self.same_pic_id or this_id == self.must_be_not_same_id or this_id == self.must_be_same_id:
                end = time.time()
                log_file.write('\t'.join(map(str, ['stat recognize_time :',(end - start), 'this_id :', self.trans_dic.get(this_id)]))+'\n')
                log_file.close()
                need_save = True
                return this_label.replace(self.must_same_str, ''), str(dist_label_list[0][0]), str(feature_str), str(need_save)
            else:
                # Gray area: do not show the person's name
                end = time.time()
                log_file.write('\t'.join(map(str, ['stat gray_area :',(end - start)]))+'\n')
                log_file.close()
                return self.unknown, str(dist_label_list[0][0]), str(feature_str), str(False)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, str(100.0), str(feature_str), str(False)
Code Example #27
    def recognize_online_cluster(self, image, image_id):
        '''
            :param image: recognize the received image and add it to the LSH Forest; compute proba from the distance
                            (different distances map to different accuracies; the threshold is derived from the existing dist values),
                            then compare with the preset threshold to decide whether this is a newly seen person, a confirmed known person, or an uncertain match to a known person
            :return:
        '''
        start = time.time()
        need_add = False
        need_save = False
        current_day = get_current_day()
        log_file = open(os.path.join(log_dir, current_day + '.txt'), 'a')
        log_file.write(
            '\t'.join(map(str, ["receive image", image_id,
                                time.time()])) + '\n')
        feature_str = ''
        try:
            image = base64.decodestring(image)
            image = zlib.decompress(image)
            im = cv2.imdecode(np.fromstring(image, dtype=np.uint8), 1)
            log_file.write(
                '\t'.join(map(str, ['shape :', im.shape[0], im.shape[1]])) +
                '\n')
            # Filter by image size
            if im.shape[0] < size_threshold or im.shape[1] < size_threshold:
                log_file.write('\t'.join(
                    map(str, [
                        'stat recognize_time :', (time.time() -
                                                  start), 'small_size'
                    ])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            # Filter by image sharpness (blur)
            blur_sign, blur_var = is_blur(cv2.resize(im, (96, 96)))
            if blur_sign:
                log_file.write('\t'.join(
                    map(str, [
                        'stat recognize_time :',
                        (time.time() - start), 'blur_filter', blur_var
                    ])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            #  Save the received image
            # img_file = '/tmp/research_face/%s.jpg' %image_id
            time_slot = get_time_slot(image_id)
            if time_slot is None:
                time_slot = 'error'
            time_slot_dir = os.path.join(tmp_face_dir, time_slot)
            if not os.path.exists(time_slot_dir):
                os.makedirs(time_slot_dir)
            img_file = os.path.join(time_slot_dir, image_id + '.jpg')
            cv2.imwrite(img_file, im)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, 1.0, feature_str, need_save
        try:
            # Pipeline: find the nearest images; compute prob; online clustering; add to the LSH Forest
            result = self.extract_pic_feature(img_file)
            if result is None:
                log_file.write('\t'.join(
                    map(str, [
                        'stat not_find_face', 'time :', (time.time() - start)
                    ])) + '\n')
                log_file.close()
                return self.unknown, 1.0, feature_str, need_save
            face_pic, im_feature = result

            try:
                # nearest_sim_list has the same format as dist_label_list, so the two lists can be merged and evaluated together (no need to account for time)
                # after a name is recognized, put the name and the feature into self.nearest
                nearest_sim_list = self.cal_nearest_sim(
                    current_feature=im_feature)
            except:
                traceback.print_exc()
                nearest_sim_list = []
            log_file.write('\t'.join(
                map(str, ['nearest_sim_list :',
                          map(str, nearest_sim_list)])) + '\n')
            feature_str = base64.b64encode(msgpack_numpy.dumps(im_feature))
            log_file.write('\t'.join(
                map(str, ['extract_feature_time :', (time.time() - start)])) +
                           '\n')
            # Find the nearest images --- use the LSH Forest to get the 10 closest images, then compute the distance to each

            tmp_list = self.find_k_neighbors_with_lsh(im_feature)
            nearest_sim_list.sort(key=lambda x: x[0], reverse=True)
            nearest_sim_list.extend(tmp_list)
            dist_label_list = nearest_sim_list[:]

            # Evaluate
            log_file.write('\t'.join(
                map(str, ['dist_label_list :',
                          map(str, dist_label_list)])) + '\n')
            if dist_label_list is None:
                this_id = self.must_be_not_same_id
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
            else:
                # Compute prob --- derive prob from the distance
                this_id, this_label = self.evaluate_result(dist_label_list)
            # Regardless of the probability, always add the latest image to self.nearest
            self.nearest.append((this_label, im_feature))
            log_file.write(
                '\t'.join(map(str, ['self.nearest :',
                                    map(str, self.nearest)])) + '\n')
            # Online clustering --- based on dist, decide whether to add a new person or merge into an existing one
            if this_id == self.same_pic_id:
                need_add = False
            elif this_id == self.must_be_same_id:
                need_add = False
                need_save = True
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.must_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.must_same_str)
            elif this_id == self.must_be_not_same_id:
                this_label = self.new_person_str + str(
                    self.current_new_person_id)
                self.current_new_person_id += 1
                this_person_pic_folder = os.path.join(self.all_pic_data_folder,
                                                      this_label)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder, this_label)
                need_add = True
                need_save = True
            elif this_id == self.maybe_same_id:
                this_person_pic_folder = os.path.join(
                    self.all_pic_data_folder, this_label + self.maybe_same_str)
                this_person_feature_folder = os.path.join(
                    self.all_pic_feature_data_folder,
                    this_label + self.maybe_same_str)
                need_add = False  # do not add when prob falls in the gray area; add in all other cases
                need_save = True
            else:
                log_file.write('\t'.join(map(str, ['error para :', this_id])) +
                               '\n')
            if need_save:
                try:
                    if not os.path.exists(this_person_pic_folder):
                        os.makedirs(this_person_pic_folder)
                    if not os.path.exists(this_person_feature_folder):
                        os.makedirs(this_person_feature_folder)
                    # Store the image's feature directly and save the image file as well
                    this_pic_feature_name = os.path.join(
                        this_person_feature_folder, image_id + '.p')
                    msgpack_numpy.dump(im_feature,
                                       open(this_pic_feature_name, 'wb'))
                    this_pic_face_name = os.path.join(this_person_pic_folder,
                                                      image_id + '.jpg')
                    cv2.imwrite(this_pic_face_name, face_pic)
                except:
                    traceback.print_exc()
                    return self.unknown, 1.0, feature_str, False
            # Add to the LSH Forest --- partial_fit
            if need_add:
                self.add_one_pic(im_feature, this_label)
                # A file name can be generated from the label and image_id to decide whether to store the file [it can be stored on both the server and locally]
            if this_id == self.same_pic_id or this_id == self.must_be_not_same_id or this_id == self.must_be_same_id:
                end = time.time()
                log_file.write('\t'.join(
                    map(str, [
                        'stat recognize_time :', (end - start), 'this_id :',
                        self.trans_dic.get(this_id)
                    ])) + '\n')
                log_file.close()
                need_save = True
                return this_label.replace(self.must_same_str, ''), str(
                    dist_label_list[0][0]), str(feature_str), str(need_save)
            else:
                # Gray area: do not show the person's name
                end = time.time()
                log_file.write(
                    '\t'.join(map(str, ['stat gray_area :', (end - start)])) +
                    '\n')
                log_file.close()
                return self.unknown, str(
                    dist_label_list[0][0]), str(feature_str), str(False)
        except:
            traceback.print_exc()
            log_file.close()
            return self.unknown, str(100.0), str(feature_str), str(False)