def sub_process_func(task_id, all_pic_file_list):
    """Worker-process entry point: OCR ID-card photos, archive and record them.

    The worker handles every ``config['worker_count']``-th entry of
    *all_pic_file_list*, starting at index *task_id*.  For each picture it

    1. sends the file to ``call_wintone`` and reads the ID number from the
       response,
    2. skips the picture when ``check_card_code`` rejects that number,
    3. moves the picture below ``config['pic_outpath']`` under a fresh
       ``P_<uuid>`` name and, when the response carries a portrait, writes the
       base64-decoded portrait next to it,
    4. updates or inserts the person row and inserts the photo rows inside a
       single database transaction.

    When any step fails, the moved picture is renamed back, the portrait file
    is deleted and processing continues with the next picture.

    Args:
        task_id: Zero-based index of this worker; also used as the offset into
            *all_pic_file_list* and as the name of the per-worker log folder.
        all_pic_file_list: Sequence of picture paths shared by all workers.

    Returns:
        None.
    """
    global sub_task_id
    global run_flag
    run_flag = True
    sub_task_id = task_id
    # presumably on_sub_process_exit clears run_flag so the loop below can
    # stop between two pictures -- verify against its definition.
    signal.signal(signal.SIGTERM, on_sub_process_exit)

    # Give every worker its own log directory: <dir>/task_<id>/<file>.
    for handler_cfg in log_config['handlers'].values():
        if 'filename' in handler_cfg:
            log_dir, log_name = os.path.split(handler_cfg['filename'])
            handler_cfg['filename'] = os.path.join(
                log_dir, 'task_{0}'.format(sub_task_id), log_name)
    create_all_log_dirs(log_config)
    logging.config.dictConfig(log_config)
    # NOTE(review): second call kept from the original; it looks redundant.
    create_all_log_dirs(log_config)

    pid = os.getpid()
    log.info('[{}][{}] sub_process_func start>>>>>>>>>>>>>>>>>>>>>>>>>'.format(
        sub_task_id, pid))

    db_password = (config['db'].get('password')
                   or get_db_password(config['db']['cyberark']))
    db_engine = create_engine(
        config['db']['uri'].format(password=db_password),
        pool_size=2,
        echo_pool=True,
        pool_recycle=3600)

    try:
        for org_ph_name in all_pic_file_list[
                sub_task_id::config['worker_count']]:
            if not run_flag:
                break
            # (new_path, original_path, photo_id) of files moved so far
            dest_image_list = []
            # (path, path, photo_id) of files created by this worker
            new_image_list = []
            # (path, photo_id, data_source, org_ph_name) rows to insert
            pic_list = []
            try:
                # For card type 2 (ID card) the response is a dict with the
                # keys 'card_type', '保留', '姓名', '性别', '民族', '出生',
                # '住址', '公民身份号码' and '头像' (base64 portrait).
                wintone_resp_card_info = call_wintone(org_ph_name,
                                                      wintone_card_type)
                card_code = wintone_resp_card_info.get(u'公民身份号码')
                log.info('file_path={} card_code={}'.format(
                    org_ph_name, card_code))
                check_result, _ = check_card_code(card_code)
                if not check_result:
                    log.info('file_path={} check_card_code fail'.format(
                        org_ph_name))
                    continue

                # Move the picture into the destination folder.
                photo_id = 'P_{}'.format(uuid.uuid1())
                face_photo_id = 'P_{}'.format(uuid.uuid1())
                photo_id_list = [photo_id, face_photo_id]
                pic_path = org_ph_name.replace(config['pic_inpath'],
                                               config['pic_outpath'])
                dest_dir = os.path.split(pic_path)[0]
                extension = os.path.splitext(pic_path)[-1]
                pic_path = os.path.join(dest_dir, photo_id + extension)
                face_pic_path = os.path.join(dest_dir,
                                             face_photo_id + extension)
                try:
                    os.makedirs(dest_dir)
                except OSError:
                    # Usually "already exists"; a real failure surfaces in
                    # the rename right below.
                    pass
                os.rename(org_ph_name, pic_path)
                dest_image_list.append((pic_path, org_ph_name, photo_id))
                # data_source '2' marks the OCR source picture
                pic_list.append((pic_path, photo_id, '2', org_ph_name))
                log.info('move file {} >> {}'.format(org_ph_name, pic_path))

                # Save the portrait returned by the OCR service, if any.
                if wintone_resp_card_info.get(u'头像'):
                    with open(face_pic_path, 'wb+') as face_file:
                        face_file.write(base64.b64decode(
                            wintone_resp_card_info[u'头像'].encode()))
                    new_image_list.append(
                        (face_pic_path, face_pic_path, face_photo_id))
                    # data_source '3' marks the OCR portrait; its
                    # org_ph_name column holds the source photo id.
                    pic_list.append(
                        (face_pic_path, face_photo_id, '3', photo_id))
                    log.info('save face file {}'.format(face_pic_path))

                # Update the database; begin() commits on success, rolls
                # back on error and always releases the connection.
                with db_engine.begin() as db_conn:
                    card_prov = int(card_code[:2])
                    stat_month = datetime.today().date().replace(day=1)
                    if sys.version_info >= (3, 0):
                        card_code_base64 = base64.b64encode(
                            card_code.encode('utf8')).decode('utf8')
                    else:
                        card_code_base64 = base64.b64encode(card_code)

                    # Is this person already in the base-info table?
                    query_result = db_conn.execute(
                        r'''
                        SELECT card_id, person_nm, addr
                        FROM facedata.fac_person_base_info
                        WHERE card_code = :1 and card_prov = :2
                        ''', (card_code_base64, card_prov)).fetchall()

                    if query_result:
                        # Known person: refresh name/address, keeping the
                        # stored value when OCR returned nothing.
                        (card_id, person_nm, addr) = query_result[0]
                        person_nm = (wintone_resp_card_info.get(u'姓名')
                                     or person_nm)
                        addr = wintone_resp_card_info.get(u'住址') or addr
                        log.info(
                            'file_path={} use old card_id={} card_code={}'
                            .format(org_ph_name, card_id, card_code))
                        db_conn.execute(
                            r'''
                            UPDATE facedata.fac_person_base_info
                            SET person_nm=:1, addr=:2
                            WHERE card_code = :3 and card_prov = :4
                            ''',
                            (person_nm, addr, card_code_base64, card_prov))
                    else:
                        card_id = 'H_' + str(uuid.uuid1())
                        log.info(
                            'file_path={} use new card_id={} card_code={}'
                            .format(org_ph_name, card_id, card_code))
                        card_type = '1'  # ID cards use card_type 1
                        person_nm = wintone_resp_card_info.get(u'姓名')
                        nationality = None
                        # Unicode keys so the lookup also matches the
                        # unicode response values on Python 2.
                        sex = {
                            u'女': '0',
                            u'男': '1'
                        }.get(wintone_resp_card_info.get(u'性别'))
                        nation = wintone_resp_card_info.get(u'民族')
                        try:
                            birthday = wintone_resp_card_info.get(u'出生')
                            birthday = birthday and datetime.strptime(
                                birthday, '%Y-%m-%d')
                        except (TypeError, ValueError):
                            # Unparseable date: store NULL instead.
                            birthday = None
                        addr = wintone_resp_card_info.get(u'住址')
                        card_issued_by = None
                        db_conn.execute(
                            r'''
                            insert into facedata.fac_person_base_info(
                                card_id, card_code, card_type, card_prov,
                                person_nm, nationality, sex, nation,
                                birthday, addr, card_issued_by
                            ) VALUES(:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11)
                            ''',
                            (card_id, card_code_base64, card_type, card_prov,
                             person_nm, nationality, sex, nation, birthday,
                             addr, card_issued_by))

                    # Insert one photo row and one link row per picture.
                    for (row_path, row_photo_id, data_source,
                         row_origin) in pic_list:
                        ph_path = os.path.split(row_path)[0]
                        ph_name = os.path.basename(row_path)
                        space_size = os.path.getsize(row_path) / 1024  # KB
                        with Image.open(row_path) as img:
                            pixel = '{}*{}'.format(img.size[0], img.size[1])
                            dpi = img.info.get('dpi') and '{}*{}'.format(
                                *img.info['dpi'])
                            is_colour = picture_util.picture_is_colour(img)
                        # Photo base-info table
                        db_conn.execute(
                            r'''
                            insert into facedata.fac_photo_base_info(
                                stat_month, photo_id, ph_path, ph_name,
                                space_size, pixel, dpi, is_colour,
                                data_source, org_ph_name, status
                            ) VALUES(:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11)
                            ''',
                            (
                                stat_month,
                                row_photo_id,
                                ph_path,
                                ph_name,
                                space_size,
                                pixel,
                                dpi,
                                is_colour,
                                data_source,
                                row_origin,
                                '1'  # face flag: 1 recognised, 0 not
                            ))
                        # Person/photo link table
                        db_conn.execute(
                            r'''
                            insert into facedata.fac_person_photo(
                                stat_month, card_id, photo_id,
                                ph_path, ph_name, person_nm
                            ) VALUES(:1, :2,:3,:4, :5, :6)
                            ''',
                            (stat_month, card_id, row_photo_id, ph_path,
                             ph_name, person_nm))
                log.info(
                    'org_ph_name={} card_code={} card_id={} photo_id_list={} process file finish'
                    .format(org_ph_name, card_code, card_id, photo_id_list))
            except BaseException:
                # Deliberately as broad as the original bare ``except`` so
                # that a moved picture is always put back, whatever
                # interrupted the processing.
                log.exception(
                    'process file={} exception!!!'.format(org_ph_name))
                for moved_path, original_path, _ in dest_image_list:
                    try:
                        os.rename(moved_path, original_path)
                        log.info('[{}] rollback move file {} >>>> {}'.format(
                            task_id, original_path, moved_path))
                    except OSError:
                        log.warning(
                            '[{}] rollback move file {} >>>> {} failed'.format(
                                task_id, moved_path, original_path))
                for created_path, _, _ in new_image_list:
                    try:
                        os.remove(created_path)
                        log.info('[{}] rollback delete new file {}'.format(
                            task_id, created_path))
                    except OSError:
                        log.warning(
                            '[{}] rollback delete new file {} failed'.format(
                                task_id, created_path))
    finally:
        # Release the pooled connections owned by this worker.
        db_engine.dispose()

    log.info(
        '[{}][{}] sub_process_func finish>>>>>>>>>>>>>>>>>>>>>>>>>'.format(
            task_id, pid))
def sub_process_func(task_id, all_pic_file_list):
    """Worker-process entry point: archive attendance photos and record them.

    The worker handles every ``config['worker_count']``-th entry of
    *all_pic_file_list*, starting at index *task_id*.  A picture's base name
    is split on ``_``: the first part is used as the person name, the second
    as the key into ``um_card_code_data`` to find the ID number.  Pictures
    without a known ID number, or that no longer exist, are skipped.  Other
    pictures are moved below ``config['pic_outpath']`` under a fresh
    ``P_<uuid>`` name and recorded in the database inside one transaction.

    When a step fails after the move, the picture is renamed back and
    processing continues with the next one.

    Args:
        task_id: Zero-based index of this worker; also used as the offset into
            *all_pic_file_list* and as the name of the per-worker log folder.
        all_pic_file_list: Sequence of picture paths shared by all workers.

    Returns:
        None.
    """
    # Give every worker its own log directory: <dir>/task_<id>/<file>.
    for handler_cfg in log_config['handlers'].values():
        if 'filename' in handler_cfg:
            log_dir, log_name = os.path.split(handler_cfg['filename'])
            handler_cfg['filename'] = os.path.join(
                log_dir, 'task_{0}'.format(task_id), log_name)
    create_all_log_dirs(log_config)
    logging.config.dictConfig(log_config)
    # NOTE(review): second call kept from the original; it looks redundant.
    create_all_log_dirs(log_config)

    pid = os.getpid()
    log.info('[{}][{}] sub_process_func start>>>>>>>>>>>>>>>>>>>>>>>>>'.format(
        task_id, pid))

    # Database connection pool
    db_password = (config['db'].get('password')
                   or get_db_password(config['db']['cyberark']))
    db_engine = create_engine(
        config['db']['uri'].format(password=db_password),
        pool_size=2,
        echo_pool=True,
        pool_recycle=3600)

    try:
        for org_ph_name in all_pic_file_list[task_id::config['worker_count']]:
            # (new_path, original_path) of files moved so far
            dest_image_list = []
            try:
                name_parts = os.path.basename(org_ph_name).split('_')
                if len(name_parts) < 2:
                    # The original hit an IndexError here and reported it as
                    # a generic failure; state the actual problem instead.
                    log.warning(
                        'image_path={} file name has no um_code part'.format(
                            org_ph_name))
                    continue
                person_nm = name_parts[0]
                um_code = name_parts[1]
                card_code = um_card_code_data.get(um_code)
                if not card_code:
                    log.warning('image_path={} um_code={} no card_code'.format(
                        org_ph_name, um_code))
                    continue

                # Move the picture into the destination folder.
                if not os.path.exists(org_ph_name):
                    log.warning(
                        'image_path={} um_code={} card_code={} file not exists!!!'.
                        format(org_ph_name, um_code, card_code))
                    continue
                photo_id = 'P_{}'.format(uuid.uuid1())
                pic_path = org_ph_name.replace(config['pic_inpath'],
                                               config['pic_outpath'])
                dest_dir = os.path.split(pic_path)[0]
                pic_path = os.path.join(
                    dest_dir, photo_id + os.path.splitext(pic_path)[-1])
                try:
                    os.makedirs(dest_dir)
                except OSError:
                    # Usually "already exists"; a real failure surfaces in
                    # the rename right below.
                    pass
                os.rename(org_ph_name, pic_path)
                dest_image_list.append((pic_path, org_ph_name))
                log.info('move file {} >>>> {}'.format(org_ph_name, pic_path))

                # begin() commits on success, rolls back on error and always
                # releases the connection (the original never closed it).
                with db_engine.begin() as db_conn:
                    card_prov = int(card_code[:2])
                    stat_month = datetime.today().date().replace(day=1)
                    if sys.version_info >= (3, 0):
                        card_code_base64 = base64.b64encode(
                            card_code.encode('utf8')).decode('utf8')
                    else:
                        card_code_base64 = base64.b64encode(card_code)
                    query_result = db_conn.execute(
                        r'''
                        SELECT card_id
                        FROM facedata.fac_person_base_info
                        WHERE card_code = :1 and card_prov = :2
                        ''', (card_code_base64, card_prov)).fetchall()

                    # Person base-info table
                    if query_result:
                        card_id = query_result[0][0]
                    else:
                        card_id = 'H_' + str(uuid.uuid1())
                        card_type = '1'  # ID cards use card_type 1
                        db_conn.execute(
                            r'''
                            insert into facedata.fac_person_base_info(
                                card_id, card_code, card_prov,
                                card_type, person_nm
                            ) VALUES(:1, :2,:3, :4, :5)
                            ''',
                            (card_id, card_code_base64, card_prov, card_type,
                             person_nm))

                    ph_path = os.path.split(pic_path)[0]
                    ph_name = os.path.basename(pic_path)
                    space_size = os.path.getsize(pic_path) / 1024  # KB
                    with Image.open(pic_path) as img:
                        pixel = '{}*{}'.format(img.size[0], img.size[1])
                        dpi = img.info.get('dpi') and '{}*{}'.format(
                            *img.info['dpi'])
                        is_colour = picture_util.picture_is_colour(img)
                    data_source = '6'  # Bagualing attendance photos use 6

                    # Photo base-info table
                    db_conn.execute(
                        r'''
                        insert into facedata.fac_photo_base_info(
                            stat_month, photo_id, ph_path, ph_name,
                            space_size, pixel, dpi, is_colour,
                            data_source, org_ph_name, status
                        ) VALUES(:1, :2,:3,:4,:5,:6,:7,:8,:9,:10, :11)
                        ''',
                        (
                            stat_month,
                            photo_id,
                            ph_path,
                            ph_name,
                            space_size,
                            pixel,
                            dpi,
                            is_colour,
                            data_source,
                            org_ph_name,
                            '1'  # face flag: 1 recognised, 0 not
                        ))
                    # Person/photo link table
                    db_conn.execute(
                        r'''
                        insert into facedata.fac_person_photo(
                            stat_month, card_id, photo_id,
                            ph_path, ph_name, person_nm
                        ) VALUES(:1, :2,:3,:4, :5, :6)
                        ''',
                        (stat_month, card_id, photo_id, ph_path, ph_name,
                         person_nm))
                # As in the original, the base64 form of the ID number is
                # what ends up in this log line.
                log.info(
                    'image_path={} um_code={} card_code={} card_id={} photo_id={} file finish'
                    .format(org_ph_name, um_code, card_code_base64, card_id,
                            photo_id))
            except BaseException:
                # Deliberately as broad as the original bare ``except`` so
                # that a moved picture is always put back, whatever
                # interrupted the processing.
                log.exception(
                    'process file={} exception!!!'.format(org_ph_name))
                for moved_path, original_path in dest_image_list:
                    try:
                        os.rename(moved_path, original_path)
                        log.info('[{}] rollback move file {} >>>> {}'.format(
                            task_id, original_path, moved_path))
                    except OSError:
                        log.warning(
                            '[{}] rollback move file {} >>>> {} failed'.format(
                                task_id, moved_path, original_path))
    finally:
        # Release the pooled connections owned by this worker.
        db_engine.dispose()

    log.info(
        '[{}][{}] sub_process_func finish>>>>>>>>>>>>>>>>>>>>>>>>>'.format(
            task_id, pid))
# Load the job configuration. Only Python 3's open() accepts the ``encoding``
# argument, hence the version switch.
# NOTE(review): yaml.load() is called without an explicit Loader, which can
# construct arbitrary Python objects and is rejected by recent PyYAML
# releases -- consider yaml.safe_load(). The file objects opened here are
# also never closed explicitly.
if sys.version_info >= (3, 0):
    config = yaml.load(
        open('./config_files/ocr_pic_wintone.yaml', 'r', encoding='utf8'))
else:
    config = yaml.load(open('./config_files/ocr_pic_wintone.yaml', 'r'))

# Normalise every configured path to an absolute path.
config['pic_inpath_list'] = [
    os.path.abspath(x) for x in config['pic_inpath_list']
]
# os.path.join() with a single argument returns that argument unchanged.
config['pic_inpath'] = os.path.join(os.path.abspath(config['pic_inpath']))
# Output pictures go into a sub-folder named after wintone_card_type.
config['pic_outpath'] = os.path.join(os.path.abspath(config['pic_outpath']),
                                     wintone_card_type)

# Configure logging: load the logging config, point it at the configured
# save path, create the log folders, then apply it.
log_config = yaml.load(open('logging.yaml', 'r'))
set_log_savepath(log_config, config['log_savepath'])
create_all_log_dirs(log_config)
logging.config.dictConfig(log_config)
log = logging.getLogger('log')

# Labels keyed by card-type code (presumably the codes accepted as
# wintone_card_type -- confirm). The literal continues beyond this excerpt.
wintone_card_type_dict = {
    '1': '一代身份证',
    '2': '二代身份证正面',
    '3': '二代身份证证背面',
    '4': '临时身份证',
    '5': '驾照',
    '6': '行驶证',
    '7': '军官证',
    '8': '士兵证(暂不支持)',
    '9': '中华人民共和国往来港澳通行证',
    '10': '台湾居民往来大陆通行证',
    '11': '大陆居民往来台湾通行证',
def sub_process_func(task_id, all_log_file_path_list):
    """Worker-process entry point: import the pictures referenced by log files.

    The worker handles every ``config['worker_count']``-th entry of
    *all_log_file_path_list*, starting at index *task_id*.  Each log file is
    read line by line; lines containing ``bound_id`` and exactly one
    ``[INFO]`` marker carry a JSON record.  For each record the referenced
    pictures (looked up in the ``image`` folder next to the log folder) are
    verified, moved below ``config['pic_outpath']`` under fresh ``P_<uuid>``
    names and recorded in the database inside one transaction.

    When a record fails, its moved pictures are renamed back and the log file
    is left in place; a log file whose records all succeeded is moved to the
    output tree as well.

    Args:
        task_id: Zero-based index of this worker; also used as the offset into
            *all_log_file_path_list* and as the name of the per-worker log
            folder.
        all_log_file_path_list: Sequence of log-file paths shared by all
            workers.

    Returns:
        None.
    """
    global sub_task_id
    global run_flag
    run_flag = True
    sub_task_id = task_id
    # presumably on_sub_process_exit clears run_flag so the loops below can
    # stop between two records -- verify against its definition.
    signal.signal(signal.SIGTERM, on_sub_process_exit)

    # Give every worker its own log directory: <dir>/task_<id>/<file>.
    for handler_cfg in log_config['handlers'].values():
        if 'filename' in handler_cfg:
            log_dir, log_name = os.path.split(handler_cfg['filename'])
            handler_cfg['filename'] = os.path.join(
                log_dir, 'task_{0}'.format(sub_task_id), log_name)
    create_all_log_dirs(log_config)
    logging.config.dictConfig(log_config)
    # NOTE(review): second call kept from the original; it looks redundant.
    create_all_log_dirs(log_config)

    pid = os.getpid()
    log.info('[{}][{}] sub_process_func start>>>>>>>>>>>>>>>>>>>>>>>>>'.format(
        sub_task_id, pid))

    db_password = (config['db'].get('password')
                   or get_db_password(config['db']['cyberark']))
    db_engine = create_engine(
        config['db']['uri'].format(password=db_password),
        pool_size=2,
        echo_pool=True,
        pool_recycle=3600)

    try:
        for log_file_path in all_log_file_path_list[
                sub_task_id::config['worker_count']]:
            if not run_flag:
                log.info('[{}][{}] run_flag=False break'.format(
                    sub_task_id, pid))
                break
            log.info('[{}][{}] process file {} start@@@@@@@@@@@'.format(
                sub_task_id, pid, log_file_path))
            try:
                file_success_flag = True
                success_count = 0
                with open(log_file_path) as f_log:
                    # stat_month comes from the log name suffix (e.g.
                    # oa.log.2016-11-18) or falls back to the current month.
                    try:
                        stat_month = datetime.strptime(
                            os.path.basename(log_file_path).split('.')[-1],
                            '%Y-%m-%d').date().replace(day=1)
                    except ValueError:
                        stat_month = datetime.today().date().replace(day=1)
                    parent_path = os.path.abspath(
                        os.path.join(os.path.split(log_file_path)[0], '../'))

                    while True:
                        if not run_flag:
                            log.info('[{}][{}] run_flag=False break'.format(
                                sub_task_id, pid))
                            # Interrupted: keep the log file for a later run.
                            file_success_flag = False
                            break
                        line = f_log.readline()
                        if not line:
                            break
                        if 'bound_id' not in line:
                            continue
                        json_str = None
                        card_code = None
                        # (new_path, original_path, photo_id) of moved files
                        dest_image_list = []
                        try:
                            json_str = line.split('[INFO]')
                            if len(json_str) != 2:
                                # Only INFO lines are processed; ERROR lines
                                # are dropped.
                                continue
                            json_str = json_str[-1]
                            json_data = json.loads(json_str)
                            link_id = 'L_' + str(uuid.uuid1())
                            card_code = json_data['user_id']
                            bound_id = json_data['bound_id']
                            if bound_id.startswith('6222'):
                                # Exclude test data
                                log.info('[{}][{}] skip bound_id {}'.format(
                                    sub_task_id, pid, bound_id))
                                continue
                            request_obj = json_data.get('ext_info', {}).get(
                                'request_obj', {})
                            app_id = request_obj.get('app_id')
                            person_id = (request_obj.get('person_id')
                                         or request_obj.get('userId'))
                            time_end = json_data['time_end']
                            time_start = json_data['time_start']

                            # Check whether the ID number is valid.
                            card_code_result, _ = check_card_code(card_code)
                            log.info(
                                '[{}][{}] card_code={} card_code_result={} start>>>>'
                                .format(sub_task_id, pid, card_code,
                                        card_code_result))

                            # Collect the pictures to process; the key name
                            # differs between records.
                            try:
                                origin_image_list = json_data[
                                    'local_features'].values()
                            except (KeyError, AttributeError):
                                origin_image_list = json_data[
                                    'local_images'].values()
                            # Only the file name of the logged path is used,
                            # e.g. /wls/applogs/OAServer/20170614/person..jpg
                            origin_image_list = [
                                os.path.join(parent_path, 'image',
                                             p.strip().split('/')[-1])
                                for p in origin_image_list
                            ]
                            if not origin_image_list:
                                continue

                            # Move the pictures into the destination folder.
                            for org_ph_name in origin_image_list:
                                if not os.path.exists(org_ph_name):
                                    log.warning(
                                        '[{}][{}] card_code={} file={} not exists!!!!!'
                                        .format(sub_task_id, pid, card_code,
                                                org_ph_name))
                                    continue
                                # Skip files that are not readable pictures.
                                try:
                                    with Image.open(org_ph_name) as f_img:
                                        f_img.verify()
                                except Exception:
                                    log.info(
                                        '[{}][{}] log_file={} image_file={} file broken!!! \n{}'
                                        .format(sub_task_id, pid,
                                                log_file_path, org_ph_name,
                                                json_str))
                                    continue
                                photo_id = 'P_{}'.format(uuid.uuid1())
                                pic_path = org_ph_name.replace(
                                    config['pic_inpath'],
                                    config['pic_outpath'])
                                dest_dir = os.path.split(pic_path)[0]
                                pic_path = os.path.join(
                                    dest_dir,
                                    photo_id + os.path.splitext(pic_path)[-1])
                                try:
                                    os.makedirs(dest_dir)
                                except OSError:
                                    # Usually "already exists"; a real
                                    # failure surfaces in the rename below.
                                    pass
                                os.rename(org_ph_name, pic_path)
                                dest_image_list.append(
                                    (pic_path, org_ph_name, photo_id))
                                log.info('[{}] move file {} {} >>>> {}'.format(
                                    sub_task_id, photo_id, org_ph_name,
                                    pic_path))
                            if not dest_image_list:
                                continue

                            # Record every moved picture in the database.
                            card_id = None
                            if sys.version_info >= (3, 0):
                                card_code_base64 = base64.b64encode(
                                    card_code.encode('utf8')).decode('utf8')
                            else:
                                card_code_base64 = base64.b64encode(card_code)

                            # begin() commits on success, rolls back on
                            # error and always releases the connection (the
                            # original never closed it).
                            with db_engine.begin() as db_conn:
                                if card_code_result:
                                    card_prov = int(card_code[:2])
                                    query_result = db_conn.execute(
                                        r'''
                                        SELECT card_id
                                        FROM facedata.fac_person_base_info
                                        WHERE card_code = :1 and card_prov = :2
                                        ''', (card_code_base64,
                                              card_prov)).fetchall()
                                    if query_result:
                                        card_id = query_result[0][0]
                                    else:
                                        card_id = 'H_' + str(uuid.uuid1())
                                        card_type = '1'  # ID cards use 1
                                        # Insert into FAC_PERSON_BASE_INFO
                                        db_conn.execute(
                                            r'''
                                            insert into facedata.fac_person_base_info(
                                                card_id, card_code,
                                                card_prov, card_type
                                            ) VALUES(:1, :2,:3, :4)
                                            ''',
                                            (card_id, card_code_base64,
                                             card_prov, card_type))

                                for (moved_path, origin_path,
                                     moved_photo_id) in dest_image_list:
                                    ph_path = os.path.split(moved_path)[0]
                                    ph_name = os.path.basename(moved_path)
                                    # size in KB
                                    space_size = os.path.getsize(
                                        moved_path) / 1024
                                    with Image.open(moved_path) as img:
                                        pixel = '{}*{}'.format(
                                            img.size[0], img.size[1])
                                        dpi = img.info.get(
                                            'dpi') and '{}*{}'.format(
                                                *img.info['dpi'])
                                        is_colour = \
                                            picture_util.picture_is_colour(img)
                                    data_source = '5'  # model H uses 5
                                    if card_id:
                                        # Photo base-info table
                                        db_conn.execute(
                                            r'''
                                            insert into facedata.fac_photo_base_info(
                                                stat_month, photo_id, ph_path,
                                                ph_name, space_size, pixel,
                                                dpi, is_colour, data_source,
                                                org_ph_name
                                            ) VALUES(:1, :2,:3,:4,:5,:6,:7,:8, :9, :10)
                                            ''',
                                            (stat_month, moved_photo_id,
                                             ph_path, ph_name, space_size,
                                             pixel, dpi, is_colour,
                                             data_source, origin_path))
                                        # Person/photo link table
                                        db_conn.execute(
                                            r'''
                                            insert into facedata.fac_person_photo(
                                                stat_month, card_id, photo_id,
                                                ph_path, ph_name
                                            ) VALUES(:1, :2,:3,:4, :5)
                                            ''',
                                            (stat_month, card_id,
                                             moved_photo_id, ph_path,
                                             ph_name))
                                    # Model E data table
                                    db_conn.execute(
                                        r'''
                                        insert into facedata.fac_photo_base_info_model(
                                            stat_month, photo_id, ph_path,
                                            ph_name, space_size, pixel, dpi,
                                            is_colour, data_source, bound_id,
                                            app_id, person_id, link_id,
                                            time_start, time_end, user_id,
                                            status
                                        ) VALUES(:1,:2,:3,:4,:5,:6,:7,:8,:9,:10,:11,:12,:13,:14,:15,:16,:17)
                                        ''',
                                        (
                                            stat_month,
                                            moved_photo_id,
                                            ph_path,
                                            ph_name,
                                            space_size,
                                            pixel,
                                            dpi,
                                            is_colour,
                                            data_source,
                                            bound_id,
                                            app_id,
                                            person_id,
                                            link_id,
                                            time_start,
                                            time_end,
                                            card_code_base64,
                                            # '1' when a person row exists
                                            '1' if card_id is not None
                                            else '0'
                                        ))
                            success_count += 1
                            log.info('[{}][{}] card_code={} finish<<<<'.format(
                                sub_task_id, pid, card_code))
                        except BaseException:
                            # Deliberately as broad as the original bare
                            # ``except`` so that moved pictures are always
                            # put back, whatever interrupted the record.
                            file_success_flag = False
                            log.exception(
                                '[{}][{}] card_code={} process file {} json data exception!!!\n{}'
                                .format(sub_task_id, pid, card_code,
                                        log_file_path, json_str))
                            for (moved_path, origin_path,
                                 _) in dest_image_list:
                                try:
                                    os.rename(moved_path, origin_path)
                                    log.info(
                                        '[{}] rollback move file {} >>>> {}'.
                                        format(sub_task_id, origin_path,
                                               moved_path))
                                except OSError:
                                    log.warning(
                                        '[{}] rollback move file {} >>>> {} failed'
                                        .format(sub_task_id, moved_path,
                                                origin_path))

                log.info(
                    '[{}][{}] process file {} file_success_flag={} success_count={} finish@@@@@@@@@@'
                    .format(sub_task_id, pid, log_file_path,
                            file_success_flag, success_count))

                # Move the log file once it has been closed and every
                # record in it succeeded.
                if file_success_flag:
                    new_log_file_path = log_file_path.replace(
                        config['pic_inpath'], config['pic_outpath'])
                    try:
                        os.makedirs(os.path.split(new_log_file_path)[0])
                    except OSError:
                        # Usually "already exists"; a real failure surfaces
                        # in the rename right below.
                        pass
                    os.rename(log_file_path, new_log_file_path)
                    log.info('[{}] move log file {} >>>> {}'.format(
                        sub_task_id, log_file_path, new_log_file_path))
            except BaseException:
                # Same breadth as the original bare ``except``: a failing
                # log file must not stop the remaining ones.
                log.exception('[{}][{}] process file {} exception!!!'.format(
                    sub_task_id, pid, log_file_path))
    finally:
        # Release the pooled connections owned by this worker.
        db_engine.dispose()

    log.info(
        '[{}][{}] sub_process_func finish>>>>>>>>>>>>>>>>>>>>>>>>>'.format(
            sub_task_id, pid))