def stoploop():
    # Poll once per second until either the drain deadline passes or the
    # IOLoop has no pending work, then stop the loop.
    # NOTE(review): appears to be a stray module-level copy of the closure
    # defined inside shutdown(); it references free names `deadline` and
    # `io_loop` that are not defined here, and reads private IOLoop
    # attributes (_callbacks/_timeouts) that are tornado-version dependent.
    now = time.time()
    if now < deadline and (io_loop._callbacks or io_loop._timeouts):
        io_loop.add_timeout(now + 1, stoploop)
    else:
        io_loop.stop()
        logger.info('Http Server shutdown')
def DoResumePayDown(companyid, siteid, syncid, fromsitecode, ownpay, taskid):
    """Download one paid resume via the site handler and write the result
    back to the sync-distribute record identified by *syncid*.

    ownpay == 'T' uses the company's own site account; otherwise the shared
    (company id 0) account is used.
    """
    try:
        if ownpay == 'T':
            handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        else:
            # Use the shared/public account (company id 0)
            common_companyid = 0
            handler = SiteConfig.GetTaskHandler(common_companyid, siteid,
                                                taskid)
        # Account has not passed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        try:
            handler.resume_searcher_down(fromsitecode, syncid, companyid)
            # Write the task status back
            task = ResSyncDistribute.queryWithId(syncid)
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.sync_status = 2010
                # NOTE(review): three format args but only two placeholders;
                # handler.message is silently dropped from this log line.
                logger.info(u'<{}>下载付费简历<{}>成功!'.format(
                    handler.name, fromsitecode, handler.message))
            else:
                task.sync_status = 10
                task.error_message = handler.message
                logger.error(u'<{}>下载付费简历<{}>失败,原因:{}'.format(
                    handler.name, fromsitecode, handler.message))
            task.save()
        except BaseException as e:
            # Wrap unexpected failures so the outer handler records them
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        # NOTE(review): this failure path updates a DBTask row while the
        # success path above updates ResSyncDistribute for the same syncid;
        # confirm both models are really keyed by the same id.
        task = DBTask.queryWithId(syncid)
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
        task.save()
def CheckResumeSearchDownStat(taskid):
    """Finalize a paid-resume download batch tracked in redis.

    Reads the per-task counters hash; when the batch is marked finished and
    all items are accounted for, persists the import history, updates the
    resume state, writes the task status back, and dispatches DoInfo.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if stat:
        # finish flag set by the downloader once all items were queued
        finish = int(stat.get('finish', '0'))
        if finish:
            # Batch counters
            total = int(stat['total'])
            grab = int(stat['grab'])
            succ = int(stat['success'])
            ignore = int(stat['ignore'])
            fail = int(stat['failure'])
            # Origin information
            siteid = int(stat['siteid'])
            importid = stat['importid']
            companyid = int(stat['companyid'])
            imphistoryid = int(stat['imphistoryid'])
            #
            task = ResSyncDistribute.queryWithId(importid)
            if task:
                if total > 0 and total <= succ + ignore + fail:
                    # NOTE(review): this inner test duplicates part of the
                    # outer condition and is always true here.
                    if total <= succ + ignore + fail:
                        # delete() doubles as a guard so only one worker
                        # performs the finalization.
                        if ResumeRedisCli.delete(key):
                            imp = ImpHistory.queryByHistoryId(imphistoryid)
                            if imp:
                                imp.succ_num = succ
                                imp.fail_num = fail
                                imp.end_time = datetime.today()
                                imp.proc_status = 1
                                if imp.succ_num == 0:
                                    imp.is_valid = 'F'
                                imp.save()
                            # Update the resume state
                            ResumeBase.changeStat(task.resume_code)
                            # Write the task status back
                            msg = u'<{}>下载付费简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
                                SiteConfig.getSiteNameById(siteid), total,
                                grab, succ, ignore, fail)
                            task.sync_status = 20
                            task.error_message = ''
                            task.process_time = datetime.today()
                            task.save()
                            logger.info(msg)
                            dtc. async ('zpb.service.handleservice.DoInfo',
                                        *(companyid, siteid, taskid))
                elif total == 0:
                    # Nothing was found: discard the history record
                    if ResumeRedisCli.delete(key):
                        ImpHistory.removeByHistoryId(imphistoryid)
                        msg = u'<{}>下载付费简历总数:0'.format(
                            SiteConfig.getSiteNameById(siteid))
                        task.sync_status = 20
                        task.error_message = ''
                        task.process_time = datetime.today()
                        task.save()
                        logger.info(msg)
def stop(self):
    """Ask the sentinel process to shut down and wait for it to exit.

    Returns True when a running sentinel was stopped, False when it was
    not alive in the first place.
    """
    if self.sentinel.is_alive():
        logger.info('Application-{} stopping.'.format(self.pid))
        # Raise the shutdown flag and block until the process terminates.
        self.stop_event.set()
        self.sentinel.join()
        logger.info('Application-{} has stopped.'.format(self.pid))
        # Drop the events; start() creates fresh ones on the next run.
        self.start_event = None
        self.stop_event = None
        return True
    return False
def stop(self):
    """Signal shutdown and block until the cluster and web processes exit."""
    proc_name = current_process().name
    logger.info('{} stopping application processes'.format(proc_name))
    # Raise the shutdown flag for the pusher/workers.
    self.event_out.set()
    # Poll each child in turn until it has terminated.
    for child in (self.cluster, self.webapper):
        while child.is_alive():
            sleep(0.1)
def wait(self):
    """Block, repeatedly polling the pool, until it reports it is drained.

    Either NoResponsePending or NoWorkerAvailable ends the wait; both are
    normal termination signals, logged and swallowed.
    """
    while True:
        try:
            self.poll(True)
        except NoResponsePending:
            logger.info(u'Thread Pool stoped by NoResponsePending')
            return
        except NoWorkerAvailable:
            logger.info(u'Thread Pool stoped by NoWorkerAvailable')
            return
def start(self):
    """Spawn the Sentinel process and block until it signals readiness.

    Returns this application's pid.
    """
    # Fresh events per run: stop_event asks the sentinel to exit,
    # start_event is set by the sentinel once it is up.
    events = (Event(), Event())
    self.stop_event, self.start_event = events
    self.sentinel = Process(target=Sentinel, args=events)
    self.sentinel.start()
    logger.info('Application-{} starting.'.format(self.pid))
    # Poll (with a short sleep) for the sentinel's ready signal.
    while not self.start_event.is_set():
        sleep(0.1)
    return self.pid
def DoResumeSearcher(companyid, siteid, taskid, searcherid, **kwargs):
    """Run a saved resume search through the site handler.

    Touches the searcher's last-import time first; only searches when the
    bound account is in a usable state (check_status 0 or 50).
    """
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        ResumeSearcher.updateImportTimeBySearcherId(searcherid)
        if handler.bind.check_status not in (0, 50):
            # Bound account cannot log in; record the reason only.
            handler.message = u'绑定账号登录失败'
            logger.info(handler.message)
        else:
            handler.resume_search(searcherid)
    except BaseError:
        # Domain errors are deliberately swallowed; nothing to write back.
        pass
def GetBaikLink(linkid, linkname):
    """Resolve the baike hyperlink for keyword *linkname* and persist it.

    The link is stored as valid ('T') when the resolved value does not
    contain the marker 'none', otherwise as invalid ('F'). Persistence is
    best-effort: failures are logged, never raised.
    """
    logger.info(u'正在处理关键词<{}>'.format(linkname))
    linkval = _getHyperLink(linkname)
    if linkval:
        try:
            # 'none' anywhere in the resolved value marks a missing entry.
            valid = 'F' if 'none' in linkval else 'T'
            HyperLink.appendLink(linkid, linkval, valid)
        except Exception as e:
            # Was a bare `except: pass`: keep the best-effort semantics but
            # narrow the catch (don't swallow SystemExit/KeyboardInterrupt)
            # and record the failure instead of hiding it.
            logger.error(u'关键词<{}>超链接保存失败,原因:{}'.format(linkname, e))
def Do20(companyid, siteid, syncid, taskid, **kwargs):
    """Import positions from the site and write the result to the DBTask
    row *syncid*; on success marks the bound account verified and fans out
    follow-up DoInfo/Do201 async tasks.
    """
    task = DBTask.queryWithId(syncid)
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # Account has been unbound: reschedule and abort
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # Account has not passed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        try:
            handler.position_import()
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.sync_status = 2000
                task.succ_num += 1
                task.log_info = handler.message
                # A successful import proves the account works: mark it so
                handler.bind.check_status = 50
                handler.bind.login_result = '登录成功'
                handler.bind.last_succ_time = datetime.today()
                handler.bind.save()
                # Dispatch the follow-up async tasks (dedup'ed by uid)
                infouid = md5(taskid + 'info')
                dtc. async ('zpb.service.handleservice.DoInfo',
                            *(companyid, siteid, infouid), uid=infouid)
                resuid = md5(taskid + '201')
                dtc. async ('zpb.service.handleservice.Do201',
                            *(companyid, siteid, syncid, resuid), uid=resuid)
                logger.info(handler.message)
            elif HANDLE_STATUS.AGAIN == handler.status:
                # Retryable failure
                task.sync_status = 11
                task.fail_num += 1
                task.log_info = handler.message
                logger.error(handler.message)
            else:
                task.sync_status = 10
                task.fail_num += 1
                task.log_info = handler.message
                logger.error(handler.message)
        except BaseException as e:
            # Wrap unexpected failures so the outer handler records them
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    # Single save covers both the success and the failure paths
    task.save()
def guard(self):
    """Supervise the dispatcher until stop_event is set, then shut down.

    Announces readiness via start_event, then loops: any dead dispatcher
    process is respawned and the death is logged as an error.
    """
    logger.info('{} guarding Application at {}'.format(
        current_process().name, self.pid))
    # Tell start() we are up.
    self.start_event.set()
    logger.info('Application-{} running.'.format(self.parent_pid))
    poll_interval = 0.5  # seconds between supervision checks
    while not self.stop_event.is_set():
        if not self.dispatcher.is_alive():
            # Dispatcher died unexpectedly: spawn a replacement.
            self.dispatcher = self.spawn_dispatcher()
            logger.error(
                'reincarnated dispatcher {} after sudden death'.format(
                    self.dispatcher.name))
        sleep(poll_interval)
    self.stop()
def shutdown():
    """Stop accepting connections and drain the IOLoop, hard-stopping it
    after a 60 second deadline."""
    logger.info('Stopping Http Server')
    # Stop listening; in-flight requests keep running until drained.
    http_server.stop()
    logger.info('Http Server will shutdown in %s seconds ...', 60)
    io_loop = tornado.ioloop.IOLoop.instance()
    deadline = time.time() + 60

    def stoploop():
        # Re-arm once per second while pending work remains and the
        # deadline has not passed; otherwise stop the loop.
        # NOTE(review): _callbacks/_timeouts are private IOLoop attributes
        # and are tornado-version dependent.
        now = time.time()
        if now < deadline and (io_loop._callbacks or io_loop._timeouts):
            io_loop.add_timeout(now + 1, stoploop)
        else:
            io_loop.stop()
            logger.info('Http Server shutdown')

    stoploop()
def damaByUrl(self, dtype, url):
    """Solve the captcha at *url* through the Dama2 service; return the text.

    A redis marker keyed by the bound account (or just the normalized URL)
    records the in-flight captcha and is removed afterwards no matter what.
    """
    pieces = urlparse(url)
    # Normalize: scheme + host + path, dropping query/fragment.
    fmturl = pieces.scheme + '://' + pieces.hostname + pieces.path
    if self.bind:
        damakey = 'dama:{}:{}:{}:{}'.format(
            self.bind.company_id, self.bind.site_id,
            self.bind.login_name, fmturl)
    else:
        damakey = 'dama:{}'.format(fmturl)
    ResumeRedisCli.set(damakey, url)
    try:
        logger.info(u'<{}>进行验证码打码'.format(self.name))
        answer = Dama2().d2Url(dtype, url)
        logger.info(u'<{}>验证码打码完成'.format(self.name))
        return answer['verify']
    finally:
        ResumeRedisCli.delete(damakey)
def download_email(pop, emailconf, mid, uidl, taskid):
    """Fetch message *mid* from the POP3 connection and hand it to the
    resume parser; on success remember the UIDL so it is never re-fetched.

    Both the download and the parse step are best-effort: failures are
    logged and the function simply returns.
    """
    user = emailconf.email_user
    logger.info(u'[*] 正在下载邮箱<{}>第<{}>封邮件'.format(user, mid))
    try:
        _, lines, _ = pop.retr(mid)
        msg = email.message_from_string('\n'.join(lines))
    except error_proto as e:
        logger.error(u'[*] 邮箱<{}>中的第<{}>封邮件下载失败,原因:{}'.format(
            user, mid, e))
        return
    try:
        # Extract resume content / attachments, then record the UIDL in the
        # per-mailbox "seen" set.
        parse_email(emailconf, msg, mid, taskid)
        MailRedisCli.sadd(MAIL_SET_KEY % user, uidl)
    except BaseException as e:
        logger.error(u'[*] 邮箱<{}>中的第<{}>封邮件解析失败,原因:{}'.format(
            user, mid, e))
def CheckResumeSearchStat(taskid):
    """Finalize a resume-search import batch tracked in redis.

    When all items are accounted for, persists the import history and
    refreshes the bind's last-import time; an empty batch only removes the
    history record.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if stat:
        # finish flag set once all items were queued
        finish = int(stat.get('finish', '0'))
        if finish:
            # Batch counters
            total = int(stat['total'])
            grab = int(stat['grab'])
            succ = int(stat['success'])
            ignore = int(stat['ignore'])
            fail = int(stat['failure'])
            # Origin information
            siteid = int(stat['siteid'])
            importid = stat['importid']
            companyid = int(stat['companyid'])
            imphistoryid = int(stat['imphistoryid'])
            #
            #if total > 0 and total <= succ + ignore + fail:
            if total <= succ + ignore + fail:
                # NOTE(review): with the `total > 0` guard commented out,
                # the `elif total == 0` branch below is unreachable (0 always
                # satisfies this condition) -- confirm intent.
                # delete() doubles as a guard so only one worker finalizes.
                if ResumeRedisCli.delete(key):
                    imp = ImpHistory.queryByHistoryId(imphistoryid)
                    if imp:
                        imp.succ_num = succ
                        imp.fail_num = fail
                        imp.end_time = datetime.today()
                        imp.proc_status = 1
                        if imp.succ_num == 0:
                            imp.is_valid = 'F'
                        imp.save()
                    # Refresh the bind's last-import time (siteid comes from
                    # the redis key; the task's site_id may be 0)
                    AuthService().updateBindImportTimeByCompanyIdAndSiteId(
                        companyid, siteid)
                    # Log the final batch summary
                    msg = u'<{}>简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
                        SiteConfig.getSiteNameById(siteid), total, grab,
                        succ, ignore, fail)
                    logger.info(msg)
            elif total == 0:
                # Nothing found: drop the history record
                if ResumeRedisCli.delete(key):
                    ImpHistory.removeByHistoryId(imphistoryid)
                    # Refresh the bind's last-import time (siteid comes from
                    # the redis key; the task's site_id may be 0)
                    AuthService().updateBindImportTimeByCompanyIdAndSiteId(
                        companyid, siteid)
def damaByUrlImage(self, dtype, url):
    """Download the captcha image at *url*, solve it via Dama2, return the
    text.

    Unlike damaByUrl, the image bytes are fetched locally (without
    authentication) and submitted as a file. A redis marker records the
    in-flight captcha and is removed afterwards no matter what.
    """
    pieces = urlparse(url)
    # Normalize: scheme + host + path, dropping query/fragment.
    fmturl = pieces.scheme + '://' + pieces.hostname + pieces.path
    if self.bind:
        damakey = 'dama:{}:{}:{}:{}'.format(
            self.bind.company_id, self.bind.site_id,
            self.bind.login_name, fmturl)
    else:
        damakey = 'dama:{}'.format(fmturl)
    ResumeRedisCli.set(damakey, url)
    try:
        logger.info(u'<{}>进行验证码打码'.format(self.name))
        response = self._httpClient.call(url, authentication=False)
        answer = Dama2().d2File(dtype, response.data)
        logger.info(u'<{}>验证码打码完成'.format(self.name))
        return answer['verify']
    finally:
        ResumeRedisCli.delete(damakey)
def dispatcher(event):
    """Poll every pending-task table and fan each batch out as async jobs
    until *event* is set; any unexpected error ends the loop."""

    def _async_task(tasks):
        # Dispatch one batch of pending rows as async jobs.
        for task in tasks:
            taskname = task.get('name', False)
            if taskname:
                logger.debug(u'dispatcher task named <{}>'.format(taskname))
            args = task.get('args', ())
            kwargs = task.get('kwargs', {})
            # The row id doubles as the dedup uid for the async queue.
            kwargs['uid'] = task['id']
            dtc. async (task['func'], *args, **kwargs)

    #
    name = current_process().name
    pid = current_process().pid
    logger.info('{} dispatch tasks at {}'.format(name, pid))
    while not event.is_set():
        try:
            # Task command center
            task_set = DBTask.queryPending() or []
            _async_task(task_set)
            # Manual resume parsing
            task_set = ImpLocalFile.queryPending() or []
            _async_task(task_set)
            # Paid resume downloads
            task_set = ResSyncDistribute.queryPending() or []
            _async_task(task_set)
            # Scheduled email searches
            task_set = EmailConf.queryPending() or []
            _async_task(task_set)
            # Scheduled resume searches
            task_set = ResumeSearcher.queryPending() or []
            _async_task(task_set)
            # Keyword baike hyperlinks
            task_set = HyperLink.queryPending() or []
            _async_task(task_set)
            #
            sleep(2)
        except BaseException as e:
            # Any polling/dispatch failure stops the dispatcher entirely.
            logger.error(e)
            break
    logger.info('{} stopped dispatch tasks'.format(name))
def pop3(host, port, username, password, usessl):
    """Log in to a POP3 mailbox.

    Returns (connection, '') on success; (None, reason) when the
    credentials or the host name are bad (permanent failure); and
    (None, None) on any other error, signalling the caller may retry.
    """
    try:
        # NOTE(review): `port` is accepted but never passed to poplib, so
        # the default POP3/POP3-SSL port is always used -- confirm intent
        # before wiring it through.
        conn = poplib.POP3_SSL(host) if usessl else poplib.POP3(host)
        if debug_mode:
            conn.set_debuglevel(2)
        conn.user(username)
        conn.pass_(password)
        logger.info(u'[+] 邮箱<{}> 登录成功'.format(username))
        return conn, ''
    except error_proto:
        error_message = u'登录账号或密码错误'
        # BUGFIX: the format string was missing its second placeholder, so
        # the failure reason was silently dropped from the log.
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:{}'.format(
            username, error_message))
        return None, error_message
    except socket.gaierror:
        error_message = u'非法POP3服务器名'
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:<{}>为{}'.format(
            username, host, error_message))
        return None, error_message
    except BaseException as e:
        # Likely a transient network problem; (None, None) lets the caller
        # retry without invalidating the mailbox configuration.
        logger.error(u'[+] 邮箱<{}> 登录失败,原因:{}'.format(username, str(e)))
        return None, None
def post(self):
    """Confirm that a company has downloaded the listed resumes.

    Expects `sessionid` (an encrypted company token) and `resumecodes`
    (comma-separated). The JSON reply always carries `status` (0 = ok,
    1 = error) and a human-readable `message`.
    """
    ret = {'status': 1, 'message': ''}
    try:
        session = self.get_argument('sessionid', '')
        resumecodes = self.get_argument('resumecodes', '')
        if not session:
            ret['message'] = u'非法的请求!'
        elif not resumecodes:
            ret['message'] = u'未提交待确认简历编码!'
        else:
            # Token decrypts to "<companyid>:<...>"; anything else is bogus.
            messages = decryptBindPasswd('zpb', session).split(':')
            if len(messages) == 2:
                companyid = messages[0]
                logger.info(u'企业编码<{}>确认已下载简历'.format(companyid))
                data = ResumeBase.ackExport(companyid,
                                            resumecodes.split(','))
                ret['status'] = 0
                ret['message'] = u'简历状态确认成功!'
            else:
                ret['message'] = u'非法的请求!'
    finally:
        # The response is written even when an exception propagates.
        self.write(json.dumps(ret))
def Do201(companyid, siteid, syncid, taskid, **kwargs):
    """Import resumes from the site and write the result to the DBTask row
    *syncid*."""
    task = DBTask.queryWithId(syncid)
    try:
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # Account has been unbound: reschedule and abort
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # Account has not passed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        try:
            handler.resume_import(syncid)
        except BaseException as e:
            # Wrap unexpected failures so the outer handler records them
            raise UnHandleRuntimeError(e)
        if HANDLE_STATUS.SUCCESS == handler.status:
            task.succ_num += 1
            task.log_info = handler.message
            task.sync_status = 20
            # dtc. async ('zpb.service.handleservice.DoInfo', *(companyid, siteid, taskid))
            logger.info(handler.message)
        else:
            task.fail_num += 1
            task.log_info = handler.message
            # AGAIN (11) means retryable; anything else is a hard failure
            if HANDLE_STATUS.AGAIN == handler.status:
                task.sync_status = 11
            else:
                task.sync_status = 10
            logger.error(handler.message)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    # Single save covers both the success and the failure paths
    task.save()
def DownResume(checkstatservice, **kwargs):
    """Download one resume described by **kwargs and update the redis batch
    counters; always re-dispatches *checkstatservice* so batch completion is
    re-evaluated.

    On SUCCESS the resume is handed to ParseResume; on AGAIN the download is
    re-queued with an incremented retry count.
    """
    data = kwargs.copy()
    taskid = data['taskid']
    companyid = data['companyid']
    siteid = data['siteid']
    username = data['username']
    resumeid = data['resumeid']
    postdate = data['postdate']
    # Forced refresh (used for paid resume downloads)
    force = data.get('force', False)
    #
    sitename = SiteConfig.getSiteNameById(siteid)
    importkey = Conf.RESUME_IMPORT_HKEY % taskid
    try:
        try:
            # Freshness check against the resume's post date
            if not force:
                if not ResumeBase.isNew(companyid, siteid, resumeid,
                                        postdate):
                    # Unchanged resumes are counted as ignored
                    logger.debug(u'<{}>简历<{}, {}>重复下载!'.format(
                        sitename, username, resumeid))
                    ResumeRedisCli.hincrby(importkey, 'ignore')
                    return
            try:
                handler = SiteConfig.GetTaskHandler(companyid, siteid,
                                                    taskid)
                logger.info(u'正在下载<{}>简历<{}, {}>'.format(
                    sitename, username, resumeid))
                handler.resume_down(data)
                if handler.status == HANDLE_STATUS.SUCCESS:
                    logger.info(handler.message)
                    # NOTE(review): the trailing comma after these calls
                    # wraps the result in a 1-tuple; harmless but likely
                    # unintended.
                    dtc. async ('zpb.service.resumeservice.ParseResume',
                                checkstatservice, **data),
                elif handler.status == HANDLE_STATUS.AGAIN:
                    logger.info(u'<{}>简历<{}, {}>需要重新下载'.format(
                        sitename, username, resumeid))
                    data['retry'] = data.get('retry', 0) + 1
                    dtc. async ('zpb.service.resumeservice.DownResume',
                                checkstatservice, **data),
                else:
                    ResumeRedisCli.hincrby(importkey, 'failure')
                    logger.error(handler.message)
            except BaseError as e:
                # Domain errors are swallowed; the batch counter is not
                # advanced for them.
                pass
        except BaseException as e:
            # Unexpected failure: re-queue the download and log
            dtc. async ('zpb.service.resumeservice.DownResume',
                        checkstatservice, **data),
            logger.error(u'<{}>简历<{}, {}>下载异常,原因:{}'.format(
                sitename, username, resumeid, e))
    finally:
        # Always trigger the batch-completion check
        dtc. async (checkstatservice, taskid)
def Do101(companyid, siteid, syncid, taskid, **kwargs):
    """Publish (add or modify) one job on the site and write the result to
    the distribute record, a job memo, and the DBTask row *syncid*.

    Requires kwargs['jobid']; the job must already have a
    JobSyncDistribute row for this company/site.
    """
    task = DBTask.queryWithId(syncid)
    try:
        jobid = kwargs.pop('jobid', None)
        if not jobid:
            raise InvalidParamError(companyid, siteid, u'未指定发布职位编号')
        handler = SiteConfig.GetTaskHandler(companyid, siteid, taskid)
        # Account has been unbound: reschedule and abort
        if handler.bind.is_valid != 'T':
            DBTask.newSchedule(companyid, siteid, False)
            raise CompanyAccountUnBindError(companyid, siteid, u'账号已解除绑定')
        # Account has not passed verification
        if handler.bind.check_status == 10:
            raise CompanyAccountInvalidError(companyid, siteid, u'账号登录失败')
        # step 1: locate the sync record for this job/company/site
        dist = JobSyncDistribute.queryByJobIdAndCompanyIdWithSiteId(
            jobid, companyid, siteid)
        if not dist:
            raise JobNotDistributeError(companyid, siteid, u'未找到职位同步记录')
        try:
            # No third-party code yet means this is a first-time publish
            if not dist.third_job_code:
                handler.position_add(jobid)
            else:
                handler.position_modify(jobid, dist.third_job_code,
                                        dist.last_sync_time)
            # step 2: update the distribute record
            if HANDLE_STATUS.SUCCESS == handler.status:
                if hasattr(handler, 'thirdjobcode'):
                    dist.third_job_code = handler.thirdjobcode
                dist.sync_succ_num += 1
                dist.sync_status = 20
                dist.error_message = ''
                dist.last_sync_time = datetime.today()
                # BaseJob.updateSyncTimeByJobId(jobid)
                # dtc. async ('zpb.service.handleservice.DoInfo', *(companyid, siteid, taskid))
            else:
                dist.sync_fail_num += 1
                dist.sync_status = 10
                dist.error_message = handler.message
            dist.save()
            # step 3: leave a memo on the job
            jmm = JobMemo(jobid)
            if HANDLE_STATUS.SUCCESS == handler.status:
                jmm.memo_content = u'[{}]发布成功'.format(handler.name)
            else:
                jmm.memo_content = u'[{}]发布失败,{}'.format(
                    handler.name, handler.message)
            jmm.save()
            # step 4: write the task status back
            if HANDLE_STATUS.SUCCESS == handler.status:
                task.succ_num += 1
                task.log_info = handler.message
                task.sync_status = 20
                logger.info(handler.message)
            else:
                task.fail_num += 1
                task.log_info = handler.message
                # AGAIN (11) is retryable; anything else is a hard failure
                if HANDLE_STATUS.AGAIN == handler.status:
                    task.sync_status = 11
                else:
                    task.sync_status = 10
                logger.error(handler.message)
        except BaseException as e:
            # Wrap unexpected failures so the outer handler records them
            raise UnHandleRuntimeError(e)
    except BaseError as e:
        task.sync_status = 10
        task.fail_num += 1
        task.log_info = e.message
    # Single save covers both the success and the failure paths
    task.save()
def post(self):
    """Parse an uploaded resume file through the YGYS SOAP service and
    store the result.

    The JSON reply always carries `status` (0 = ok, 1 = error) and
    `message`; on success also `count` and `res_resume_base`.
    """
    ret = {'status': 1, 'message': ''}
    try:
        # The upload must arrive under form field name 'file'
        if self.request.files.has_key('file'):
            #
            try:
                soapCli = SudsClient(Conf.YGYS['soapuri'],
                                     timeout=Conf.YGYS['timeout'])
            except BaseException as e:
                logger.error(e)
                ret['message'] = u'无法连接简历解析WebService服务'
                # The finally block still writes the error response
                return
            #
            try:
                sitecode = self.get_argument('sitefromcode', '0')
                maildate = self.get_argument('maildate', '')
                filedata = self.request.files['file'][0]
                filename = filedata['filename']
                logger.info(u'解析简历文件<{}>'.format(filename))
                # File extension; default to '.text' when absent
                ext = os.path.splitext(filename)[-1]
                if not ext:
                    ext = '.text'
                res = soapCli.service.TransResumeByJsonStringForFileBase64(
                    Conf.YGYS['username'], Conf.YGYS['password'],
                    base64.b64encode(filedata['body']), ext)
                if res:
                    js = json.loads(res)
                    # Type == 0 means the service rejected the file and
                    # Name carries the reason
                    if js['Type'] == 0:
                        ret['message'] = js['Name']
                    elif not js['Name']:
                        ret['message'] = u'非完整简历'
                    else:
                        # Fill in the fields AssembelResumeByJson expects
                        js['companyid'] = 0
                        js['jobid'] = 0
                        js['source'] = 0
                        js['apply_job_id'] = 0
                        js['siteid'] = sitecode
                        js['apply_time'] = maildate
                        js['websiteresumeid'] = js['WebSiteResumeID']
                        js['matching'] = 0
                        (res, message, new) = AssembelResumeByJson(js)
                        if res:
                            # message is the resume code on success
                            data = ResumeBase.queryAndExportByResumeCode(
                                message)
                            ret['status'] = 0
                            ret['count'] = 1
                            ret['message'] = u'简历文件解析成功!'
                            ret['res_resume_base'] = data
                        else:
                            ret['message'] = message
                else:
                    ret['message'] = u'解析结果空白!'
            except BaseException as e:
                logger.error(e)
                ret['message'] = u'简历解析内部服务错误!'
        else:
            ret['message'] = u'未上传需要解析的简历文件!'
    finally:
        # Log remaining failures and always write the JSON response
        if ret['status'] == 1:
            logger.error(ret['message'])
        self.write(json.dumps(ret))
def CheckEmailImportStat(taskid):
    """Finalize an email-import batch tracked in redis.

    When all items are accounted for, persists the import history, updates
    the mailbox's import stats, and writes the optional DBTask back; an
    empty batch only removes the history record.
    """
    key = Conf.RESUME_IMPORT_HKEY % taskid
    stat = ResumeRedisCli.hgetall(key)
    if stat:
        # finish flag set once all items were queued
        finish = int(stat.get('finish', '0'))
        if finish:
            # Batch counters
            total = int(stat['total'])
            grab = int(stat['grab'])
            succ = int(stat['success'])
            ignore = int(stat['ignore'])
            fail = int(stat['failure'])
            # Origin information (siteid holds the mailbox address here)
            siteid = stat['siteid']
            importid = stat['importid']
            companyid = stat['companyid']
            imphistoryid = int(stat['imphistoryid'])
            # NOTE(review): int(None) raises TypeError when 'syncid' is
            # missing from the hash; pull_email always sets it, but a safer
            # default would be '0'.
            syncid = int(stat.get('syncid', None))
            #
            #if total > 0 and total <= succ + ignore + fail:
            if total <= succ + ignore + fail:
                # NOTE(review): with the `total > 0` guard commented out,
                # the `elif total == 0` branch below is unreachable (0
                # always satisfies this condition) -- confirm intent.
                # delete() doubles as a guard so only one worker finalizes.
                if ResumeRedisCli.delete(key):
                    imp = ImpHistory.queryByHistoryId(imphistoryid)
                    if imp:
                        imp.succ_num = succ
                        imp.fail_num = fail
                        imp.end_time = datetime.today()
                        imp.proc_status = 1
                        if imp.succ_num == 0:
                            imp.is_valid = 'F'
                        imp.save()
                    # Update the mailbox's import stats
                    EmailConf.updateImportTimeAndNumberByImportId(
                        importid, succ)
                    # Write the task status back
                    msg = u'<{}>邮箱简历总数<{}>,下载数<{}>,成功数<{}>,重复数<{}>,失败数<{}>.'.format(
                        siteid, total, grab, succ, ignore, fail)
                    logger.info(msg)
                    # Optional task write-back
                    if syncid:
                        task = DBTask.queryWithId(syncid)
                        if task:
                            task.succ_num += 1
                            task.sync_status = 20
                            task.log_info = msg
                            task.save()
            elif total == 0:
                # Nothing found: drop the history record
                if ResumeRedisCli.delete(key):
                    ImpHistory.removeByHistoryId(imphistoryid)
                    # Update the mailbox's import stats
                    EmailConf.updateImportTimeAndNumberByImportId(
                        importid, 0)
                    msg = u'<{}>邮箱简历总数:0'.format(siteid)
                    logger.info(msg)
                    # Optional task write-back
                    if syncid:
                        task = DBTask.queryWithId(syncid)
                        if task:
                            task.succ_num += 1
                            task.sync_status = 20
                            task.log_info = msg
                            task.save()
def pull_email(emailconf, companyid, taskid, importid, syncid): pop, error_message = pop3(emailconf.pop3_host, emailconf.pop3_port, emailconf.email_user, emailconf.email_password, emailconf.is_ssl == 'T') if pop: try: try: typ, uidls, octets = pop.uidl() except error_proto as e: logger.error(u'[-] 获取邮箱<{}>状态失败,原因:{0}'.format( emailconf.email_user, e)) return if len(uidls) > 0: msgs = [] setkey = MAIL_SET_KEY % emailconf.email_user # 过滤已下载邮件 for item in uidls: mid, uidl = item.split() if not MailRedisCli.sismember(setkey, uidl): msgs.append((mid, uidl)) if len(msgs) > 0: logger.info(u'[+] 邮箱<{}>待下载 {} 封未读邮件...'.format( emailconf.email_user, len(msgs))) imp = ImpHistory.new(emailconf.company_id, 0, emailconf.import_id, 3) imp.src_memo = emailconf.email_user if imp.save(): key = Conf.RESUME_IMPORT_HKEY % taskid ResumeRedisCli.hmset( key, { 'total': 0, 'grab': 0, 'success': 0, 'ignore': 0, 'failure': 0, 'finish': 0, 'siteid': emailconf.email_user, # 邮箱地址 'importid': importid, # 来源id,用於追溯 'companyid': companyid, 'imphistoryid': imp.history_id, # 后续存储imp_history_resume时使用 'syncid': syncid }) for mid, uidl in msgs: download_email(pop, emailconf, mid, uidl, taskid) ResumeRedisCli.hincrby( Conf.RESUME_IMPORT_HKEY % taskid, 'finish') dtc. async ( 'zpb.service.stateservice.CheckEmailImportStat', taskid) logger.info(u'[+] 邮箱<{}>已下载 {} 封未读邮件!'.format( emailconf.email_user, len(msgs))) else: logger.info(u'[+] 邮箱<{}>没有未读邮件!'.format( emailconf.email_user)) else: logger.info(u'[-] 邮箱<{}>没有任何邮件!'.format(emailconf.email_user)) finally: pop.quit() elif error_message: emailconf.is_valid = 'F' emailconf.import_memo = error_message emailconf.save()
def ParseResume(checkstatservice, **kwargs):
    """Parse a downloaded resume file and store it, updating the redis
    batch counters; always re-dispatches *checkstatservice* so batch
    completion is re-evaluated.
    """
    data = kwargs.copy()
    taskid = data['taskid']
    companyid = data['companyid']
    siteid = data['siteid']
    jobid = data['jobid']
    source = data.get('source', 0)
    username = data['username']
    resumeid = data['resumeid']
    postdate = data['postdate']
    # Forced refresh (used for paid resume downloads)
    force = data.get('force', False)
    # Resume-to-job matching score
    matching = data.get('matching', 0)
    #
    sitename = SiteConfig.getSiteNameById(siteid)
    importkey = Conf.RESUME_IMPORT_HKEY % taskid
    try:
        try:
            # Start parsing
            logger.info(u'正在解析<{}>简历<{}>, <{}>'.format(
                sitename, username, resumeid))
            filepath = data['filepath']
            if os.path.isfile(filepath):
                # For file backup
                # basename = os.path.basename(filepath)
                # dirname = os.path.dirname(filepath)
                ext = os.path.splitext(filepath)[-1]
                ret = _doResumeParseByFile(
                    base64.b64encode(open(filepath, 'rb').read()), ext)
                if ret:
                    js = json.loads(ret)
                    # Type == 0: the parser rejected the file
                    if js['Type'] == 0:
                        ResumeRedisCli.hincrby(importkey, 'failure')
                        logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                            sitename, username, resumeid, js['Name']))
                        return
                    if not js['Name']:
                        ResumeRedisCli.hincrby(importkey, 'failure')
                        logger.error(
                            u'<{}>简历<{}, {}>解析失败,原因:文件不是一份完整的简历!'.format(
                                sitename, username, resumeid))
                        return
                    #
                    js['companyid'] = companyid
                    js['siteid'] = siteid
                    # Site 4 gets special resume-id normalization
                    if siteid == 4:
                        if resumeid:
                            js['websiteresumeid'] = resumeid
                        elif js['WebSiteResumeID']:
                            js['websiteresumeid'] = js[
                                'WebSiteResumeID'].replace('J', '')
                        else:
                            js['websiteresumeid'] = ''
                    else:
                        js['websiteresumeid'] = resumeid if resumeid else js[
                            'WebSiteResumeID']
                    js['jobid'] = jobid
                    js['source'] = source
                    js['force'] = force
                    js['matching'] = matching
                    js['apply_job_id'] = 0
                    js['apply_time'] = postdate
                    (res, message, new) = AssembelResumeByJson(js)
                    if res:
                        logger.info(u'<{}>简历<{}, {}>解析成功!'.format(
                            sitename, username, resumeid))
                        # Store the resume history
                        imphistoryid = ResumeRedisCli.hget(
                            importkey, 'imphistoryid')
                        # A refresh of an existing resume is not a new one
                        if new:
                            ResumeRedisCli.hincrby(importkey, 'success')
                            # Save the detailed history record
                            if ImpHistoryResume.newAndSave(
                                    imphistoryid, companyid, message):
                                ImpHistory.incSuccessByHistoryId(
                                    imphistoryid)
                            else:
                                logger.error(
                                    u'<{}>简历<{}, {}>历史详情保存异常!'.format(
                                        sitename, username, resumeid))
                        else:
                            ResumeRedisCli.hincrby(importkey, 'ignore')
                    else:
                        ResumeRedisCli.hincrby(importkey, 'failure')
                        logger.error(u'<{}>简历<{}, {}>解析失败,原因:{}'.format(
                            sitename, username, resumeid, message))
                else:
                    ResumeRedisCli.hincrby(importkey, 'failure')
                    message = u'简历服务器解析简历返回结果异常,<{}><{}, {}>'.format(
                        sitename, username, resumeid)
                    logger.error(message)
            else:
                ResumeRedisCli.hincrby(importkey, 'failure')
                # NOTE(review): one placeholder, three args -- the message
                # shows sitename where the missing file path was probably
                # intended; username/resumeid are silently dropped.
                message = u'简历解析失败,磁盘文件<{}>不存在'.format(sitename,
                                                       username, resumeid)
                logger.error(message)
        except BaseException as e:
            # Unexpected failure: re-queue the parse and log
            # NOTE(review): the trailing comma wraps the call in a 1-tuple;
            # harmless but likely unintended.
            dtc. async ('zpb.service.resumeservice.ParseResume',
                        checkstatservice, **data),
            logger.error(u'简历解析服务异常,message:{}'.format(e))
    finally:
        # Always trigger the batch-completion check
        dtc. async (checkstatservice, taskid)
def RunWebApp():
    """Bind the HTTP server, install shutdown signal handlers, and run the
    tornado IOLoop (blocks until the loop is stopped)."""
    logger.info('Http Server listen at port:<{}>'.format(options.port))
    http_server.listen(options.port)
    # Graceful-shutdown hooks for Ctrl-C and kill.
    for signum in (signal.SIGINT, signal.SIGTERM):
        signal.signal(signum, sig_handler)
    tornado.ioloop.IOLoop.current().start()
def ParseLocalResume(companyid, taskid, importid):
    """Parse a manually-uploaded resume (file or pasted text) identified by
    *importid* and record the outcome in ImpLocalFile / ImpHistory.

    input_type == 1 means an uploaded file; anything else is raw text.
    """
    row = ImpLocalFile.queryByImportId(importid)
    if row:
        sitename = SiteConfig.getSiteNameById(row.from_site_id)
        logger.info(u'开始解析<{}>的本地简历'.format(sitename))
        row.proc_status = 10
        imp = ImpHistory.new(row.company_id, row.from_site_id,
                             row.import_id, row.input_type)
        if row.input_type == 1:
            imp.src_memo = row.user_file_name
        if not imp.save():
            return
        # Prefix used in every log/failure message below
        log_msg = u''
        try:
            if row.input_type == 1:
                log_msg = u'简历文件<{}>解析'.format(row.user_file_name)
                # Original comment said the DB stores hex and this decodes
                # it, but b64encode *encodes* the raw bytes for the parser
                # -- NOTE(review): confirm the storage format.
                content = base64.b64encode(row.file_content)
                ext = os.path.splitext(row.user_file_name)[-1]
                ret = _doResumeParseByFile(content, ext)
            else:
                log_msg = u'简历文本解析'
                content = row.input_content
                ret = _doResumeParseByString(content)
            if ret:
                js = json.loads(ret)
                if js['Type'] > 0 and js['Name']:
                    js['companyid'] = row.company_id
                    js['siteid'] = row.from_site_id
                    if js['WebSiteResumeID']:
                        js['websiteresumeid'] = js['WebSiteResumeID']
                    else:
                        # Synthesize an id for resumes with no site id
                        js['websiteresumeid'] = 'Local{0}'.format(
                            row.import_id)
                    js['jobid'] = ''
                    js['source'] = 0
                    js['apply_job_id'] = row.apply_job_id
                    js['apply_time'] = datetime2str(datetime.today())
                    (res, message, new) = AssembelResumeByJson(js)
                    if res:
                        # message carries the resume code on success
                        row.resume_code = message
                        row.proc_status = 20
                        # Save the detailed history record
                        if ImpHistoryResume.newAndSave(
                                imp.history_id, row.company_id, message):
                            # Save the history summary
                            imp.succ_num = 1
                            imp.proc_status = 1
                            message = u'{}成功'.format(log_msg)
                            logger.info(message)
                        else:
                            message = u'数据存储失败'
                            logger.error('{}失败,{}'.format(log_msg, message))
                            imp.fail_num = 1
                            imp.proc_status = 2
                            imp.fail_reason = message
                    else:
                        logger.error('{}失败,{}'.format(log_msg, message))
                        imp.fail_num = 1
                        imp.proc_status = 2
                        imp.fail_reason = message
                else:
                    message = u'简历内容为空'
                    logger.error('{}失败,{}'.format(log_msg, message))
                    imp.fail_num = 1
                    imp.proc_status = 2
                    imp.fail_reason = message
            else:
                message = u'解析结果为空'
                logger.error('{}失败,{}'.format(log_msg, message))
                imp.fail_num = 1
                imp.proc_status = 2
                imp.fail_reason = message
        except BaseException as e:
            message = u'{}异常,原因:{}'.format(log_msg, e)
            logger.error(message)
            imp.fail_num = 1
            imp.proc_status = 2
            imp.fail_reason = u'内部服务错误!'
        # Persist the outcome
        row.save()
        imp.end_time = datetime.today()
        imp.save()