def need_login(self, url, con, hint):
    """Decide from a response whether the session has to log in again.

    ``url`` and ``hint`` are accepted for interface compatibility but are not
    used here; only ``con.request.url`` and ``con.text`` are inspected.

    Returns:
        True when the request ended up on an ``/e/account/`` URL, or when a
        "verification required" page was detected and ``self.validate_user``
        eventually succeeded. False when none of the markers is present.

    Raises:
        SystemExit: with code 1 when the verification page is served from
            ``secret.liepin.com`` (a notification e-mail is sent first), when
            verification keeps failing after ``self._retry_max_times``
            retries, or when the "operating too frequently" page is returned.
    """
    request_url = con.request.url
    if r'/e/account/' in request_url:
        return True

    # Disabled check, kept for reference:
    # if u'登录猎聘通' in con.text:
    #     return True

    verification_required = (
        u'系统检测到异常浏览行为,建议您立即进行验证,否则可能被封禁账号。' in con.text
        or u'警告:由于您浏览过于频繁,请验证后重试!' in con.text
        or '/validation/captcha?object_type=1' in request_url
    )
    if verification_required:
        # For now this host needs manual verification: notify and stop.
        if 'secret.liepin.com' in request_url:
            sendmail(['*****@*****.**'], "liepin 验证", "\n\naccout: %r 需要验证" % self.ac)
            # sys.exit instead of the site-injected exit(), which is not
            # guaranteed to exist (e.g. when running with -S).
            sys.exit(1)

        # One initial attempt plus up to _retry_max_times retries.  The
        # outcome is tracked explicitly so that a success on the final
        # attempt is not mistaken for "retry budget exhausted".
        validated = self.validate_user(request_url)
        retry_time = 0
        while not validated and retry_time < self._retry_max_times:
            retry_time += 1
            print("retry validate user, time: %d" % retry_time)
            time.sleep(5)
            validated = self.validate_user(request_url)

        if not validated:
            print("validation more than %d times fail..." % self._retry_max_times)
            sys.exit(1)
        return True

    if u'您的操作过于频繁,为确保账号安全,请稍后再试' in con.text:
        print(u'您的操作过于频繁,为确保账号安全,请稍后再试')
        sys.exit(1)
    return False
def check_download_num(channel, remain_num, acc):
    """Check an account's remaining resume-download quota.

    Args:
        channel: value interpolated into the notification subject.
        remain_num: number of downloads the account has left.
        acc: account shown (via ``%r``) in the notification body.

    Raises:
        LoginErrors.AccountHoldError: when ``remain_num`` is exactly 0
            (a warning is logged first).

    When ``remain_num`` is non-zero but below
    ``config.DOWNLOAD_NUM_LIMIT_LOW``, a notification e-mail is sent to
    ``config.NOTIFY_EMAILS``. Otherwise nothing happens.
    """
    # Nothing left to download: log it and raise.
    if remain_num == 0:
        common_log.warn(r'账号可下载简历为0')
        raise LoginErrors.AccountHoldError()

    # Quota is not below the configured low mark: nothing to report.
    if not (remain_num < config.DOWNLOAD_NUM_LIMIT_LOW):
        return

    recipients = config.NOTIFY_EMAILS
    subject = "%s 简历下载数通知" % channel
    body = "\n\n账号: %r, 剩余简历下载数:%d" % (acc, remain_num)
    sendmail(recipients, subject, body)
def recruit_jobs(accs, sleep=1.0):
    """Run one full crawl for ``accs`` and e-mail a report if it fails.

    A ``FullJobRunner`` is built from
    ``gen_jobs_params(accs, sleep=sleep, detail_parser=False)`` and run.
    Any ``Exception`` is logged, its traceback printed, and a report with
    start/end time, exception class and message is mailed.

    Returns:
        ``None`` when the run completes, ``0`` when it raised.
        NOTE(review): the original layout was collapsed onto one line;
        ``return 0`` is taken to belong to the ``except`` branch, as in
        ``smart_full_job`` -- confirm.
    """
    start = datetime.now()
    try:
        runner = FullJobRunner(
            gen_jobs_params(accs, sleep=sleep, detail_parser=False))
        runner.run()
    except Exception as e:
        # Top-level boundary: report the failure instead of propagating it.
        Log.error(e)
        traceback.print_exc()
        # ``e.message`` is deprecated (PEP 352), empty for exceptions built
        # with several arguments and absent on Python 3; fall back to the
        # exception itself so the report always carries some detail.
        detail = getattr(e, 'message', '') or e
        msg = ''.join([
            'end time:%s\n' % datetime.now(),
            'start time:%s\n' % start,
            'exception:%s\n' % e.__class__,
            # 1-tuple so a tuple-valued message cannot break the formatting.
            'message:%s\n' % (detail,),
            'level:0\n',
        ])
        sendmail(['*****@*****.**'], 'chsi spider exception', msg)
        return 0
def _report_stage_failure(start, exc, level):
    """Log ``exc``, print its traceback and e-mail a report for stage ``level``."""
    Log.error(exc)
    traceback.print_exc()
    # ``exc.message`` is deprecated (PEP 352), empty for exceptions built with
    # several arguments and absent on Python 3; fall back to the exception
    # itself so the report always carries some detail.
    detail = getattr(exc, 'message', '') or exc
    msg = ''.join([
        'end time:%s\n' % datetime.now(),
        'start time:%s\n' % start,
        'exception:%s\n' % exc.__class__,
        # 1-tuple so a tuple-valued message cannot break the formatting.
        'message:%s\n' % (detail,),
        'level:%d\n' % level,
    ])
    sendmail(['*****@*****.**'], 'chsi spider exception', msg)


def smart_full_job(accs, name, prefix, score=750, sleep=1.0, level=0):
    """Run the crawl stages up to ``level`` and stop at the first failure.

    Stages, in order:
        0 -- run a ``FullJobRunner`` built from ``gen_jobs_params(...)``
             with ``detail_parser=False``;
        1 -- ``gen_spec_unfetched_seeds`` followed by ``re_do_spec``;
        2 -- ``gen_detail_unfetched_seeds`` followed by ``re_do_detail``.

    A stage runs only when ``level`` is at least its number. If a stage
    raises an ``Exception``, the failure is logged and mailed and the
    function returns immediately.

    Returns:
        The number (0, 1 or 2) of the stage that failed, or 3 when every
        requested stage completed (including when ``level`` is negative and
        no stage ran).
    """
    def full_crawl():
        runner = FullJobRunner(
            gen_jobs_params(accs, name, prefix, score=score, sleep=sleep,
                            detail_parser=False))
        runner.run()

    def redo_spec():
        gen_spec_unfetched_seeds(accs, name, prefix)
        re_do_spec(accs, name, prefix)

    def redo_detail():
        gen_detail_unfetched_seeds(accs, name, prefix)
        re_do_detail(accs, name, prefix)

    for stage_level, stage in enumerate((full_crawl, redo_spec, redo_detail)):
        # Stage numbers only increase, so once one is beyond ``level`` all
        # remaining ones are too.
        if level < stage_level:
            break
        start = datetime.now()
        try:
            stage()
        except Exception as e:
            # Top-level boundary: report and return the failing stage number.
            _report_stage_failure(start, e, stage_level)
            return stage_level
    return 3
#!/usr/bin/env python
# encoding: utf-8
# Failure notifier: e-mails a short report about a program run.
#
# Usage: <script> PROGRAM TIME_USED EXIT_CODE
#   PROGRAM    goes into the mail subject ("program PROGRAM failed")
#   TIME_USED  goes into the body as "time used : ..."
#   EXIT_CODE  goes into the body as "exit code: ..."

from spider.util import sendmail
import sys

if __name__ == '__main__':
    # All three arguments are required; fail with a usage message rather
    # than an IndexError traceback when the caller passes too few.
    if len(sys.argv) < 4:
        sys.stderr.write(
            "usage: %s PROGRAM TIME_USED EXIT_CODE\n" % sys.argv[0])
        sys.exit(2)

    mails = [
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
        '*****@*****.**',
    ]
    title = "program %s failed" % sys.argv[1]
    msg = "time used : %s\n" % sys.argv[2]
    msg += "exit code: %s\n" % sys.argv[3]
    sendmail(mails, title, msg)