def __init__(self, aa='', task_fpath=''):
    """Initialise the cjol search fetcher.

    Creates the 'cjol'/'down' account manager, sets the site identity and
    default request headers, reconfigures logging to write to
    ``cjolsearch.log`` and resets the task and rate counters.

    Args:
        aa: Accepted but not read anywhere in this constructor.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)
    self.account = libaccount.Manage(source='cjol', option='down')

    # Site identity is assigned before init_path() is called.
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjolsearch'
    self.init_path()

    # Login / session state.
    self.login_wait = 300
    self.ctmname = self.username = self.ck_str = self.password = self.refer = ''

    default_headers = {}
    default_headers['Host'] = self.host
    default_headers['User-Agent'] = 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0'
    default_headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    default_headers['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3'
    self.headers = default_headers

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Page markers stored for later matching by other methods.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the number-range task: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 2
    self.rp = Rdsreport()

    # Re-point the "file" log handler at this module's own log file.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    # The returned logger is unused here; the call is kept because it runs
    # before dictConfig().
    logging.getLogger(__name__)
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(log_dir, 'cjolsearch.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    # time_period together with time_num can limit the fetch rate used when
    # an account is selected.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = self.day_num = 0
    self.switch_num = 30
def mocookie():
    """Return or store a login cookie for a recruiting-site account.

    Query-string arguments (read from ``request.args``):
        source: must be one of '51job', 'zhilian', 'cjol'.
        option: 'get' (also the default when absent) or 'set'.
        city: only echoed in the "cannot find" message; it is not used to
            filter accounts (see the note in the body).
        username, cookie_str: both required when ``option`` is 'set'.

    Returns:
        A JSON-encoded string. Every response carries 'status' and 'msg';
        a successful 'get' also carries 'username' and 'cookie_str'. The
        invalid-source response additionally keeps the legacy 'error' key.
    """
    t0 = time.time()
    try:
        source = request.args.get('source')
        option = request.args.get('option')
        city = request.args.get('city')
        cookie_str = request.args.get('cookie_str')
        username = request.args.get('username')

        if source not in ('51job', 'zhilian', 'cjol'):
            # Previously this branch json.dumps()-ed the message here and
            # again on return, so clients received a doubly encoded string.
            message = {
                'status': 'error',
                'msg': 'source is required',
                'error': 'source is required',
            }
        else:
            # The region is deliberately ignored: the plugin sends a region
            # parameter by default, and picking at random among only a few
            # accounts easily causes problems. '%' is passed as the location.
            acc = libaccount.Manage(source=source, option='down', location='%')
            if option is None or option == 'get':
                redis_key_list = acc.uni_user()
                if redis_key_list:
                    username1 = random.choice(redis_key_list)
                    logging.info('mocookie source {} select username is {}'.format(source, username1))
                    # NOTE(review): assumes keys look like '<prefix>_<username>';
                    # a username containing '_' would be truncated - confirm.
                    username2 = username1.split('_')[1]
                    ck_str = acc.redis_ck_get(username2)
                    message = {
                        'status': 'ok',
                        'msg': 'get cookie success',
                        'username': username2,
                        'cookie_str': ck_str,
                    }
                else:
                    message = {
                        'status': 'error',
                        'msg': 'cannot find valid cookie for source {} and location {}'.format(source, city),
                    }
            elif option == 'set' and cookie_str is not None and username is not None:
                acc.redis_ck_set(username, cookie_str)
                message = {'status': 'ok', 'msg': 'set username {} cookie success'.format(username)}
            else:
                message = {'status': 'error', 'msg': 'wrong argument'}

        t1 = '%.2fs' % (time.time() - t0)
        logger.info('get set cookie use time %s =======>' % t1)
        return json.dumps(message)
    except Exception:
        # Log before returning: the original logger call sat after the
        # return statement and could never run.
        error_text = traceback.format_exc()
        logger.error('mocookie error %s ------' % error_text)
        # NOTE(review): the full traceback is returned to the client, as in
        # the original; consider replacing it with a generic message.
        return json.dumps({'status': 'error', 'msg': str(error_text)})
def update_num():
    """Call ``num_update`` for every account whose account_type is '购买账号'.

    Selects (grap_source, user_name) pairs from ``grapuser_info``, looks up
    each account's cookie string through ``libaccount.Manage.redis_ck_get``
    and passes it to ``num_update``, pausing three seconds between accounts.

    Any exception aborts the remaining accounts and is printed together with
    its traceback; nothing is raised to the caller. The database cursor and
    connection are always closed.
    """
    import traceback

    sql = """select grap_source, user_name from grapuser_info where account_type = '购买账号'"""
    db = None
    try:
        db = MySQLdb.connect(**sql_config)
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            rows = cursor.fetchall()
        finally:
            cursor.close()

        for grap_source, user_name in rows:
            manager = libaccount.Manage(grap_source)
            ck_str = manager.redis_ck_get(user_name)
            manager.num_update(grap_source, user_name, ck_str)
            # Pause between accounts.
            time.sleep(3)
    except Exception:
        # The original printed the Exception class itself; the traceback is
        # what is actually useful for diagnosis.
        print('update_num failed: %s' % traceback.format_exc())
    finally:
        if db is not None:
            db.close()
def __init__(self, position='', id_number='', adviser_user=''):
    """Initialise the zhilian resume-buy fetcher and pick a buy account.

    Args:
        position: City code ('gz', 'sz', 'bj', 'hz', 'sh'); any other value
            selects the '%' location when the account manager is created.
        id_number: Stored on ``self.id_number`` and written to the logs.
        adviser_user: Stored on ``self.adviser_user``.

    After construction ``self.has_cookie`` is False when ``uni_user()``
    returned no account; in that case a warning mail is sent and the
    constructor still returns normally.
    """
    BaseFetch.__init__(self)

    # Debug mode is switched on by passing "debug" as the fourth
    # command-line argument.
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Map the city code to the location passed to the account manager.
    location = {
        'gz': '广州',
        'sz': '深圳',
        'bj': '北京',
        'hz': '杭州',
        'sh': '上海',
    }.get(position, '%')

    self.ctmname = ''
    self.username = ''
    self.password = ''
    # Created first so that redispipe initialisation does not move the
    # logger's output location somewhere else.
    self.rp = Rdsreport()
    acc = libaccount.Manage(source='zhilian', option='buy', location=location)

    # Attach the buy/error handlers to the root logger.
    with open(json_config_path) as f:
        ff = f.read()
    # The returned logger is unused here; the call is kept because it runs
    # before dictConfig().
    logging.getLogger(__name__)
    log_dict = json.loads(ff)
    log_dict['loggers'][""]['handlers'] = ["file", "stream", "buy", "error"]
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')

    self.adviser_user = adviser_user
    self.id_number = id_number
    self.position = position
    self.host = r'rd.zhaopin.com'
    self.domain = 'zhaopin.com'
    self.module_name = 'zhiliandown'
    self.init_path()
    self.login_wait = 300
    self.refer = ''
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Origin': 'http://rdsearch.zhaopin.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Referer': 'http://rdsearch.zhaopin.com/Home/ResultForCustom?SF_1_1_1=java&SF_1_1_4=2%2C99&SF_1_1_18=765&orderBy=DATE_MODIFIED,1&pageSize=60&SF_1_1_27=0&exclude=1',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        'name="login"',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['个人信息', '求职意向']
    self.login_success_tag = []
    self.inuse_taskfpath = ''
    self.maxsleeptime = 4
    self.area_list = ['530', '538', '763', '765']

    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time + datetime.timedelta(days=-3)
    # Two-digit-year date. The previous '%Y-%m-%d' + replace('20', '')
    # removed every "20" in the string, so e.g. 2016-05-20 became "16-05-".
    self.yester_time = self.yes_time.strftime('%y-%m-%d')

    self.logger = common.log_init(__name__, 'zlbuy2.log')
    username1 = acc.uni_user()
    self.logger.info('get buy username is {}'.format(username1))
    self.has_cookie = True
    if username1:
        self.username = username1
        logging.info('zhilian buy select username is {}'.format(self.username))
        self.logger.info('zhilian buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for zldown')
        self.logger.error('no avail login cookie for zldown')
        self.send_mails('Warining, no account for zldown',
                        'no avail login cookie for zldown')
        print('没有已经登陆的 zldown cookie文件')
        # Do not exit here; per the original note, runwork is where
        # something gets returned for this case.
        self.has_cookie = False

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}, position is {}'.format(
        self.id_number, self.position))
    self.logger.info('trying to buy id {}, position is {}'.format(
        self.id_number, self.position))
def __init__(self, position='', id_number=''):
    """Initialise the cjol resume-buy fetcher and pick a buy account.

    Args:
        position: Stored on ``self.position`` and printed.
        id_number: Stored on ``self.id_number``, printed and logged.

    Raises:
        SystemExit: when ``uni_user()`` returns no account.
    """
    BaseFetch.__init__(self)

    # Debug mode is switched on by passing "debug" as the fourth
    # command-line argument.
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Identity and request parameters are assigned up front: the original
    # read self.host (for the Host header) and self.id_number (for logging)
    # before assigning them.
    self.id_number = id_number
    self.position = position
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjoldown'

    self.ctmname = ''
    self.username = ''
    self.password = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    acc = libaccount.Manage(source='cjol', option='buy')

    # Re-point the "file" log handler at this module's own log file.
    with open(json_config_path) as f:
        ff = f.read()
    # The returned logger is unused here; the call is kept because it runs
    # before dictConfig().
    logging.getLogger(__name__)
    log_dict = json.loads(ff)
    log_dict['handlers']['file']['filename'] = os.path.join(log_dir, 'cjolbuy.log')
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')

    username1 = acc.uni_user()
    if username1:
        self.username = username1
        logging.info('cjol buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for cjol')
        # Same SystemExit as the original quit(), without depending on the
        # interactive helper installed by the site module.
        raise SystemExit()

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}'.format(self.id_number))

    self.init_path()
    self.login_wait = 300
    self.refer = ''
    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    # Parameters of the number-range task: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 2
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the 51job id fetcher from an existing cookie file.

    Args:
        cookie_fpath: Path of the cookie file; it is loaded with
            ``self.load_cookie`` and stored on ``self.cookie_fpath``.
        task_fpath: Stored unchanged on ``self.taskfpath``.

    Raises:
        SystemExit: when ``cookie_fpath`` does not exist.
    """
    BaseFetch.__init__(self)
    if os.path.exists(cookie_fpath):
        self.load_cookie(cookie_fpath)
    else:
        # Fatal condition: log at error level so the reason for the exit is
        # visible, then stop (same SystemExit the original exit() raised).
        logging.error('cookie file %s not exist.' % cookie_fpath)
        raise SystemExit()

    self.account = libaccount.Manage(source='51job', option='down')
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51job'
    self.init_path()
    self.login_wait = 300
    self.ctmname = ''
    self.username = ''
    self.password = ''
    self.refer = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []
    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the number-range task: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 11
    # Constructed once; the original built a second, identical instance a
    # few statements later and discarded the first.
    self.rp = Rdsreport()

    # The following parameters are used when selecting an account;
    # time_period together with time_num can limit the fetch rate.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    # The original first set this to 300 and then overwrote it with 30.
    self.switch_num = 30
    # Accounts for which downloading by concatenated id does not work.
    self.error_username = [
        'spxx373', 'spxx336', 'huasheng123', u'北京事业部2', u'北京事业部3',
        u'广州事业部1', u'深圳事业部2',
    ]
    self.task_name = ''

    # Re-point the "file" log handler at this module's own log file.
    # The returned logger is unused here; the call is kept because it runs
    # before dictConfig().
    logging.getLogger(__name__)
    with open(common.json_config_path) as f:
        ff = f.read()
    log_dict = json.loads(ff)
    log_dict['handlers']['file']['filename'] = os.path.join(log_dir, 'job51_id_fetch.log')
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')
def __init__(self, position='', id_number='', adviser_user=''):
    """Initialise the 51job resume-buy fetcher and pick a buy account.

    Args:
        position: City code ('gz', 'sz', 'bj', 'hz', 'sh'); any other value
            selects the '%' location when the account manager is created.
        id_number: Stored on ``self.id_number`` and written to the log.
        adviser_user: Stored on ``self.adviser_user``.

    After construction ``self.has_cookie`` is False when ``uni_user()``
    returned no account; in that case a warning mail is sent and the
    constructor still returns normally.
    """
    BaseFetch.__init__(self)

    # Debug mode is switched on by passing "debug" as the fourth
    # command-line argument.
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Map the city code to the location passed to the account manager.
    location = {
        'gz': '广州',
        'sz': '深圳',
        'bj': '北京',
        'hz': '杭州',
        'sh': '上海',
    }.get(position, '%')

    self.ctmname = ''
    self.username = ''
    self.password = ''
    # Created first so that redispipe initialisation does not move the
    # logger's output location somewhere else.
    self.rp = Rdsreport()
    acc = libaccount.Manage(source='51job', option='buy', location=location)

    # Attach the buy/error handlers to the root logger.
    with open(json_config_path) as f:
        ff = f.read()
    # The returned logger is unused here; the call is kept because it runs
    # before dictConfig().
    logging.getLogger(__name__)
    log_dict = json.loads(ff)
    log_dict['loggers'][""]['handlers'] = ["file", "stream", "buy", "error"]
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')

    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51jobdown'
    self.init_path()
    self.login_wait = 300
    self.refer = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号', '简历信息']
    self.login_success_tag = []
    self.adviser_user = adviser_user
    self.id_number = id_number
    self.position = position
    self.inuse_taskfpath = ''

    # Parameters of the number-range task: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 5

    self.logger = common.log_init(__name__, '51buy2.log')
    username1 = acc.uni_user()
    self.logger.info('select username is {}'.format(username1))
    self.has_cookie = True
    if username1:
        self.username = username1
        # The original message said "cjol buy", a copy-paste slip that
        # mislabelled 51job account selections in the log.
        logging.info('51job buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for 51down')
        self.send_mails('Warining, no account for 51down',
                        'no avail login cookie for 51down')
        print('没有已经登陆的 51job cookie文件')
        # Do not exit here; per the original note, runwork is where
        # something gets returned for this case.
        self.has_cookie = False

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}, position is {}'.format(
        self.id_number, self.position))
from bs4 import BeautifulSoup
import libaccount


def p_key(source):
    """Return the key prefix matching *source*.

    The first marker found in *source* wins, checked in the order '51',
    'cjol', 'zhilian'. When none matches, a notice is printed and an empty
    string is returned.
    """
    known_prefixes = (
        ('51', 'cookie51_'),
        ('cjol', 'cookiecjol_'),
        ('zhilian', 'cookiezl_'),
    )
    for marker, prefix in known_prefixes:
        if marker in source:
            return prefix
    print('no valid source')
    return ''


if __name__ == '__main__':
    # Interactive helper: repeatedly ask for a username and cookie string
    # and store the pair through the account manager. Runs until interrupted.
    source = raw_input('please input your source:\n')
    manager = libaccount.Manage(source=source, option='down')
    while True:
        username = raw_input('input username:\n')
        cookie_text = raw_input('input cookie string:\n')
        print(cookie_text)
        print(type(cookie_text))
        if not cookie_text:
            print('ck_str not right, please retry')
        else:
            # p_key() is not applied here; the username is stored as typed.
            manager.redis_ck_set(username, cookie_text)
        print('\n' * 2)
cookie_cjol = [n for n in cookie_file_list if n.startswith('cjol')] cookie_zl = [ n for n in cookie_file_list if n not in cookie_51 if n not in cookie_cjol ] for c_51 in cookie_51: fpath = os.path.join(cookie_dir, c_51) # print fpath, 9999999999 with open(fpath) as f: ff = f.read() # print ff ck_dict = urlparse.parse_qs(ff) # print ck_dict if ck_dict.keys().count('UserName') == 1: username = ck_dict['UserName'][0] a = libaccount.Manage(source='51job') a.redis_ck_set(username, ff) for c_cjol in cookie_cjol: fpath = os.path.join(cookie_dir, c_cjol) # print fpath with open(fpath) as f: ff = f.read() ck_dict = urlparse.parse_qs(ff) if ck_dict.keys().count(' CompanyID') == 1: company_id = ck_dict[' CompanyID'][0] if str(company_id) == '317080': username = '******' if str(company_id) == '308380': username = '******' a = libaccount.Manage(source='cjol')