def __init__(self, systemId, baseURL, channelLabel, savePath):
    """Store the connection parameters and build the RHN communicator.

    Args:
        systemId: Identifier handed to ``RHNComm`` and kept on the instance.
        baseURL: Base URL handed to ``RHNComm`` and kept on the instance.
        channelLabel: Stored unchanged on ``self.channelLabel``.
        savePath: Stored unchanged on ``self.savePath``.
    """
    BaseFetch.__init__(self)

    # Plain parameters first.
    self.systemId, self.baseURL = systemId, baseURL

    # The communicator is built from the base URL and the stored system id.
    self.rhnComm = RHNComm(baseURL, self.systemId)

    self.channelLabel, self.savePath = channelLabel, savePath
def __init__(self, cookie_fpath, payload):
    """Initialise the '51search' fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        payload: Form data; stored URL-encoded on ``self.payload``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    self.payload = urllib.urlencode(payload)

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51search'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'Origin': 'http://ehire.51job.com',
        'Referer': 'http://ehire.51job.com/Candidate/SearchResume.aspx',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.inuse_taskfpath = ''

    # Search-filter codes.
    self.gender = ['7C0', '7C1']
    self.degree = ['7C5', '7C6', '7C7', '7C8']
    self.area = ['7C040000', '7C030200', '7C010000', '7C020000']

    # Reference dates: "now" and two days earlier (as YYYY-MM-DD).
    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time - datetime.timedelta(days=2)
    self.yester_time = self.yes_time.strftime('%Y-%m-%d')

    self.years_age_gender = []
    self.area_degree = []

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 5
def __init__(self, aa='', task_fpath=''):
    """Initialise the 'cjolsearch' fetcher (host rms.cjol.com).

    Args:
        aa: Not read anywhere inside this constructor.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)
    self.account = libaccount.Manage(source='cjol', option='down')

    # Site identity is assigned before init_path() is called.
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjolsearch'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.ck_str = ''
    self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 2
    self.rp = Rdsreport()

    # Point the "file" log handler at this module's own log file.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value is unused; call kept as in the original
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'cjolsearch.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    # Account-selection limits; time_period and time_num together can cap
    # the fetch frequency when an account is chosen.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    self.switch_num = 30
def __init__(self, repo_label, repourl, cacert=None, clicert=None,
             clikey=None, mirrorlist=None, download_dir='./'):
    """Record repository settings and derive the local repository directory.

    Args:
        repo_label: Repository label; also the last path component of
            ``self.repo_dir``.
        repourl: Stored unchanged on ``self.repourl``.
        cacert: Forwarded to ``BaseFetch.__init__``.
        clicert: Forwarded to ``BaseFetch.__init__``.
        clikey: Forwarded to ``BaseFetch.__init__``.
        mirrorlist: Stored unchanged on ``self.mirrorlist``.
        download_dir: Base directory; stored on ``self.local_dir``.
    """
    BaseFetch.__init__(self, cacert=cacert, clicert=clicert, clikey=clikey)

    self.repo_label, self.repourl = repo_label, repourl
    self.mirrorlist = mirrorlist

    # <download_dir>/<repo_label>
    self.local_dir = download_dir
    self.repo_dir = os.path.join(self.local_dir, self.repo_label)
def __init__(self, aa='', task_fpath=''):
    """Initialise the 'cjolsearch' fetcher using the newrms.cjol.com headers.

    Args:
        aa: Not read anywhere inside this constructor.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)
    self.account = libaccount.Manage(source='cjol', option='down')

    # Site identity is assigned before init_path() is called.
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjolsearch'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.ck_str = ''
    self.password = self.refer = ''

    # NOTE(review): the Host header is 'newrms.cjol.com' while self.host is
    # 'rms.cjol.com' -- kept exactly as in the original.
    self.headers = {
        'Host': 'newrms.cjol.com',
        'Origin': 'http://newrms.cjol.com',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 18
    self.rp = Rdsreport()

    # Point the "file" log handler at this module's own log file.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value is unused; call kept as in the original
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'cjolsearch.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    # Account-selection limits; time_period and time_num together can cap
    # the fetch frequency when an account is chosen.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    self.switch_num = 30
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the '51search' fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51search'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'Origin': 'http://ehire.51job.com',
        'Referer': 'http://ehire.51job.com/Candidate/SearchResume.aspx',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Reference dates: now, two days ago (YYYY-MM-DD) and seven days ago
    # (YYYYMMDD, computed from a separate clock read as in the original).
    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time - datetime.timedelta(days=2)
    week_back = datetime.datetime.now() - datetime.timedelta(days=7)
    self.seven_ago = week_back.strftime('%Y%m%d')
    self.yester_time = self.yes_time.strftime('%Y-%m-%d')

    # Filter code -> display label (four area codes, four degree codes).
    self.convert_dict = {
        '7C010000': '北京',
        '7C020000': '上海',
        '7C030200': '广州',
        '7C040000': '深圳',
        '7C5': '大专',
        '7C6': '本科',
        '7C7': '硕士',
        '7C8': 'MBA以上',
    }

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.area_list = ['7C040000', '7C020000', '7C030200', '7C010000']
    self.skill_list = ''
    self.current_num = self.start_num
    self.maxsleeptime = 5
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the '51job' fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51job'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'Origin': 'http://ehire.51job.com',
        'Referer': 'http://ehire.51job.com/Candidate/SearchResume.aspx',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.area_list = self.skill_list = ''
    self.current_num = self.start_num
    self.maxsleeptime = 5
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the 'zhilian' fetcher (host rdsearch.zhaopin.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    # Site identity is assigned before init_path() is called.
    self.host = r'rdsearch.zhaopin.com'
    self.domain = 'zhaopin.com'
    self.module_name = 'zhilian'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Referer': 'http://rdsearch.zhaopin.com/Home/ResultForCustom?SF_1_1_1=java&SF_1_1_4=2%2C99&SF_1_1_18=765&orderBy=DATE_MODIFIED,1&pageSize=60&SF_1_1_27=0&exclude=1',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 0
    self.skill_list = ''
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the '51job' fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)

    cookie_present = os.path.exists(cookie_fpath)
    if not cookie_present:
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51job'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    site_root = 'http://ehire.51job.com'
    self.headers = {
        'Host': self.host,
        'Origin': site_root,
        'Referer': 'http://ehire.51job.com/Candidate/SearchResume.aspx',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.area_list = self.skill_list = ''
    self.current_num = self.start_num
    self.maxsleeptime = 5
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the 'cjol' fetcher (host rms.cjol.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    # Site identity is assigned before init_path() is called.
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjol'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 0
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the '51job' fetcher (host ehire.51job.com, 100 login wait).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51job'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 100
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # ID-range task counters: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
def __init__(self, position='', id_number='', adviser_user=''):
    """Initialise the '51jobdown' resume buyer (host ehire.51job.com).

    An account is picked through ``libaccount.Manage`` and its cookie string
    is put into the request headers. When no account is available the
    constructor does not exit: it logs an error, sends a mail and sets
    ``self.has_cookie`` to False.

    Args:
        position: Short city code ('gz', 'sz', 'bj', 'hz', 'sh'); any other
            value selects the '%' location.
        id_number: Stored unchanged on ``self.id_number``.
        adviser_user: Stored unchanged on ``self.adviser_user``.
    """
    BaseFetch.__init__(self)

    # Debug mode is on when the fifth command-line token is "debug".
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Map the short city code to the location name used for account lookup.
    # NOTE(review): '%' looks like a match-anything wildcard -- confirm
    # against libaccount.Manage.
    city_names = {
        'gz': '广州',
        'sz': '深圳',
        'bj': '北京',
        'hz': '杭州',
        'sh': '上海',
    }
    location = city_names.get(position, '%')

    self.ctmname = ''
    self.username = ''
    self.password = ''

    # Created early on purpose: the original comment says this avoids the
    # redispipe initialisation moving the logger's output location.
    self.rp = Rdsreport()
    acc = libaccount.Manage(source='51job', option='buy', location=location)

    # Re-configure logging with the buyer-specific handler set.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value was never used; call kept
    log_settings = json.loads(raw_config)
    log_settings['loggers'][""]['handlers'] = ["file", "stream", "buy", "error"]
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51jobdown'
    self.init_path()

    self.login_wait = 300
    self.refer = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号', '简历信息']
    self.login_success_tag = []

    self.adviser_user = adviser_user
    self.id_number = id_number
    self.position = position
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 5

    self.logger = common.log_init(__name__, '51buy2.log')
    selected_user = acc.uni_user()
    self.logger.info('select username is {}'.format(selected_user))

    self.has_cookie = True
    if selected_user:
        self.username = selected_user
        # Fixed: the message used to say "cjol buy" although this is the
        # 51job buyer, which made the logs misleading.
        logging.info('51job buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for 51down')
        self.send_mails('Warining, no account for 51down',
                        'no avail login cookie for 51down')
        print('没有已经登陆的 51job cookie文件')
        # Do not exit here; the original note says runwork handles it.
        self.has_cookie = False

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}, position is {}'.format(
        self.id_number, self.position))
def __init__(self, cookie_fpath, payload):
    """Initialise the 'zlpublish' fetcher (host jobads.zhaopin.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        payload: Stored unchanged on ``self.payload``.
    """
    BaseFetch.__init__(self)

    # Guard clause: nothing can be done without the cookie file.
    if not os.path.exists(cookie_fpath):
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()
    self.load_cookie(cookie_fpath)

    self.payload = payload

    # Site identity is assigned before init_path() is called.
    self.host = r'jobads.zhaopin.com'
    self.domain = 'zhaopin.com'
    self.module_name = 'zlpublish'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = self.refer = ''

    self.headers = {
        'Host': self.host,
        'Origin': 'http://jobads.zhaopin.com',
        'Referer': 'http://jobads.zhaopin.com/Position/PositionAdd',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
        'Accept-Language': 'zh-CN,zh;q=0.8',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.inuse_taskfpath = ''

    # Search-filter codes.
    self.gender = ['7C0', '7C1']
    self.degree = ['7C5', '7C6', '7C7', '7C8']
    self.area = ['7C040000', '7C030200', '7C010000', '7C020000']

    # Reference dates: "now" and two days earlier (as YYYY-MM-DD).
    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time - datetime.timedelta(days=2)
    self.yester_time = self.yes_time.strftime('%Y-%m-%d')

    self.years_age_gender = []
    self.area_degree = []

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 5
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the account-based '51search' fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Not read inside this constructor (the cookie-file
            loading code is disabled).
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)
    self.account = libaccount.Manage(source='51job', option='down')

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51search'
    self.init_path()

    # Login bookkeeping.
    self.login_wait = 300
    self.ctmname = self.username = self.password = ''
    self.ck_str = self.refer = ''

    self.headers = {
        'Host': self.host,
        'Origin': 'http://ehire.51job.com',
        'Referer': 'http://ehire.51job.com/Candidate/SearchResume.aspx',
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Content-Type': 'application/x-www-form-urlencoded',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    self.login_type = 2
    self.login_at = self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Search-filter codes.
    self.gender = ['7C0', '7C1']
    self.degree = ['7C5', '7C6', '7C7', '7C8']
    self.area = ['7C040000', '7C030200', '7C010000', '7C020000']

    # Reference dates: now, two days ago (YYYY-MM-DD) and seven days ago
    # (YYYYMMDD, computed from a separate clock read as in the original).
    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time - datetime.timedelta(days=2)
    week_back = datetime.datetime.now() - datetime.timedelta(days=7)
    self.seven_ago = week_back.strftime('%Y%m%d')
    self.yester_time = self.yes_time.strftime('%Y-%m-%d')

    self.years_age_gender = []
    self.area_degree = []

    # Filter code -> display label.
    self.convert_dict = {
        '7C010000': '北京',
        '7C020000': '上海',
        '7C030200': '广州',
        '7C040000': '深圳',
        '7C5': '大专',
        '7C6': '本科',
        '7C7': '硕士',
        '7C8': 'MBA以上',
        '7C1%7C1': '在读学生',
        '7C2%7C2': '应届毕业生',
        '7C3%7C3': '1-2年',
        '7C4%7C4': '2-3年',
        '7C5%7C5': '3-4年',
        '7C6%7C6': '5-7年',
        '7C7%7C7': '8-9年',
        '7C8%7C8': '10年以上',
        '7C8%7C99': '10年以上',
    }

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 5
    self.current_circle = 0
    self.rp = Rdsreport()

    # Account-selection parameters (notes translated from the original):
    #   time_period: combined with time_num to limit the fetch frequency
    #   time_num:    number of fetches allowed in a short window,
    #                e.g. 5 per 60 seconds
    #   hour_num:    limit per hour
    #   day_num:     limit per day
    # The original note also describes a return value -- whether an account
    # is available, looping and waiting when none is -- which presumably
    # belongs to the account-selection routine rather than this constructor.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    self.switch_num = 30

    # Point the "file" log handler at this module's own log file.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value is unused; call kept as in the original
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'job51search.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    self.other_post_count = 3
    self.dynamic = 0
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the '51job' id-based fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)
    if os.path.exists(cookie_fpath):
        self.load_cookie(cookie_fpath)
    else:
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()

    self.account = libaccount.Manage(source='51job', option='down')

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51job'
    self.init_path()

    self.login_wait = 300
    self.ctmname = ''
    self.username = ''
    self.password = ''
    self.refer = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 11

    # Built once. The original constructed Rdsreport twice and threw the
    # first instance away.
    self.rp = Rdsreport()

    # Account-selection limits; time_period and time_num together can cap
    # the fetch frequency when an account is chosen.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    # The original first stored 300 here and then overwrote it with 30;
    # only the effective value is kept.
    self.switch_num = 30

    # Accounts for which downloading by concatenated id no longer works.
    self.error_username = [
        'spxx373', 'spxx336', 'huasheng123',
        u'北京事业部2', u'北京事业部3', u'广州事业部1', u'深圳事业部2',
    ]
    self.task_name = ''

    # Point the "file" log handler at this module's own log file.
    logging.getLogger(__name__)  # return value was never used; call kept
    with open(common.json_config_path) as config_file:
        raw_config = config_file.read()
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'job51_id_fetch.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')
def __init__(self, systemId, baseURL):
    """Store the connection parameters and build the RHN communicator.

    Args:
        systemId: Identifier handed to ``RHNComm`` and kept on the instance.
        baseURL: Base URL handed to ``RHNComm`` and kept on the instance.
    """
    BaseFetch.__init__(self)
    self.baseURL, self.systemId = baseURL, systemId

    # The communicator is built from the base URL and the stored system id.
    self.rhnComm = RHNComm(baseURL, self.systemId)
def __init__(self, cookie_fpath='', task_fpath=''):
    """Initialise the '51job' id-based fetcher (host ehire.51job.com).

    Args:
        cookie_fpath: Path of a saved cookie file, loaded through
            ``self.load_cookie``. When the file is missing a debug message
            is logged and ``exit()`` is called.
        task_fpath: Stored unchanged on ``self.taskfpath``.
    """
    BaseFetch.__init__(self)
    if os.path.exists(cookie_fpath):
        self.load_cookie(cookie_fpath)
    else:
        logging.debug('cookie file %s not exit.' % cookie_fpath)
        exit()

    self.account = libaccount.Manage(source='51job', option='down')

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51job'
    self.init_path()

    self.login_wait = 300
    self.ctmname = ''
    self.username = ''
    self.password = ''
    self.refer = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 11

    # Built once. The original constructed Rdsreport twice and threw the
    # first instance away.
    self.rp = Rdsreport()

    # Account-selection limits; time_period and time_num together can cap
    # the fetch frequency when an account is chosen.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    # The original first stored 300 here and then overwrote it with 30;
    # only the effective value is kept.
    self.switch_num = 30

    # Accounts for which downloading by concatenated id no longer works.
    self.error_username = [
        'spxx373', 'spxx336', 'huasheng123',
        u'北京事业部2', u'北京事业部3', u'广州事业部1', u'深圳事业部2',
    ]
    self.task_name = ''

    # Point the "file" log handler at this module's own log file.
    logging.getLogger(__name__)  # return value was never used; call kept
    with open(common.json_config_path) as config_file:
        raw_config = config_file.read()
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'job51_id_fetch.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')
def __init__(self, position='', id_number='', adviser_user=''):
    """Initialise the '51jobdown' resume buyer (host ehire.51job.com).

    An account is picked through ``libaccount.Manage`` and its cookie string
    is put into the request headers. When no account is available the
    constructor does not exit: it logs an error, sends a mail and sets
    ``self.has_cookie`` to False.

    Args:
        position: Short city code ('gz', 'sz', 'bj', 'hz', 'sh'); any other
            value selects the '%' location.
        id_number: Stored unchanged on ``self.id_number``.
        adviser_user: Stored unchanged on ``self.adviser_user``.
    """
    BaseFetch.__init__(self)

    # Debug mode is on when the fifth command-line token is "debug".
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Map the short city code to the location name used for account lookup.
    # NOTE(review): '%' looks like a match-anything wildcard -- confirm
    # against libaccount.Manage.
    city_names = {
        'gz': '广州',
        'sz': '深圳',
        'bj': '北京',
        'hz': '杭州',
        'sh': '上海',
    }
    location = city_names.get(position, '%')

    self.ctmname = ''
    self.username = ''
    self.password = ''

    # Created early on purpose: the original comment says this avoids the
    # redispipe initialisation moving the logger's output location.
    self.rp = Rdsreport()
    acc = libaccount.Manage(source='51job', option='buy', location=location)

    # Re-configure logging with the buyer-specific handler set.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value was never used; call kept
    log_settings = json.loads(raw_config)
    log_settings['loggers'][""]['handlers'] = ["file", "stream", "buy", "error"]
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    # Site identity is assigned before init_path() is called.
    self.host = r'ehire.51job.com'
    self.domain = '51job.com'
    self.module_name = '51jobdown'
    self.init_path()

    self.login_wait = 300
    self.refer = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }
    self.login_type = 2
    self.login_at = None
    self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<td colspan="2" class="loginbar">',
        '<input type="button" onclick="loginSubmit',
    ]
    self.resume_tags = ['<div id="divResume"><style>', '简历编号', '简历信息']
    self.login_success_tag = []

    self.adviser_user = adviser_user
    self.id_number = id_number
    self.position = position
    self.inuse_taskfpath = ''

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 5

    self.logger = common.log_init(__name__, '51buy2.log')
    selected_user = acc.uni_user()
    self.logger.info('select username is {}'.format(selected_user))

    self.has_cookie = True
    if selected_user:
        self.username = selected_user
        # Fixed: the message used to say "cjol buy" although this is the
        # 51job buyer, which made the logs misleading.
        logging.info('51job buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for 51down')
        self.send_mails('Warining, no account for 51down',
                        'no avail login cookie for 51down')
        print('没有已经登陆的 51job cookie文件')
        # Do not exit here; the original note says runwork handles it.
        self.has_cookie = False

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}, position is {}'.format(
        self.id_number, self.position))
def __init__(self, position='', id_number=''):
    """Initialise the 'cjoldown' resume buyer (host rms.cjol.com).

    An account is picked through ``libaccount.Manage`` and its cookie string
    is put into the request headers. When no account is available an error
    is logged and ``quit()`` is called.

    Args:
        position: Stored unchanged on ``self.position``.
        id_number: Stored unchanged on ``self.id_number``.
    """
    BaseFetch.__init__(self)

    # Debug mode is on when the fifth command-line token is "debug".
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Fixed ordering: these used to be assigned near the end, after
    # self.host had been read for the headers and self.id_number had been
    # read by the final log call.
    self.id_number = id_number
    self.position = position
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjoldown'

    self.ctmname = ''
    self.username = ''
    self.password = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    acc = libaccount.Manage(source='cjol', option='buy')

    # Point the "file" log handler at this module's own log file.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value was never used; call kept
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'cjolbuy.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    selected_user = acc.uni_user()
    if selected_user:
        self.username = selected_user
        logging.info('cjol buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for cjol')
        quit()

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}'.format(self.id_number))

    # init_path() stays at its original position, after account selection.
    self.init_path()

    self.login_wait = 300
    self.refer = ''
    self.login_type = 2
    self.login_at = None
    self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 2
def __init__(self, position='', id_number=''):
    """Initialise the 'cjoldown' resume buyer (host rms.cjol.com).

    An account is picked through ``libaccount.Manage`` and its cookie string
    is put into the request headers. When no account is available an error
    is logged and ``quit()`` is called.

    Args:
        position: Stored unchanged on ``self.position``.
        id_number: Stored unchanged on ``self.id_number``.
    """
    BaseFetch.__init__(self)

    # Debug mode is on when the fifth command-line token is "debug".
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Fixed ordering: these used to be assigned near the end, after
    # self.host had been read for the headers and self.id_number had been
    # read by the final log call.
    self.id_number = id_number
    self.position = position
    self.host = r'rms.cjol.com'
    self.domain = 'cjol.com'
    self.module_name = 'cjoldown'

    self.ctmname = ''
    self.username = ''
    self.password = ''
    self.headers = {
        'Host': self.host,
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
    }

    acc = libaccount.Manage(source='cjol', option='buy')

    # Point the "file" log handler at this module's own log file.
    with open(json_config_path) as config_file:
        raw_config = config_file.read()
    logging.getLogger(__name__)  # return value was never used; call kept
    log_settings = json.loads(raw_config)
    log_settings['handlers']['file']['filename'] = os.path.join(
        log_dir, 'cjolbuy.log')
    logging.config.dictConfig(log_settings)
    logging.debug('hahahahha')

    selected_user = acc.uni_user()
    if selected_user:
        self.username = selected_user
        logging.info('cjol buy select username is {}'.format(self.username))
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for cjol')
        quit()

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}'.format(self.id_number))

    # init_path() stays at its original position, after account selection.
    self.init_path()

    self.login_wait = 300
    self.refer = ''
    self.login_type = 2
    self.login_at = None
    self.logout_at = None

    # Marker strings stored for later page checks.
    self.need_login_tags = [
        '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['基本信息', '简历编号']
    self.login_success_tag = []

    # Parameters of the ID-range task being run: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 2
def __init__(self, cookie_fpath='', task_fpath=''):
    """Prepare a zhilian (zhaopin.com) resume-search fetcher.

    Args:
        cookie_fpath: Cookie file path; only stored on the instance here.
        task_fpath: Task file path; only stored on the instance here.
    """
    BaseFetch.__init__(self)
    self.account = libaccount.Manage(source='zhilian', option='down')

    self.host = r'rd.zhaopin.com'
    self.domain = 'zhaopin.com'
    self.module_name = 'zhilian'
    self.init_path()
    self.login_wait = 300

    self.ctmname = ''
    self.username = ''
    self.password = ''

    self.refer = ''
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Origin': 'http://rdsearch.zhaopin.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Referer': 'http://rdsearch.zhaopin.com/Home/ResultForCustom?SF_1_1_1=java&SF_1_1_4=2%2C99&SF_1_1_18=765&orderBy=DATE_MODIFIED,1&pageSize=60&SF_1_1_27=0&exclude=1',
    }

    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        'name="login"',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['个人信息', '求职意向']
    self.login_success_tag = []

    self.cookie_fpath = cookie_fpath
    self.taskfpath = task_fpath
    self.inuse_taskfpath = ''

    # Parameters for number-range tasks: start / end / current.
    self.start_num = 0
    self.end_num = 0
    self.current_num = self.start_num
    self.maxsleeptime = 6
    self.circle_count = 0

    self.area_list = ['530', '538', '763', '765']

    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time + datetime.timedelta(days=-3)
    # Two-digit-year date (yy-mm-dd). '%y' is used rather than stripping
    # the substring '20' from a '%Y-%m-%d' string, because that also
    # removed a day or month of "20" and mangled years such as 2020.
    self.yester_time = self.yes_time.strftime('%y-%m-%d')
    self.rp = Rdsreport()

    # The values below are used when choosing an account
    # (NOTE(review): consumer is not visible here -- confirm):
    #   time_period / time_num: used together to limit the fetch rate,
    #       i.e. how many fetches are allowed within a short window
    #       (for example 5 per minute).
    #   hour_num: cap on the count per hour.
    #   day_num: cap on the count per day.
    self.time_period = 400
    self.time_num = 150
    self.hour_num = 0
    self.day_num = 0
    self.switch_num = 30

    # Point the shared logging config at this module's own log file.
    with open(json_config_path) as f:
        log_dict = json.load(f)
    log_dict['handlers']['file']['filename'] = os.path.join(
        log_dir, 'zhiliansearch.log')
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')

    self.address = ''
def __init__(self, position='', id_number='', adviser_user=''):
    """Prepare a fetcher that buys one resume from zhilian (zhaopin.com).

    Args:
        position: City code ('gz', 'sz', 'bj', 'hz' or 'sh'); any other
            value selects the '%' location when picking an account.
        id_number: Identifier of the resume to buy.
        adviser_user: Stored on the instance as ``adviser_user``.

    When no account is available the constructor does not exit; it sends a
    warning mail and sets ``self.has_cookie`` to False so that the caller
    can bail out later.
    """
    BaseFetch.__init__(self)

    # Debug mode: enabled when the fifth command-line token is "debug".
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Map the city code to the location name used for account selection.
    # '%' is presumably a match-anything wildcard -- confirm against
    # libaccount.Manage.
    city_names = {
        'gz': '广州',
        'sz': '深圳',
        'bj': '北京',
        'hz': '杭州',
        'sh': '上海',
    }
    ppp = city_names.get(position, '%')

    self.ctmname = ''
    self.username = ''
    self.password = ''

    # Created before the logging setup below so that the redis pipe's
    # initialisation cannot redirect where the logger saves its output.
    self.rp = Rdsreport()
    acc = libaccount.Manage(source='zhilian', option='buy', location=ppp)

    # Load the shared logging config and select the root logger handlers.
    with open(json_config_path) as f:
        log_dict = json.load(f)
    log_dict['loggers'][""]['handlers'] = [
        "file", "stream", "buy", "error"
    ]
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')

    self.adviser_user = adviser_user
    self.id_number = id_number
    self.position = position

    self.host = r'rd.zhaopin.com'
    self.domain = 'zhaopin.com'
    self.module_name = 'zhiliandown'
    self.init_path()
    self.login_wait = 300

    self.refer = ''
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Origin': 'http://rdsearch.zhaopin.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Referer': 'http://rdsearch.zhaopin.com/Home/ResultForCustom?SF_1_1_1=java&SF_1_1_4=2%2C99&SF_1_1_18=765&orderBy=DATE_MODIFIED,1&pageSize=60&SF_1_1_27=0&exclude=1',
    }

    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        'name="login"',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />'
    ]
    self.resume_tags = ['个人信息', '求职意向']
    self.login_success_tag = []

    self.inuse_taskfpath = ''
    self.maxsleeptime = 4
    self.area_list = ['530', '538', '763', '765']

    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time + datetime.timedelta(days=-3)
    # Two-digit-year date (yy-mm-dd). '%y' is used rather than stripping
    # the substring '20' from a '%Y-%m-%d' string, because that also
    # removed a day or month of "20" and mangled years such as 2020.
    self.yester_time = self.yes_time.strftime('%y-%m-%d')

    self.logger = common.log_init(__name__, 'zlbuy2.log')

    # Pick an account and attach its stored cookie to the request headers.
    username1 = acc.uni_user()
    self.logger.info('get buy username is {}'.format(username1))
    self.has_cookie = True
    if username1:
        self.username = username1
        selected_msg = 'zhilian buy select username is {}'.format(
            self.username)
        logging.info(selected_msg)
        self.logger.info(selected_msg)
        self.headers['Cookie'] = acc.redis_ck_get(self.username)
    else:
        logging.error('no avail login cookie for zldown')
        self.logger.error('no avail login cookie for zldown')
        self.send_mails('Warining, no account for zldown',
                        'no avail login cookie for zldown')
        print('没有已经登陆的 zldown cookie文件')
        # Do not exit here; the flag is checked later by the run step.
        self.has_cookie = False

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    trying_msg = 'trying to buy id {}, position is {}'.format(
        self.id_number, self.position)
    logging.info(trying_msg)
    self.logger.info(trying_msg)
def __init__(self, position='', id_number=''):
    """Prepare a fetcher that buys one resume from zhilian (zhaopin.com).

    Args:
        position: City code ('gz', 'sz', 'bj', 'hz' or 'sh'); any other
            value selects the '%' location when picking an account.
        id_number: Identifier of the resume to buy.

    Raises:
        SystemExit: when ``libaccount`` returns no usable account.
    """
    BaseFetch.__init__(self)

    # Debug mode: enabled when the fifth command-line token is "debug".
    self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

    # Map the city code to the location name used for account selection.
    # '%' is presumably a match-anything wildcard -- confirm against
    # libaccount.Manage.
    city_names = {
        'gz': '广州',
        'sz': '深圳',
        'bj': '北京',
        'hz': '杭州',
        'sh': '上海',
    }
    ppp = city_names.get(position, '%')

    self.ctmname = ''
    self.username = ''
    self.password = ''
    acc = libaccount.Manage(source='zhilian', option='buy', location=ppp)

    # Point the shared logging config at this module's own log file.
    with open(json_config_path) as f:
        log_dict = json.load(f)
    log_dict['handlers']['file']['filename'] = os.path.join(
        log_dir, 'zlbuy.log')
    logging.config.dictConfig(log_dict)
    logging.debug('hahahahha')

    self.id_number = id_number
    self.position = position

    self.host = r'rd.zhaopin.com'
    self.domain = 'zhaopin.com'
    self.module_name = 'zhilian'
    self.init_path()
    self.login_wait = 300

    self.refer = ''
    self.headers = {
        'User-Agent': 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
        'Origin': 'http://rdsearch.zhaopin.com',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        'Referer': 'http://rdsearch.zhaopin.com/Home/ResultForCustom?SF_1_1_1=java&SF_1_1_4=2%2C99&SF_1_1_18=765&orderBy=DATE_MODIFIED,1&pageSize=60&SF_1_1_27=0&exclude=1',
    }

    self.login_type = 2
    self.login_at = None
    self.logout_at = None
    self.need_login_tags = [
        'name="login"',
        '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
    ]
    self.resume_tags = ['个人信息', '求职意向']
    self.login_success_tag = []

    self.inuse_taskfpath = ''
    self.maxsleeptime = 4
    self.area_list = ['530', '538', '763', '765']

    self.now_time = datetime.datetime.now()
    self.yes_time = self.now_time + datetime.timedelta(days=-3)
    # Two-digit-year date (yy-mm-dd). '%y' is used rather than stripping
    # the substring '20' from a '%Y-%m-%d' string, because that also
    # removed a day or month of "20" and mangled years such as 2020.
    self.yester_time = self.yes_time.strftime('%y-%m-%d')

    # Pick an account and attach its stored cookie to the request headers.
    # The log messages name zhilian: the account comes from the 'zhilian'
    # source, not cjol.
    username1 = acc.uni_user()
    if not username1:
        logging.error('no avail login cookie for zhilian')
        # sys.exit() instead of the interactive-only quit() helper.
        sys.exit()
    self.username = username1
    print(self.username)
    logging.info('zhilian buy select username is {}'.format(self.username))
    self.headers['Cookie'] = acc.redis_ck_get(self.username)

    print('id num is {}'.format(id_number))
    print('position is {}'.format(position))
    logging.info('trying to buy id {}, position is {}'.format(
        self.id_number, self.position))