예제 #1
0
    def __init__(self, aa='', task_fpath=''):
        """Set up the cjol search fetcher.

        Args:
            aa: Accepted but not used inside this constructor.
            task_fpath: Path of the task file, stored as ``self.taskfpath``.
        """
        BaseFetch.__init__(self)

        self.account = libaccount.Manage(option='down', source='cjol')

        # Site identity; all three are assigned before init_path() runs.
        self.module_name = 'cjolsearch'
        self.domain = 'cjol.com'
        self.host = r'rms.cjol.com'
        self.init_path()
        self.login_wait = 300

        # Account details and the cookie string start out empty.
        self.ctmname = self.username = self.ck_str = self.password = ''

        self.refer = ''
        request_headers = {}
        request_headers['Host'] = self.host
        request_headers['User-Agent'] = 'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0'
        request_headers['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        request_headers['Accept-Language'] = 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3'
        self.headers = request_headers

        self.login_type = 2
        self.login_at = self.logout_at = None
        # Page fragments used as markers for "login required" / "is a resume".
        self.need_login_tags = [
            '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
            '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />',
        ]
        self.resume_tags = ['基本信息', '简历编号']
        self.login_success_tag = []

        self.taskfpath = task_fpath
        self.inuse_taskfpath = ''

        # Parameters tracking the id-range task: start / end / current.
        self.start_num = self.end_num = 0
        self.current_num = self.start_num
        self.maxsleeptime = 2
        self.rp = Rdsreport()

        # Re-initialise logging so the file handler writes to cjolsearch.log.
        with open(json_config_path) as config_file:
            log_config = json.load(config_file)
        # The module logger is looked up (and thereby created) before
        # dictConfig runs; its return value is not used here.
        logging.getLogger(__name__)
        file_handler = log_config['handlers']['file']
        file_handler['filename'] = os.path.join(log_dir, 'cjolsearch.log')
        logging.config.dictConfig(log_config)
        logging.debug('hahahahha')

        # Account-selection settings.
        self.time_period = 400
        # Together with time_period, this can limit the fetch frequency when
        # an account is being selected.
        self.time_num = 150
        self.hour_num = self.day_num = 0
        self.switch_num = 30
예제 #2
0
def mocookie():
	"""Hand out or store a login cookie string for a recruiting-site account.

	Query parameters (read from ``request.args``):
		source: '51job', 'zhilian' or 'cjol'; anything else is rejected.
		option: 'get' (also the default when absent) or 'set'.
		city: only echoed in the "cannot find" message; it does not
			influence which account is chosen.
		cookie_str, username: both required when option is 'set'.

	Returns:
		A JSON string. Normal replies carry 'status' and 'msg' (plus
		'username' and 'cookie_str' on a successful get); an invalid source
		yields ``{"error": "source is required"}``. Any exception is
		logged and reported as a 'status': 'error' reply.
	"""
	try:
		t0 = time.time()
		args = request.args
		source = args.get('source')
		option = args.get('option')
		city = args.get('city')
		cookie_str = args.get('cookie_str')
		username = args.get('username')
		if source not in ('51job', 'zhilian', 'cjol'):
			# Kept as a dict: it is JSON-encoded exactly once at the return
			# below (encoding it here as well produced a doubly-encoded reply).
			message = {'error': 'source is required'}
		else:
			# The region is ignored on purpose: the plugin always sends a city,
			# and picking randomly among only a few accounts tends to cause
			# problems, so every location ('%') is used.
			acc = libaccount.Manage(source=source, option='down', location='%')
			if option is None or option == 'get':
				redis_key_list = acc.uni_user()
				if redis_key_list:
					username1 = random.choice(redis_key_list)
					logging.info('mocookie source {} select username is {}'.format(source, username1))
					# Drop only the part up to the first '_' so that usernames
					# which themselves contain '_' are not truncated.
					# NOTE(review): assumes keys look like '<prefix>_<username>' - confirm.
					username2 = username1.split('_', 1)[1]
					ck_str = acc.redis_ck_get(username2)
					message = {'status': 'ok', 'msg': 'get cookie success', 'username': username2, 'cookie_str': ck_str}
				else:
					message = {'status': 'error', 'msg': 'cannot find valid cookie for source {} and location {}'.format(source, city)}
			elif option == 'set' and cookie_str is not None and username is not None:
				acc.redis_ck_set(username, cookie_str)
				message = {'status': 'ok', 'msg': 'set username {} cookie success'.format(username)}
			else:
				message = {'status': 'error', 'msg': 'wrong argument'}
		t1 = '%.2f' % (time.time() - t0) + 's'
		logger.info('get set cookie use time %s =======>' % t1)
		return json.dumps(message)
	except Exception as e:
		tb = traceback.format_exc()
		print('%s %s' % (e, tb))
		# Log before returning; a log call placed after the return never runs.
		logger.error('mocookie error %s ------' % tb)
		return json.dumps({'status': 'error', 'msg': str(tb)})
예제 #3
0
def update_num():
    """Refresh the stored numbers of every purchase account.

    Reads (grap_source, user_name) for all rows of ``grapuser_info`` whose
    account_type is '购买账号', then for each row fetches the account's cookie
    string and calls ``num_update`` with it, pausing 3 seconds between
    accounts. Errors are printed and swallowed (best effort); nothing is
    returned.
    """
    sql = """select grap_source, user_name from grapuser_info where account_type  = '购买账号'"""
    try:
        db = MySQLdb.connect(**sql_config)
        try:
            cursor = db.cursor()
            try:
                cursor.execute(sql)
                data = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            # Release the connection as soon as the rows are fetched instead of
            # leaving it open (it was never closed before).
            db.close()

        for grap_source, user_name in data:
            manager = libaccount.Manage(grap_source)
            ck_str = manager.redis_ck_get(user_name)
            manager.num_update(grap_source, user_name, ck_str)
            time.sleep(3)
    except Exception as e:
        print('%s %s' % (Exception, e))
예제 #4
0
    def __init__(self, position='', id_number='', adviser_user=''):
        """Prepare a zhilian (zhaopin.com) resume-purchase fetcher.

        Args:
            position: City code ('gz', 'sz', 'bj', 'hz' or 'sh'). Any other
                value passes '%' as the account location.
            id_number: Id of the resume to buy.
            adviser_user: Stored as ``self.adviser_user``.

        When no account is available, a warning mail is sent and
        ``self.has_cookie`` is set to False; the constructor does not exit.
        """
        BaseFetch.__init__(self)

        # Debug mode: the 4th command-line argument must be "debug".
        self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

        # Translate the city code into the location used to pick an account.
        ppp = {
            'gz': '广州',
            'sz': '深圳',
            'bj': '北京',
            'hz': '杭州',
            'sh': '上海',
        }.get(position, '%')
        self.ctmname = ''
        self.username = ''
        self.password = ''
        # Created first so that redispipe initialisation does not move the
        # logger output elsewhere after logging has been configured.
        self.rp = Rdsreport()
        acc = libaccount.Manage(source='zhilian', option='buy', location=ppp)

        # Re-initialise logging with the handler set used for buying.
        with open(json_config_path) as f:
            log_dict = json.load(f)
        # Looked up before dictConfig, as before; the result is not used here.
        logging.getLogger(__name__)
        log_dict['loggers'][""]['handlers'] = [
            "file", "stream", "buy", "error"
        ]
        logging.config.dictConfig(log_dict)
        logging.debug('hahahahha')

        self.adviser_user = adviser_user
        self.id_number = id_number
        self.position = position

        self.host = r'rd.zhaopin.com'
        self.domain = 'zhaopin.com'
        self.module_name = 'zhiliandown'
        self.init_path()
        self.login_wait = 300

        self.refer = ''
        self.headers = {
            'User-Agent':
            'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Origin':
            'http://rdsearch.zhaopin.com',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language':
            'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Referer':
            'http://rdsearch.zhaopin.com/Home/ResultForCustom?SF_1_1_1=java&SF_1_1_4=2%2C99&SF_1_1_18=765&orderBy=DATE_MODIFIED,1&pageSize=60&SF_1_1_27=0&exclude=1',
        }

        self.login_type = 2
        self.login_at = None
        self.logout_at = None
        self.need_login_tags = [
            'name="login"',
            '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />'
        ]
        self.resume_tags = ['个人信息', '求职意向']
        self.login_success_tag = []

        self.inuse_taskfpath = ''

        self.maxsleeptime = 4
        self.area_list = ['530', '538', '763', '765']
        self.now_time = datetime.datetime.now()
        self.yes_time = self.now_time + datetime.timedelta(days=-3)
        # Two-digit-year date of three days ago. '%y' replaces the former
        # "format with %Y, then delete every '20'" approach, which also wiped
        # out a day or month equal to 20 (e.g. '2017-06-20' became '17-06-').
        self.yester_time = self.yes_time.strftime('%y-%m-%d')
        self.logger = common.log_init(__name__, 'zlbuy2.log')

        username1 = acc.uni_user()
        self.logger.info('get buy username is {}'.format(username1))
        self.has_cookie = True
        if username1:
            self.username = username1
            selected_msg = 'zhilian buy select username is {}'.format(
                self.username)
            logging.info(selected_msg)
            self.logger.info(selected_msg)
            self.headers['Cookie'] = acc.redis_ck_get(self.username)
        else:
            logging.error('no avail login cookie for zldown')
            self.logger.error('no avail login cookie for zldown')
            self.send_mails('Warining, no account for zldown',
                            'no avail login cookie for zldown')
            print('没有已经登陆的 zldown cookie文件')
            # Deliberately no exit here; has_cookie is only recorded as False.
            self.has_cookie = False
        print('id num is {}'.format(id_number))
        print('position is {}'.format(position))
        trying_msg = 'trying to buy id {}, position is {}'.format(
            self.id_number, self.position)
        logging.info(trying_msg)
        self.logger.info(trying_msg)
예제 #5
0
    def __init__(self, position='', id_number=''):
        """Prepare a cjol resume-purchase fetcher.

        Args:
            position: Stored as ``self.position`` and printed.
            id_number: Id of the resume to buy, stored as ``self.id_number``.

        Exits the process (SystemExit) when no cjol account is available.
        """
        BaseFetch.__init__(self)

        # Debug mode: the 4th command-line argument must be "debug".
        self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

        # These are assigned up front because the headers below read
        # self.host and the "trying to buy" log line reads self.id_number.
        self.id_number = id_number
        self.position = position
        self.host = r'rms.cjol.com'
        self.domain = 'cjol.com'
        self.module_name = 'cjoldown'

        self.ctmname = ''
        self.username = ''
        self.password = ''
        self.headers = {
            'Host': self.host,
            'User-Agent':
            'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        }
        acc = libaccount.Manage(source='cjol', option='buy')

        # Re-initialise logging so the file handler writes to cjolbuy.log.
        with open(json_config_path) as f:
            log_dict = json.load(f)
        # Looked up before dictConfig, as before; the result is not used here.
        logging.getLogger(__name__)
        log_dict['handlers']['file']['filename'] = os.path.join(
            log_dir, 'cjolbuy.log')
        logging.config.dictConfig(log_dict)
        logging.debug('hahahahha')

        username1 = acc.uni_user()
        if username1:
            self.username = username1
            logging.info('cjol buy select username is {}'.format(
                self.username))
            self.headers['Cookie'] = acc.redis_ck_get(self.username)
        else:
            logging.error('no avail login cookie for cjol')
            # sys.exit() rather than the interactive-shell helper quit().
            sys.exit()
        print('id num is {}'.format(id_number))
        print('position is {}'.format(position))
        logging.info('trying to buy id {}'.format(self.id_number))

        self.init_path()
        self.login_wait = 300
        self.refer = ''

        self.login_type = 2
        self.login_at = None
        self.logout_at = None
        self.need_login_tags = [
            '<span id="valUserName" style="color:Red;visibility:hidden;">请输入用户名</span>',
            '<input id="LoginName" name="UserID" type="text" value="" placeholder="请输入用户名" />'
        ]
        self.resume_tags = ['基本信息', '简历编号']
        self.login_success_tag = []

        # Parameters tracking the id-range task: start / end / current.
        self.start_num = 0
        self.end_num = 0
        self.current_num = self.start_num
        self.maxsleeptime = 2
예제 #6
0
    def __init__(self, cookie_fpath='', task_fpath=''):
        """Set up the 51job id fetcher.

        Args:
            cookie_fpath: Path of an existing cookie file, loaded through
                ``load_cookie``. The process exits when the file is missing.
            task_fpath: Path of the task file, stored as ``self.taskfpath``.
        """
        BaseFetch.__init__(self)
        if os.path.exists(cookie_fpath):
            self.load_cookie(cookie_fpath)
        else:
            # Reported as an error (not debug) so the reason for exiting is
            # visible at the default log level.
            logging.error('cookie file %s does not exist.' % cookie_fpath)
            exit()

        self.account = libaccount.Manage(source='51job', option='down')
        self.host = r'ehire.51job.com'
        self.domain = '51job.com'
        self.module_name = '51job'
        self.init_path()
        self.login_wait = 300

        self.ctmname = ''
        self.username = ''
        self.password = ''

        self.refer = ''
        self.headers = {
            'Host': self.host,
            'User-Agent':
            'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        }

        self.login_type = 2
        self.login_at = None
        self.logout_at = None
        self.need_login_tags = [
            '<td colspan="2" class="loginbar">',
            '<input type="button" onclick="loginSubmit'
        ]

        self.resume_tags = ['<div id="divResume"><style>', '简历编号']
        self.login_success_tag = []

        self.cookie_fpath = cookie_fpath
        self.taskfpath = task_fpath
        self.inuse_taskfpath = ''

        # Parameters tracking the id-range task: start / end / current.
        self.start_num = 0
        self.end_num = 0
        self.current_num = self.start_num
        self.maxsleeptime = 11
        # A single report object; it used to be constructed twice in a row.
        self.rp = Rdsreport()

        # The following settings are used when selecting an account.
        self.time_period = 400
        # Together with time_period, this can limit the fetch frequency when
        # an account is being selected.
        self.time_num = 150
        self.hour_num = 0
        self.day_num = 0
        # Final value; an earlier assignment of 300 was always overwritten.
        self.switch_num = 30
        # Accounts for which downloading by assembled id no longer works.
        self.error_username = [
            'spxx373', 'spxx336', 'huasheng123', u'北京事业部2', u'北京事业部3',
            u'广州事业部1', u'深圳事业部2'
        ]
        self.task_name = ''

        # Re-initialise logging so the file handler writes to
        # job51_id_fetch.log.
        # Looked up before dictConfig, as before; the result is not used here.
        logging.getLogger(__name__)
        with open(common.json_config_path) as f:
            log_dict = json.load(f)
        log_dict['handlers']['file']['filename'] = os.path.join(
            log_dir, 'job51_id_fetch.log')
        logging.config.dictConfig(log_dict)
        logging.debug('hahahahha')
예제 #7
0
    def __init__(self, position='', id_number='', adviser_user=''):
        """Prepare a 51job resume-purchase fetcher.

        Args:
            position: City code ('gz', 'sz', 'bj', 'hz' or 'sh'). Any other
                value passes '%' as the account location.
            id_number: Id of the resume to buy.
            adviser_user: Stored as ``self.adviser_user``.

        When no account is available, a warning mail is sent and
        ``self.has_cookie`` is set to False; the constructor does not exit.
        """
        BaseFetch.__init__(self)

        # Debug mode: the 4th command-line argument must be "debug".
        self.debug = len(sys.argv) >= 5 and sys.argv[4] == "debug"

        # Translate the city code into the location used to pick an account.
        ppp = {
            'gz': '广州',
            'sz': '深圳',
            'bj': '北京',
            'hz': '杭州',
            'sh': '上海',
        }.get(position, '%')

        self.ctmname = ''
        self.username = ''
        self.password = ''
        # Created first so that redispipe initialisation does not move the
        # logger output elsewhere after logging has been configured.
        self.rp = Rdsreport()
        acc = libaccount.Manage(source='51job', option='buy', location=ppp)

        # Re-initialise logging with the handler set used for buying.
        with open(json_config_path) as f:
            log_dict = json.load(f)
        # Looked up before dictConfig, as before; the result is not used here.
        logging.getLogger(__name__)
        log_dict['loggers'][""]['handlers'] = [
            "file", "stream", "buy", "error"
        ]
        logging.config.dictConfig(log_dict)
        logging.debug('hahahahha')

        self.host = r'ehire.51job.com'
        self.domain = '51job.com'
        self.module_name = '51jobdown'
        self.init_path()
        self.login_wait = 300

        self.refer = ''
        self.headers = {
            'Host': self.host,
            'User-Agent':
            'Mozilla/5.0 (Ubuntu; X11; Linux i686; rv:8.0) Gecko/20100101 Firefox/8.0',
            'Accept':
            'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        }

        self.login_type = 2
        self.login_at = None
        self.logout_at = None
        self.need_login_tags = [
            '<td colspan="2" class="loginbar">',
            '<input type="button" onclick="loginSubmit'
        ]

        self.resume_tags = ['<div id="divResume"><style>', '简历编号', '简历信息']
        self.login_success_tag = []

        self.adviser_user = adviser_user
        self.id_number = id_number
        self.position = position
        self.inuse_taskfpath = ''

        # Parameters tracking the id-range task: start / end / current.
        self.start_num = 0
        self.end_num = 0
        self.current_num = self.start_num
        self.maxsleeptime = 5
        self.logger = common.log_init(__name__, '51buy2.log')

        username1 = acc.uni_user()
        self.logger.info('select username is {}'.format(username1))
        self.has_cookie = True
        if username1:
            self.username = username1
            # The message names 51job; it previously said "cjol buy" here.
            logging.info('51job buy select username is {}'.format(
                self.username))
            self.headers['Cookie'] = acc.redis_ck_get(self.username)
        else:
            logging.error('no avail login cookie for 51down')
            self.send_mails('Warining, no account for 51down',
                            'no avail login cookie for 51down')
            print('没有已经登陆的 51job cookie文件')
            # Deliberately no exit here; has_cookie is only recorded as False.
            self.has_cookie = False
        print('id num is {}'.format(id_number))
        print('position is {}'.format(position))
        logging.info('trying to buy id {}, position is {}'.format(
            self.id_number, self.position))
예제 #8
0
from bs4 import BeautifulSoup
import libaccount

def p_key(source):
    """Return the cookie key prefix that belongs to *source*.

    The markers are tried in order ('51', 'cjol', 'zhilian') and the first
    one contained in *source* wins. When none matches, 'no valid source' is
    printed and an empty string is returned.
    """
    prefixes = (
        ('51', 'cookie51_'),
        ('cjol', 'cookiecjol_'),
        ('zhilian', 'cookiezl_'),
    )
    for marker, prefix in prefixes:
        if marker in source:
            return prefix
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('no valid source')
    return ''

if __name__ == '__main__':
    # Interactive helper: ask once for the source, then keep reading
    # username / cookie-string pairs and store each non-empty cookie.
    source = raw_input('please input your source:\n')
    manager = libaccount.Manage(source=source, option='down')
    while True:
        username = raw_input('input username:\n')
        cookie_text = raw_input('input cookie string:\n')
        # Echo what was read, together with its type.
        print(cookie_text)
        print(type(cookie_text))
        if not cookie_text:
            print('ck_str not right, please retry')
        else:
            # The username is stored as typed, without a p_key() prefix.
            manager.redis_ck_set(username, cookie_text)
        print('\n' * 2)

예제 #9
0
# cjol cookie files are recognised by their file-name prefix.
cookie_cjol = [n for n in cookie_file_list if n.startswith('cjol')]
# Every file that is neither in cookie_51 nor in cookie_cjol.
cookie_zl = [
    n for n in cookie_file_list
    if n not in cookie_51 and n not in cookie_cjol
]

# Store every 51job cookie file that carries a UserName field under that name.
for c_51 in cookie_51:
    fpath = os.path.join(cookie_dir, c_51)
    with open(fpath) as f:
        ff = f.read()
    ck_dict = urlparse.parse_qs(ff)
    if 'UserName' not in ck_dict:
        # No UserName field in this file: nothing to store.
        continue
    username = ck_dict['UserName'][0]
    a = libaccount.Manage(source='51job')
    a.redis_ck_set(username, ff)

# Walk the cjol cookie files and choose a username from the CompanyID field.
# NOTE(review): this loop may continue beyond the lines visible here.
for c_cjol in cookie_cjol:
    fpath = os.path.join(cookie_dir, c_cjol)
    # print fpath
    with open(fpath) as f:
        ff = f.read()
    ck_dict = urlparse.parse_qs(ff)
    # The key is looked up with its leading space, exactly as written.
    if ck_dict.keys().count(' CompanyID') == 1:
        company_id = ck_dict[' CompanyID'][0]
        # NOTE(review): `username` is assigned only for these two company
        # ids; for any other id it keeps whatever value it already had
        # (or is undefined) - confirm this is intended.
        if str(company_id) == '317080':
            username = '******'
        if str(company_id) == '308380':
            username = '******'
        a = libaccount.Manage(source='cjol')