Ejemplo n.º 1
0
def fetch_contact(search_data, resume_id, username, password, proxies=None):
    """Log in, search for the resume by keywords and upload what is found.

    Each item produced by ``GetResume(...).get_resume_by_keywords()`` is a
    dict with ``err_code`` and ``err_msg`` keys:

    * ``err_code == 0``    -- the resume is uploaded and the upload result
      is returned immediately.
    * ``err_code == 7002`` -- the resume is uploaded, its ``resume_id`` is
      collected and iteration continues (presumably "several candidates
      matched" -- confirm against ``GetResume``).
    * anything else        -- the item itself is returned unchanged.

    Args:
        search_data: search criteria forwarded to ``GetResume``.
        resume_id: identifier of the wanted resume, forwarded to ``GetResume``.
        username: account name used to log in.
        password: account password used to log in.
        proxies: optional proxies value forwarded to ``Login`` / ``GetResume``.

    Returns:
        * ``{"err_code": 101, "err_msg": ...}`` when login fails,
        * the result of ``upload`` for a single match,
        * a ``{"err_code": 7002, ...}`` summary listing the uploaded ids
          when one or more 7002 items were processed,
        * the failing item for any other ``err_code``,
        * ``None`` when the search yields nothing at all.
    """
    logger.info("start fetch contact with search_data: %s, \nresume_id: %s" % (search_data, resume_id))
    user_agent = nautil.user_agent()
    logger.info("登录中......")
    is_login, session = Login(username, password, user_agent, proxies=proxies).login()
    if not is_login:
        # On failure the second element carries the failure detail.
        return {"err_code": 101, "err_msg": session}

    ids = []
    searcher = GetResume(session, resume_id, search_data, user_agent, proxies=proxies)
    for resume in searcher.get_resume_by_keywords():
        err_code = resume["err_code"]
        if err_code not in (0, 7002):
            # Error results are handed back untouched; no need to serialise them.
            return resume
        upload_resume = json.dumps(resume["err_msg"], ensure_ascii=False)
        res = upload(upload_resume, "yifeng", get_contact=True, logger_in=logger)
        if err_code == 0:
            return res
        ids.append(res["resume_id"])

    if ids:
        # Only 7002 items append to ``ids``, so a non-empty list means at
        # least one such item was uploaded.
        resume_ids = " ".join(ids)
        return {"err_code": 7002, "err_msg": "找到了%s个简历, ids: %s" %(len(ids), resume_ids)}
    # The search produced no item at all.
    return None
Ejemplo n.º 2
0
def fetch_contact_impl(search_data,
                       resume_id,
                       user_name,
                       passwd,
                       proxies=None,
                       logger_name=None):
    """Log in to jianlika, fetch a resume by id and upload it.

    Args:
        search_data: not used by the id-based lookup; kept so the signature
            stays compatible with callers.
        resume_id: identifier passed to ``__get_reusme_by_id``.
        user_name: account name used to log in.
        passwd: account password used to log in.
        proxies: optional proxies value forwarded to the login and lookup.
        logger_name: accepted for interface compatibility; not used here.

    Returns:
        * the second element of ``LoginJianLiKa(...).login()`` when login
          fails,
        * the lookup result (a dict with ``err_code`` / ``err_msg``) when
          the lookup reports a non-zero ``err_code``,
        * otherwise the result of ``upload``.
    """
    logger.info("登录中.....")
    is_login, session = LoginJianLiKa(user_name, passwd,
                                      proxies=proxies).login()
    if not is_login:
        return session

    resume = __get_reusme_by_id(session, resume_id, proxies=proxies)
    if resume["err_code"] != 0:
        # Surface the failure to the caller instead of silently returning
        # None. (A keyword + company-name lookup through
        # ``__get_resume_by_keywords`` existed as a disabled fallback and
        # returned its failure the same way.)
        return resume
    return upload(resume["err_msg"],
                  "jianlika",
                  get_contact=True,
                  logger_in=logger)
Ejemplo n.º 3
0
def fetch_contact_impl(search_data,
                       resume_id,
                       username,
                       password,
                       user_agent,
                       proxies=None):
    """Log in, fetch a resume by id and upload it under the "lie8" source.

    Args:
        search_data: forwarded to ``GetResume``.
        resume_id: identifier of the wanted resume, forwarded to ``GetResume``.
        username: account name used to log in.
        password: account password used to log in.
        user_agent: User-Agent value forwarded to ``Login`` and ``GetResume``.
        proxies: optional proxies value forwarded to ``Login`` / ``GetResume``.

    Returns:
        * ``{"err_code": 101, "err_msg": ...}`` when login fails,
        * the lookup result (dict with ``err_code`` / ``err_msg``) when it
          reports a non-zero ``err_code``,
        * otherwise the result of ``upload``.
    """
    logger.info("登录中......")
    is_login, session = Login(username, password, user_agent,
                              proxies=proxies).login()
    if not is_login:
        # On failure the second element carries the failure detail.
        return {"err_code": 101, "err_msg": session}

    resume = GetResume(session,
                       resume_id,
                       search_data,
                       user_agent,
                       proxies=proxies).get_resume_by_id()
    if resume["err_code"] != 0:
        # Report the failed lookup instead of falling off the end with None.
        return resume
    return upload(resume["err_msg"],
                  "lie8",
                  get_contact=True,
                  logger_in=logger)
Ejemplo n.º 4
0
def __fetch_contact(session, resume_id, proxies):
    """Fetch a fenjianli resume's original file and upload its raw content.

    Steps:
      1. POST ``getDetail.htm`` with the resume id and read
         ``originalFilePath`` from the JSON reply.
      2. GET that path and pass the response body to ``upload``.

    Args:
        session: object exposing requests-style ``post`` / ``get``
            (presumably an already logged-in session -- confirm with caller).
        resume_id: numeric resume id given as ``str`` or ``unicode``.
        proxies: proxies value forwarded to both requests.

    Returns:
        Whatever ``upload`` returns.

    Raises:
        AssertionError: if ``resume_id`` has the wrong type, either request
            does not answer 200, or the detail reply lacks
            ``originalFilePath``.
        ValueError: if ``resume_id`` is not an integer string (``int()``)
            or the detail reply is not valid JSON.
    """
    assert isinstance(resume_id, (str, unicode))
    # The detail page URL carries the id base64-encoded; the AJAX call
    # below sends it in plain form.
    encrypt_resume_id = base64.b64encode(str(int(resume_id)))
    user_agent = nautil.user_agent()
    detail_page_url = ("http://www.fenjianli.com/search/detail.htm?ids=%s" %
                       encrypt_resume_id)
    search_headers = {
        "Accept": "application/json, text/javascript, */*; q=0.01",
        "User-Agent": user_agent,
        "Host": "www.fenjianli.com",
        "Origin": "http://www.fenjianli.com",
        "Connection": "keep-alive",
        "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
        "X-Requested-With": "XMLHttpRequest",
        "Referer": detail_page_url,
    }
    logger.info(
        'fetching resume detail >> http://www.fenjianli.com/search/detail.htm?ids=%s'
        % encrypt_resume_id)

    r = session.post('http://www.fenjianli.com/search/getDetail.htm',
                     headers=search_headers,
                     proxies=proxies,
                     data={
                         'id': resume_id,
                         '_random': random.random()
                     })
    assert r.status_code == 200, r.status_code
    data = json.loads(r.text)
    assert 'originalFilePath' in data
    logger.info('fetching path %s' % data['originalFilePath'])

    raw_headers = {
        'Accept':
        'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        "User-Agent": user_agent,
        'Upgrade-Insecure-Requests': '1',
    }
    r = session.get(data['originalFilePath'],
                    headers=raw_headers,
                    proxies=proxies)
    assert r.status_code == 200, '%s\n%s' % (r.status_code, r.content)
    return upload(r.content, 'fjl', get_contact=True, fjl_id=resume_id)
Ejemplo n.º 5
0
def fetch_contact_impl(search_data,
                       resume_id,
                       user_name,
                       passwd,
                       proxies=None,
                       logger_name=None):
    """Log in to zhaopin, search for the resume and upload what is found.

    ``__get_resumes_by_keywords`` returns a dict with ``err_code`` and
    ``err_msg``. The code decides what happens next:

    * ``101`` -- the search failed; the dict is returned unchanged.
    * ``1``   -- per the log message, the id is an old-style Zhaopin id and
      exactly one resume was found; the first resume is uploaded.
    * ``2``   -- per the log message, the id is an old-style Zhaopin id and
      several resumes were found; every successfully fetched one is
      uploaded and a 7002 summary is returned.
    * other   -- the search succeeded; the first resume is uploaded.

    Args:
        search_data: search criteria forwarded to the keyword search.
        resume_id: identifier of the wanted resume.
        user_name: account name used to log in.
        passwd: account password used to log in.
        proxies: optional proxies value forwarded to ``login``.
        logger_name: when truthy, the module-level ``logger`` is rebound to
            ``logging.getLogger(logger_name)``.

    Returns:
        The second element of ``login(...)`` on login failure, otherwise one
        of the results described above. ``None`` when a first-resume lookup
        yields nothing.
    """
    global logger
    if logger_name:
        logger = logging.getLogger(logger_name)

    result, session = login(user_name, passwd, proxies=proxies, logger=logger)
    if not result:
        return session

    context = __get_resumes_by_keywords(session, search_data, resume_id)
    err_code = context["err_code"]
    if err_code == 101:
        logger.error("__get_resumes_by_keywords return None")
        return context
    if err_code == 2:
        return _upload_all_matches(session, context["err_msg"], resume_id)

    if err_code == 1:
        logger.info("简历ID为智联老ID,但是只搜索到一个简历,下载中.....")
    else:
        logger.info("搜索简历%s成功....." % resume_id)
    return _upload_first_match(session, context["err_msg"], resume_id)


def _upload_first_match(session, search_page, resume_id):
    """Upload the first resume yielded for ``search_page``, or return its error."""
    for resume in __get_resume_url(session, search_page, resume_id):
        if resume["err_code"] != 0:
            return resume
        return upload(resume["err_msg"],
                      "zhaopin",
                      get_contact=True,
                      logger_in=logger)
    # Nothing was yielded.
    return None


def _upload_all_matches(session, search_page, resume_id):
    """Upload every successfully fetched resume and return a 7002 summary."""
    resume_total_num = pq(search_page).find(
        "div.rd-resumelist-span").find("span").text()
    logger.info("简历ID为智联老ID,搜索到简历%s封,下载中....." % resume_total_num)
    ids = []
    for resume in __get_resume_url(session,
                                   search_page,
                                   resume_id,
                                   flag_7002=True):
        if resume["err_code"] != 0:
            # Resumes that failed to download are skipped, not reported.
            continue
        res = upload(resume["err_msg"],
                     "zhaopin",
                     get_contact=True,
                     logger_in=logger)
        ids.append(res["resume_id"])
    resume_ids = " ".join(ids)
    return {
        "err_code": 7002,
        "err_msg": "找到了%s个简历, ids: %s" % (len(ids), resume_ids)
    }
Ejemplo n.º 6
0
def fetch_contact_impl(args,
                       user_name,
                       passwd,
                       proxies=None,
                       logger_name=None):
    """Log in to highpin, obtain a resume's contact details and upload it.

    If the resume page already shows the mobile number and e-mail, it is
    uploaded directly. Otherwise, when the page offers a download button,
    the resume is "downloaded" through ``DownLoadResume``, the page is
    fetched again with ``download_resume`` and that result is uploaded.

    Args:
        args: search arguments forwarded to ``search_resume``.
        user_name: account name used to log in.
        passwd: account password used to log in.
        proxies: optional proxies value forwarded to the HTTP helpers.
        logger_name: when truthy, the module-level ``logger`` is rebound to
            ``_logging.getLogger(logger_name)``.

    Returns:
        * the second element of ``login(...)`` when login fails,
        * the failing dict from ``search_resume`` / ``__get_jobid`` when its
          ``err_code`` is non-zero,
        * ``{"err_code": 20019, ...}`` when the download request fails,
        * the result of ``upload`` on success,
        * ``None`` when contact details are masked and the page has no
          download button.
    """
    global logger
    if logger_name:
        logger = _logging.getLogger(logger_name)
    _timeout = 30

    _result, _session = login(user_name, passwd, proxies=proxies)
    if not _result:
        return _session

    result, url = search_resume(_session, args)
    if result["err_code"] != 0:
        return result

    resume_page = result["err_msg"]
    page_doc = pq(resume_page)
    mobile = page_doc.find("#mobile").text()
    email = page_doc.find("#email").text()
    # The page shows ten asterisks in place of hidden contact details.
    if mobile != '**********' and email != '**********':
        logger.info("联系方式已存在,开始上传简历.....")
        return upload(resume_page,
                      "zhuopin",
                      get_contact=True,
                      logger_in=logger)

    job_id = __get_jobid(_session, proxies=proxies)
    if job_id["err_code"] != 0:
        return job_id

    if "collectresumedownloadbtn" not in resume_page:
        # No download button on the page: nothing more can be done here.
        return None

    logger.info("获取联系方式.....")
    post_headers = {
        "Accept":
        "*/*",
        "Accept-Encoding":
        "gzip, deflate, br",
        "Accept-Language":
        "zh-CN,zh;q=0.8",
        "Connection":
        "keep-alive",
        "Origin":
        "http://h.highpin.cn",
        "Referer":
        url,
        "User-Agent":
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36",
    }
    # Both ids are taken from the query string of the resume URL.
    seekerUser_id = re.search(r"seekerUserID=(\d+)&resumeID",
                              url).groups()[0]
    resumeid = re.search(r"resumeID=(\d+)&", url).groups()[0]
    post_data = {
        "seekerUserID": seekerUser_id,
        "resumeID": resumeid,
        "jobID": job_id["err_msg"],
    }

    try:
        resume_response = _session.post(
            "http://h.highpin.cn/ResumeManage/DownLoadResume",
            data=post_data,
            headers=post_headers,
            timeout=_timeout,
            proxies=proxies)
        # Explicit check rather than ``assert`` so it survives ``python -O``.
        if not resume_response or resume_response.status_code != 200:
            raise ValueError("unexpected response: %r" % resume_response)
    except Exception as e:
        # Arguments are passed to the logger instead of being %-formatted
        # here, so a formatting problem can never mask the error result.
        logger.warning("获取简历ID:%s, 联系方式失败:\n%s", resumeid, e)
        return {"err_code": 20019, "err_msg": "获取简历联系方式失败!"}

    resume_response.encoding = "utf-8"
    if "简历下载已成功,您的职位信息已同时发给该候选人" not in resume_response.text:
        # Only a warning: the resume page is still re-fetched and uploaded.
        logger.warning("获取简历ID%s\n失败,\n%s", resumeid,
                       resume_response.text)
    time.sleep(random.uniform(1, 3))
    resume = download_resume(_session, url, proxies=proxies)
    logger.info("获取联系方式成功,上传简历.....")
    return upload(resume["err_msg"],
                  "zhuopin",
                  get_contact=True,
                  logger_in=logger)
Ejemplo n.º 7
0
def __fetch_contact(session,
                    resume_id,
                    user_name,
                    user_password,
                    proxies=None):
    """Buy (if necessary) a ganji resume's contact details and upload the page.

    Flow:
      1. Load the VIP page; if it shows the login form, log in (up to ten
         attempts, each with a freshly parsed check code).
      2. Ask ``download_resume.php`` about the resume; unless it was already
         downloaded, confirm the purchase.
      3. Fetch the resume page, remove ``session.temp_folder`` and hand the
         page to ``upload``.

    Args:
        session: requests-style session; must also carry a ``temp_folder``
            attribute naming an existing directory for debug dumps.
        resume_id: ganji resume id, interpolated into the resume URLs.
        user_name: account name used when a login is required.
        user_password: account password used when a login is required.
        proxies: accepted for interface compatibility; see the note below.

    Returns:
        Whatever ``upload`` returns.

    Raises:
        AssertionError: on a non-200 response or when an expected marker is
            missing from a page.
        Exception: after ten failed login attempts, when the account has no
            download quota left, or when the resume was deleted.
    """
    user_agent = nautil.user_agent()
    # NOTE(review): the caller's ``proxies`` argument is discarded here, so
    # every request below goes out without proxies. Left unchanged because
    # it looks deliberate -- confirm before honouring the parameter.
    proxies = None

    def _save_debug_page(file_name, text):
        # Debug dump into the session's temp folder; ``with`` guarantees the
        # handle is closed even if the write fails.
        with open(session.temp_folder + os.path.sep + file_name,
                  'w') as handle:
            handle.write(text)

    def __session(method, url, headers=None, data=None):
        # Throttled GET/POST that returns the response body decoded as UTF-8.
        # ``data`` is sent as query parameters for GET and as the form body
        # for POST.
        logger.info('------\nRequesting %s On %s With Data:\n%s\n------' %
                    (method, url, data))
        time.sleep(random.uniform(1, 2))

        assert method in ('get', 'post')
        request_headers = {
            "User-Agent": user_agent,
        }
        if headers:
            request_headers.update(headers)

        if method == 'get':
            response = session.get(url,
                                   headers=request_headers,
                                   proxies=proxies,
                                   params=data)
        else:
            response = session.post(url,
                                    headers=request_headers,
                                    proxies=proxies,
                                    data=data)

        assert response
        assert response.status_code == 200
        response.encoding = 'utf-8'
        return response.text

    main_page = __session('get', 'http://www.ganji.com/vip')
    if '赶集用户登录' in main_page:
        logger.info('cookie fail, try login')
        logger.info('re-login')
        hash_value = re.search('''window.PAGE_CONFIG.__hash__ = '([^']*)';''',
                               main_page)
        assert hash_value
        hash_value = hash_value.group(1)
        logger.info('login hash_value:%s' % hash_value)

        check_code_url = re.search(
            '''<img[^>]*id="login_img_checkcode"[^>]*src=['"]*([^'"]*)['"]*[^>]*>''',
            main_page)
        assert check_code_url
        check_code_url = check_code_url.group(1)
        logger.info('login check_code_url:%s' % check_code_url)
        time_stamp = str(int(time.time() * 1000))

        for _attempt in range(10):
            login_result = __session(
                'get',
                'https://passport.ganji.com/login.php',
                headers={
                    "Host": "passport.ganji.com",
                    "Referer": "https://passport.ganji.com/login.php?next=/",
                    "X-Requested-With": "XMLHttpRequest",
                    "Connection": "keep-alive"
                },
                data={
                    "callback": "jQuery1820229177205394230_%s" % time_stamp,
                    "username": user_name,
                    "password": user_password,
                    # Parsed again on every attempt.
                    "checkCode": parse_check_code(session, check_code_url,
                                                  'ganji', proxies),
                    "setcookies": "14",
                    "second": "",
                    "parentfunc": "",
                    "redirect_in_iframe": "",
                    "next": '/',
                    "__hash__": hash_value,
                    "_": time_stamp
                })
            _save_debug_page('login_result.html', login_result)
            if 'error_msg' not in login_result:
                break
            logger.warning('login fail with response:\n%s' % login_result)
        else:
            # All ten attempts were rejected.
            raise Exception('try too many times to login')

    logger.info('trying to buy contact')
    message = __session('get',
                        'http://www.ganji.com/findjob/download_resume.php',
                        headers={
                            "Host": "www.ganji.com",
                            "Referer":
                            "http://www.ganji.com/jianli/%sx.htm" % resume_id,
                            # Header values must be strings for requests.
                            "Upgrade-Insecure-Requests": "1",
                        },
                        data={
                            "source": "detail",
                            "resume_type": "0",
                            "findjob_puid": resume_id,
                            "job_postion": "",
                            "callback": "show_contact",
                            "is_batch_view_resume": 0
                        })
    _save_debug_page('message.html', message)
    if '您已下载过该简历' not in message:
        if '简历下载数不足' in message:
            raise Exception('The Ganji Account Can Not Afford this Resumes')
        elif '此帖子已删除' in message:
            raise Exception('The Ganji Resume Deleted')
        else:
            # The page asks for confirmation; POST to the same URL to buy.
            assert '确认查看' in message
            buy_url = 'http://www.ganji.com/findjob/download_resume.php?source=detail&resume_type=0&findjob_puid=%s&job_postion=&callback=show_contact&is_batch_view_resume=0' % resume_id
            tel_message = __session('post',
                                    buy_url,
                                    headers={
                                        "Host": "www.ganji.com",
                                        "Origin": "http://www.ganji.com",
                                        "Referer": buy_url,
                                        "Upgrade-Insecure-Requests": "1",
                                    },
                                    data={"one_key_download_setting": 1})
            assert 'tel-code' in tel_message

    logger.info('buy contact done, try upload resume')
    resume = __session('get',
                       'http://www.ganji.com/jianli/%sx.htm' % resume_id)
    shutil.rmtree(session.temp_folder)
    return upload(resume, 'ganji', get_contact=True)
Ejemplo n.º 8
0
def fetch_contact_impl(resume_id,
                       user_name,
                       passwd,
                       proxies=None,
                       logger_name=None):
    """Log in to 51job ehire, obtain a resume's contact details and upload it.

    A private temporary directory is created for the session (exposed as
    ``session.temp_folder``) and is always removed before returning,
    whatever the outcome.

    Args:
        resume_id: 51job user/resume id.
        user_name: account name used to log in.
        passwd: account password used to log in.
        proxies: optional proxies value forwarded to every HTTP helper.
        logger_name: when truthy, the module-level ``logger`` is rebound to
            ``logging.getLogger(logger_name)``.

    Returns:
        * the second element of ``login(...)`` when login fails,
        * a dict with a non-zero ``err_code`` when any step fails,
        * otherwise the result of ``upload``.
    """
    global logger
    if logger_name:
        logger = logging.getLogger(logger_name)
    user_agent = nautil.user_agent()
    result, session = login(user_name, passwd, proxies=proxies)
    if not result:
        return session

    temp_root = os.path.join(tempfile.gettempdir(), "naren")
    if not os.path.isdir(temp_root):
        os.makedirs(temp_root)
    # mkdtemp guarantees a directory no concurrent run shares, so the
    # cleanup below can never delete another run's files.
    session.temp_folder = tempfile.mkdtemp(dir=temp_root)
    try:
        return _fetch_contact_in_session(session, resume_id, user_agent,
                                         proxies)
    finally:
        # Runs on every return path and on exceptions.
        shutil.rmtree(session.temp_folder, ignore_errors=True)


def _fetch_contact_in_session(session, resume_id, user_agent, proxies):
    """Do the 51job lookup/download for a logged-in session; return a result dict."""
    request_timeout = 30
    result = __fet_contanct(session, resume_id, user_agent, proxies=proxies)
    if result["err_code"] != 0:
        return result

    # On success ``err_msg`` holds the resume page path relative to the site.
    url = "http://ehire.51job.com/%s" % result["err_msg"]
    resume_page_result = __get_resume_page(session, url, proxies=proxies)
    if resume_page_result["err_code"] != 0:
        return resume_page_result
    resume_page_text = resume_page_result["err_msg"]

    tel_mail = pq(resume_page_text).find(".infr").text()
    has_labels = (u"电 话:" in resume_page_text
                  and u"E-mail:" in resume_page_text)
    if has_labels or "*" not in tel_mail:
        # Contact details are already visible: upload the page as it is.
        logger.info("简历联系方式已存在")
        return upload(resume_page_text,
                      "j51",
                      get_contact=True,
                      logger_in=logger)

    is_download = pq(resume_page_text).find(
        ".btn_down[id=UndownloadLink]").attr("onclick")
    if not is_download:
        logger.warning("当前账号没有下载权限,获取简历页失败")
        return {"err_code": 101, "err_msg": "当前账号没有下载权限!"}

    if not (u"点击查看联系方式!" in resume_page_text
            and u"简历信息" in resume_page_text):
        return {"err_code": 20020, "err_msg": "抱歉,没有搜到您想找的简历!"}

    post_data = {
        "doType": "SearchToCompanyHr",
        "userId": resume_id,
        "strWhere": "",
    }
    post_headers = {
        "Accept": "application/xml, text/xml, */*",
        "Accept-Encoding": "gzip,deflate",
        "Content-Type": "application/x-www-form-urlencoded",
        "Accept-Language": "zh-CN,zh;q=0.8",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive",
        "Host": "ehire.51job.com",
        "Origin": "http://ehire.51job.com",
        "Referer": url,
        "X-Requested-With": "XMLHttpRequest",
    }
    logger.info("获取简历详情......")
    download_response = session.post(
        url="http://ehire.51job.com/Ajax/Resume/GlobalDownload.aspx",
        headers=post_headers,
        data=post_data,
        timeout=request_timeout,
        proxies=proxies)
    if u"不属于以上地区" in download_response.text:
        return {
            "err_code":
            20022,
            "err_msg":
            "对不起,您暂时不能下载该份简历,原因是:您选中的简历中存在应聘者所在地超出合同范围的情况。请核实您的情况,若有疑问请与销售或客服人员联系。"
        }
    if download_response.status_code != 200:
        return {"err_code": 20019, "err_msg": "获取简历失败"}

    # Fetch the page again now that the download request has gone through.
    resume_result = __get_resume_page(session, url, proxies=proxies)
    if resume_result["err_code"] != 0:
        # Do not upload an error message as if it were a resume.
        return resume_result
    logger.info('fetch resume_id %s done, try upload resume' % resume_id)
    return upload(resume_result["err_msg"],
                  "j51",
                  get_contact=True,
                  logger_in=logger)
Ejemplo n.º 9
0
def __fetch_contact(session,
                    resume_id,
                    user_name,
                    user_password,
                    proxies=None):
    """Download (if necessary) a 58.com resume's contact details and upload it.

    Flow:
      1. Load ``my.58.com``; if it shows the login form, log in through
         ``__login`` and copy the returned cookies into the session.
      2. Load the resume message page; unless the resume is already
         viewable, check the remaining quota and trigger the download.
      3. Fetch the resume page, remove ``session.temp_folder`` and hand the
         page to ``upload``.

    Args:
        session: requests-style session; must also carry a ``temp_folder``
            attribute naming an existing directory for debug dumps.
        resume_id: 58.com resume id, interpolated into the URLs.
        user_name: account name used when a login is required.
        user_password: account password used when a login is required.
        proxies: accepted for interface compatibility; see the note below.

    Returns:
        Whatever ``upload`` returns.

    Raises:
        AssertionError: on a non-200 response, an unexpected page, or when
            no phone number is found after the download request.
        Exception: when the resume requires a certified business licence or
            fewer than five downloads remain on the account.
    """
    user_agent = nautil.user_agent()
    # NOTE(review): the caller's ``proxies`` argument is discarded here, so
    # every request below goes out without proxies. Left unchanged because
    # it looks deliberate -- confirm before honouring the parameter.
    proxies = None

    def __session(method, url, headers=None, data=None):
        # Throttled GET/POST that returns the response body decoded as UTF-8.
        logger.info('------\nRequesting %s On %s With Data:\n%s\n------' %
                    (method, url, data))
        time.sleep(random.uniform(4, 15))
        assert method in ('get', 'post')
        # GET requests never carry a payload here.
        assert method == 'post' or not data
        request_headers = {
            "User-Agent": user_agent,
            "Origin": "http://jianli.58.com",
        }
        if headers:
            request_headers.update(headers)

        if method == 'get':
            response = session.get(url,
                                   headers=request_headers,
                                   proxies=proxies)
        else:
            response = session.post(url,
                                    headers=request_headers,
                                    proxies=proxies,
                                    data=data)

        assert response
        assert response.status_code == 200
        response.encoding = 'utf-8'
        return response.text

    main_page = __session('get', 'http://my.58.com/index')
    if '普通登录方式' in main_page:
        logger.info('cookie fail, try login')
        login_cookies = __login(user_name,
                                user_password,
                                user_agent,
                                proxies=proxies)
        assert isinstance(login_cookies, list)
        login_cookie_jar = requests.cookies.RequestsCookieJar()
        for login_cookie in login_cookies:
            login_cookie_jar.set(login_cookie['name'],
                                 login_cookie['value'],
                                 domain=login_cookie['domain'],
                                 path=login_cookie['path'])
        session.cookies.update(login_cookie_jar)

    message_url = ('http://jianli.58.com/resumemsg/?resumeid=%s&rand_code=%s&f=' %
                   (resume_id, random.random()))
    message = __session('get', message_url)
    if '您好,此求职者只允许在58同城认证营业执照的企业查看和下载' in message:
        raise Exception('Need Certification of Business Licence')
    if '您可直接查看本简历' not in message:
        remain = re.search(
            u"""您目前共有 <span class='f-f1a'>(\\d+)</span> 份简历可下载""", message)
        assert remain and remain.group(
            1).isdigit(), 'Unexpected Message \n%s' % message
        remain = int(remain.group(1))
        if remain < 5:
            # Keep a small reserve instead of spending the last downloads.
            raise Exception(
                'The 58 Accoun Remains Only %s Resumes To Download' % remain)
        logger.info("获取联系方式.....")
        tel = __session(
            'get',
            'http://jianli.58.com/ajax/resumemsg/?operate=userdown&rid=%s' %
            resume_id,
            headers={
                "Referer":
                "http://jianli.58.com/resumemsg/?resumeid=%s&rand_code=%s&f=" %
                (resume_id, random.random())
            })
        # Debug dump; ``with`` guarantees the handle is closed.
        with open(session.temp_folder + os.path.sep + 'tel.html',
                  'w') as handle:
            handle.write(tel)

        if '您可直接查看本简历' not in tel:
            assert re.search(r'>([\d ]*)</span',
                             tel), 'TEL NOT FOUND in html:\n%s' % tel

    logger.info('fetch done, try upload resume')
    resume = __session('get', 'http://jianli.58.com/resume/%s/' % resume_id)
    # The page is passed straight to ``upload``; it is not written to the
    # temp folder because that folder is removed right here.
    shutil.rmtree(session.temp_folder)
    return upload(resume, 'x58', get_contact=True)