Esempio n. 1
0
def request_jobs_count_json(city, keyword):
    query_string = {'needAddtionalResult': False}
    if city != '全国':
        query_string['city'] = city
    form_data = {
        'first': False,
        'pn': 1,
        'kd': keyword.name
    }
    headers = generate_http_header(is_crawl_jobs_count=True)
    crawler_sleep()
    try:
        cookies = Cookies.get_random_cookies()
        response = requests.post(url=constants.JOB_JSON_URL,
                                 params=query_string,
                                 data=form_data,
                                 headers=headers,
                                 cookies=cookies,
                                 allow_redirects=False,
                                 timeout=constants.TIMEOUT)
        response_json = response.json()
        if 'content' not in response_json:
            Cookies.remove_cookies(cookies)
            raise RequestsError(error_log='wrong response content')
    except RequestException as e:
        logging.error(e)
        raise RequestsError(error_log=e)
    return response_json
Esempio n. 2
0
def request_job_json(company_id, page_no):
    prams = {
        'companyId': company_id,
        'positionFirstType': u"技术",
        'pageNo': page_no,
        'pageSize': 10,
    }
    headers = generate_http_header()
    crawler_sleep()
    try:
        cookies = Cookies.get_random_cookies()
        response_json = requests.get(url=constants.COMPANY_JOB_URL,
                                     params=prams,
                                     headers=headers,
                                     cookies=cookies,
                                     timeout=constants.TIMEOUT).json()
        if 'content' not in response_json:
            Cookies.remove_cookies(cookies)
            raise RequestsError(error_log='wrong response content')
    except RequestException as e:
        logging.error(e)
        raise RequestsError(error_log=e)
    return response_json
Esempio n. 3
0
def request_company_json(url, page_no):
    prams = {
        'first': False,
        'pn': page_no,
        'sortField': 1,
        'havemark': 0,
    }
    headers = generate_http_header()
    crawler_sleep()
    try:
        cookies = Cookies.get_random_cookies()
        response_json = requests.get(url=url,
                                     params=prams,
                                     headers=headers,
                                     cookies=cookies,
                                     allow_redirects=False,
                                     timeout=constants.TIMEOUT).json()
        if 'totalCount' not in response_json:
            Cookies.remove_cookies(cookies)
            raise RequestsError(error_log='wrong response content')
    except RequestException as e:
        logging.error(e)
        raise RequestsError(error_log=e)
    return response_json