def request_jobs_count_json(city, keyword):
    """Fetch the job-search JSON payload for a keyword, optionally per city.

    Sends a POST to ``constants.JOB_JSON_URL`` (redirects are not followed)
    after calling ``crawler_sleep()``, using headers from
    ``generate_http_header(is_crawl_jobs_count=True)`` and cookies from
    ``Cookies.get_random_cookies()``.

    Args:
        city: Value for the ``city`` query parameter. When it equals
            '全国' the parameter is left out of the query entirely.
        keyword: Object whose ``name`` attribute is sent as the ``kd``
            form field.

    Returns:
        The decoded JSON payload; it is guaranteed to contain 'content'.

    Raises:
        RequestsError: If the request fails, the body is not valid JSON,
            or the payload lacks 'content'. In the last case the cookies
            used for the request are first handed to
            ``Cookies.remove_cookies``.
    """
    query_string = {'needAddtionalResult': False}
    if city != '全国':
        query_string['city'] = city

    form_data = {
        'first': False,
        'pn': 1,
        'kd': keyword.name,
    }
    headers = generate_http_header(is_crawl_jobs_count=True)
    crawler_sleep()

    try:
        cookies = Cookies.get_random_cookies()
        response = requests.post(
            url=constants.JOB_JSON_URL,
            params=query_string,
            data=form_data,
            headers=headers,
            cookies=cookies,
            allow_redirects=False,
            timeout=constants.TIMEOUT,
        )
        response_json = response.json()
        if 'content' not in response_json:
            # The cookies produced an unusable answer; drop them before failing.
            Cookies.remove_cookies(cookies)
            raise RequestsError(error_log='wrong response content')
    except (RequestException, ValueError) as e:
        # ValueError: older ``requests`` releases raise a plain ValueError from
        # ``response.json()`` on a non-JSON body (e.g. an unfollowed redirect),
        # which would otherwise escape without being turned into RequestsError.
        logging.error(e)
        raise RequestsError(error_log=e)

    return response_json
def request_job_json(company_id, page_no):
    """Fetch one page of a company's job listings as decoded JSON.

    Sends a GET to ``constants.COMPANY_JOB_URL`` after calling
    ``crawler_sleep()``, using headers from ``generate_http_header()`` and
    cookies from ``Cookies.get_random_cookies()``. The query always asks for
    ``positionFirstType`` u"技术" with a ``pageSize`` of 10.

    Args:
        company_id: Value for the ``companyId`` query parameter.
        page_no: Value for the ``pageNo`` query parameter.

    Returns:
        The decoded JSON payload; it is guaranteed to contain 'content'.

    Raises:
        RequestsError: If the request fails, the body is not valid JSON,
            or the payload lacks 'content'. In the last case the cookies
            used for the request are first handed to
            ``Cookies.remove_cookies``.
    """
    params = {
        'companyId': company_id,
        'positionFirstType': u"技术",
        'pageNo': page_no,
        'pageSize': 10,
    }
    headers = generate_http_header()
    crawler_sleep()

    try:
        cookies = Cookies.get_random_cookies()
        response = requests.get(
            url=constants.COMPANY_JOB_URL,
            params=params,
            headers=headers,
            cookies=cookies,
            timeout=constants.TIMEOUT,
        )
        response_json = response.json()
        if 'content' not in response_json:
            # The cookies produced an unusable answer; drop them before failing.
            Cookies.remove_cookies(cookies)
            raise RequestsError(error_log='wrong response content')
    except (RequestException, ValueError) as e:
        # ValueError: older ``requests`` releases raise a plain ValueError from
        # ``response.json()`` on a non-JSON body, which would otherwise escape
        # without being turned into RequestsError.
        logging.error(e)
        raise RequestsError(error_log=e)

    return response_json
def request_company_json(url, page_no):
    """Fetch one page of a company listing as decoded JSON.

    Sends a GET to ``url`` (redirects are not followed) after calling
    ``crawler_sleep()``, using headers from ``generate_http_header()`` and
    cookies from ``Cookies.get_random_cookies()``.

    Args:
        url: Endpoint to query.
        page_no: Value for the ``pn`` query parameter.

    Returns:
        The decoded JSON payload; it is guaranteed to contain 'totalCount'.

    Raises:
        RequestsError: If the request fails, the body is not valid JSON,
            or the payload lacks 'totalCount'. In the last case the cookies
            used for the request are first handed to
            ``Cookies.remove_cookies``.
    """
    params = {
        'first': False,
        'pn': page_no,
        'sortField': 1,
        'havemark': 0,
    }
    headers = generate_http_header()
    crawler_sleep()

    try:
        cookies = Cookies.get_random_cookies()
        response = requests.get(
            url=url,
            params=params,
            headers=headers,
            cookies=cookies,
            allow_redirects=False,
            timeout=constants.TIMEOUT,
        )
        response_json = response.json()
        if 'totalCount' not in response_json:
            # The cookies produced an unusable answer; drop them before failing.
            Cookies.remove_cookies(cookies)
            raise RequestsError(error_log='wrong response content')
    except (RequestException, ValueError) as e:
        # ValueError: older ``requests`` releases raise a plain ValueError from
        # ``response.json()`` on a non-JSON body (e.g. an unfollowed redirect),
        # which would otherwise escape without being turned into RequestsError.
        logging.error(e)
        raise RequestsError(error_log=e)

    return response_json