def crawl_lagou_jobs_count():
    """Crawl per-city job counts for the most frequent keywords and store them.

    For every keyword returned by
    ``KeywordController.get_most_frequently_keywords(limit=800)`` the job count
    is requested once per tracked city, and one row is written through
    ``JobsCountController.add`` dated with
    ``get_date_begin_by_timestamp(after_days=-1)``. When all keywords are
    processed, the ``JobController.get_jobs_statistics`` cache is cleared.

    A response that lacks ``content -> positionResult -> totalCount`` is
    logged and the affected city keeps its default count of 0, so a single
    malformed response does not abort the whole run.
    """
    pre_date = get_date_begin_by_timestamp(after_days=-1)
    keywords = KeywordController.get_most_frequently_keywords(limit=800)
    logging.info('{} crawl_lagou_job_count 定时任务运行中! 关键词 {} 个'.format(
        pre_date, len(keywords)))

    for keyword in keywords:
        city_jobs_count = {
            '全国': 0,
            '北京': 0,
            '上海': 0,
            '广州': 0,
            '深圳': 0,
            '杭州': 0,
            '成都': 0
        }
        for city in city_jobs_count:
            response_json = request_jobs_count_json(city=city, keyword=keyword)
            try:
                city_jobs_count[city] = response_json['content'][
                    'positionResult']['totalCount']
            except (KeyError, TypeError):
                # KeyError: an expected key is missing; TypeError: the
                # response (or a nested level) is None / not subscriptable.
                # Keep the default 0 for this city and continue.
                logging.getLogger(__name__).exception(
                    'failed to read jobs count, keyword_id=%s city=%s',
                    keyword.id, city)
        JobsCountController.add(date=pre_date,
                                keyword_id=keyword.id,
                                all_city=city_jobs_count['全国'],
                                beijing=city_jobs_count['北京'],
                                shanghai=city_jobs_count['上海'],
                                guangzhou=city_jobs_count['广州'],
                                shenzhen=city_jobs_count['深圳'],
                                hangzhou=city_jobs_count['杭州'],
                                chengdu=city_jobs_count['成都'])

    logging.info('crawl_lagou_job_count 任务完成!')
    # Drop cached statistics so later reads see the freshly stored counts.
    JobController.get_jobs_statistics.cache_clear()
    logging.info('主动效缓存成功')
def get(self):
    """Render the statistics page for the requested keyword.

    Reads the ``keyword`` query argument (default ``'python'``), looks the
    keyword up with ``KeywordController.get`` and renders ``keyword.html``
    with the six statistics returned by
    ``JobController.get_jobs_statistics``.

    An unknown keyword is answered with ``send_error(404)`` so that the
    response actually carries a 404 status.

    NOTE(review): assumes this method belongs to a
    ``tornado.web.RequestHandler`` subclass -- confirm.
    """
    keyword_name = self.get_argument('keyword', 'python')
    keyword = KeywordController.get(name=keyword_name)
    if not keyword:
        # write_error() only renders the error page; send_error() sets the
        # HTTP status first and then delegates to write_error().
        self.send_error(404)
        return

    (keyword_jobs_count, educations_request_counter,
     finance_stage_distribution, city_jobs_counter, salary_distribution,
     work_years_request_analyze) = JobController.get_jobs_statistics(
         keyword_id=keyword.id)

    self.render("keyword.html",
                keyword=keyword_name,
                keyword_jobs_count=keyword_jobs_count,
                educations_request_counter=educations_request_counter,
                finance_stage_distribution=finance_stage_distribution,
                city_jobs_counter=city_jobs_counter,
                salary_distribution=salary_distribution,
                work_years_request_analyze=work_years_request_analyze)
def get(self):
    """Count the visit and render the statistics page for a keyword.

    Increments the Redis counter stored under
    ``constants.REDIS_VISITED_PEOPLES_COUNT_KEY``, reads the ``keyword``
    query argument (default ``'python'``), looks the keyword up with
    ``KeywordController.get`` and renders ``keyword.html`` with the six
    statistics returned by ``get_jobs_statistics``.

    An unknown keyword is answered with ``send_error(404)`` so that the
    response actually carries a 404 status.

    NOTE(review): assumes this method belongs to a
    ``tornado.web.RequestHandler`` subclass -- confirm.
    """
    # Count visitors; the original author notes that incr() is thread-safe.
    redis_instance.incr(constants.REDIS_VISITED_PEOPLES_COUNT_KEY)

    keyword_name = self.get_argument('keyword', 'python')
    keyword = KeywordController.get(name=keyword_name)
    if not keyword:
        # write_error() only renders the error page; send_error() sets the
        # HTTP status first and then delegates to write_error().
        self.send_error(404)
        return

    (keyword_jobs_count, educations_request_counter,
     finance_stage_distribution, city_jobs_counter, salary_distribution,
     work_years_request_analyze) = get_jobs_statistics(keyword.id)

    self.render("keyword.html",
                keyword=keyword_name,
                keyword_jobs_count=keyword_jobs_count,
                educations_request_counter=educations_request_counter,
                finance_stage_distribution=finance_stage_distribution,
                city_jobs_counter=city_jobs_counter,
                salary_distribution=salary_distribution,
                work_years_request_analyze=work_years_request_analyze)
def crawl_lagou_jobs_count():
    """Crawl per-city job counts for the most frequent keywords and store them.

    Up to 2000 keywords are taken from
    ``KeywordController.get_most_frequently_keywords``. For each keyword the
    job count is requested once per tracked city; a response whose
    ``totalCount`` cannot be read is logged and that city keeps a count of 0.
    One row per keyword is written with ``JobsCountController.add``, dated
    with ``get_date_begin_by_timestamp(after_days=-1)``. Finally the
    ``get_jobs_statistics`` cache is cleared and the number reported by
    ``cache_clear`` is logged.
    """
    # City name used in the request -> keyword argument of JobsCountController.add.
    city_fields = (
        ('全国', 'all_city'),
        ('北京', 'beijing'),
        ('上海', 'shanghai'),
        ('广州', 'guangzhou'),
        ('深圳', 'shenzhen'),
        ('杭州', 'hangzhou'),
        ('成都', 'chengdu'),
    )

    pre_date = get_date_begin_by_timestamp(after_days=-1)
    keywords = KeywordController.get_most_frequently_keywords(limit=2000)
    start_message = '{} crawl_lagou_job_count 定时任务运行中! 关键词 {} 个'.format(
        pre_date, len(keywords))
    logging.info(start_message)

    for keyword in keywords:
        counts = {city_name: 0 for city_name, _ in city_fields}
        for city in counts:
            response_json = request_jobs_count_json(city=city, keyword=keyword)
            try:
                position_result = response_json['content']['positionResult']
                counts[city] = position_result['totalCount']
            except Exception:
                # Best effort: record the traceback and leave this city at 0.
                logging.getLogger(__name__).exception(
                    '获取 jobs count 信息失败, 关键词为 {}'.format(keyword.name))

        count_kwargs = {
            field: counts[city_name] for city_name, field in city_fields
        }
        JobsCountController.add(date=pre_date,
                                keyword_id=keyword.id,
                                **count_kwargs)

    logging.info('crawl_lagou_job_count 任务完成!')

    # Invalidate the cache.
    remove_count = cache_clear(get_jobs_statistics)
    logging.info('主动失效缓存成功, 数量{}'.format(remove_count))
def generate_job_data(job, company_id):
    """Generate job data.

    Fetches the detail data (department, description, keywords) for
    ``job['positionId']``, maps the raw job fields through the project lookup
    tables, stores the job with ``JobController.add`` and then links every
    keyword to the job with ``JobKeywordController.add``.

    :param job: mapping with the raw job fields; the keys read here are
        ``positionId``, ``positionName``, ``workYear``, ``salary``,
        ``education``, ``jobNature``, ``createTime`` and, optionally,
        ``city`` and ``positionAdvantage``.
    :param company_id: id of the company the job belongs to; passed through
        to ``JobController.add``.
    """
    department, description, keywords = requests_job_detail_data(
        job['positionId'])
    job_id = job['positionId']

    # A job without a city is stored with city id 0.
    if 'city' in job:
        city_id = CityController.get_city_id_by_name(job['city'])
    else:
        city_id = 0

    title = job['positionName']

    # Unrecognised work-year labels are logged and mapped to 'unknown'.
    raw_work_year = filter_http_tag(job['workYear'])
    if raw_work_year in WORK_YEARS_REQUEST_DICT:
        work_year = WORK_YEARS_REQUEST_DICT[raw_work_year]
    else:
        logger.error(raw_work_year + 'not in WORK_YEAR_DICT')
        work_year = WORK_YEARS_REQUEST_DICT['unknown']

    salary = job['salary']
    education = EDUCATION_REQUEST_DICT[job['education']]
    if 'positionAdvantage' in job:
        advantage = job['positionAdvantage']
    else:
        advantage = ''
    job_nature = JOB_NATURE_DICT[job['jobNature']]
    created_at = job_date2timestamp(job['createTime'])

    job_fields = {
        'id': job_id,
        'company_id': company_id,
        'title': title,
        'work_year': work_year,
        'city_id': city_id,
        'salary': salary,
        'education': education,
        'department': department,
        'description': description,
        'advantage': advantage,
        'job_nature': job_nature,
        'created_at': created_at,
    }
    JobController.add(**job_fields)

    for keyword_name in keywords:
        JobKeywordController.add(
            job_id,
            KeywordController.get_keyword_id_by_name(keyword_name),
            city_id)