def handle_job_info(info):
    """Build the complete record for a single listing entry.

    :param info: a 3-item sequence unpacked as ``(title, link, release_time)``
    :return: dict holding the listing fields merged with whatever
        ``get_message_jobs`` returns for the entry's detail page
    """
    tools.sleep_some_time()
    title, link, listed_time = info
    page_url = url_host + link
    record = {
        'title': title,
        'web_url': page_url,
        'release_time': tools.get_real_time(listed_time),
        'job_type': 0,
        'authentication': 0,
    }
    # Detail-page values overwrite listing values for any key present in both.
    record.update(get_message_jobs(page_url))
    # 'company' is not set above, so it must be supplied by get_message_jobs.
    print(record['title'], record['company'], record['release_time'])
    return record
def get_message_jobs(url):
    """Fetch a job posting page and extract its fields.

    :param url: address of the posting page, passed to ``get_html_t``
    :return: dict with the keys ``release_time``, ``web_html``, ``company``,
        ``work_city`` and ``position``
    """
    page = get_html_t(url)
    company = get_company_name(page)
    cities = tools.get_work_citys(page)
    positions = tools.get_work_position(page)
    raw_time = get_release_time(page)
    return {
        'release_time': tools.get_real_time(raw_time),
        'web_html': page,
        'company': company,
        'work_city': cities,
        'position': positions,
    }
def get_message_jobs(url):
    """Download the posting at ``url`` and collect the recruitment details.

    :param url: address of the posting page
    :return: dict keyed by ``release_time``, ``web_html``, ``company``,
        ``work_city`` and ``position``
    """
    markup = get_html_t(url)
    # Run the extractors first, in a fixed order, then assemble the result.
    employer = get_company_name(markup)
    locations = tools.get_work_citys(markup)
    roles = tools.get_work_position(markup)
    published = tools.get_real_time(get_release_time(markup))
    return dict(
        release_time=published,
        web_html=markup,
        company=employer,
        work_city=locations,
        position=roles,
    )
def handle_all_data(url):
    """Scrape one posting page into a record dict.

    :param url: address of the posting page
    :return: dict with the page URL, its HTML, the extracted title, release
        time, company, position and work city, plus a fixed
        ``message_source`` label
    """
    tools.sleep_some_time()
    page = Get_html(url)
    # Values are evaluated top to bottom, so the extractors run in this order.
    record = {
        'web_url': url,
        'web_html': page,
        'title': get_title(page),
        'release_time': tools.get_real_time(get_date(page)),
        'company': tools.get_company_name(page),
        'position': tools.get_work_position(page),
        'work_city': tools.get_work_citys(page),
        'message_source': '华农园艺学院官网',
    }
    print(record['title'], record['release_time'],
          record['message_source'], record['company'])
    return record
def handle_all_data(url):
    """Collect every field of a single posting.

    :param url: address of the posting page
    :return: dict describing the posting (URL, raw HTML, title, release
        time, company, position, work city and a constant source label)
    """
    tools.sleep_some_time()
    markup = Get_html(url)

    # One named intermediate per field, extracted in a fixed order.
    title = get_title(markup)
    published = tools.get_real_time(get_date(markup))
    employer = tools.get_company_name(markup)
    roles = tools.get_work_position(markup)
    locations = tools.get_work_citys(markup)
    source = '华农资环学院官网'

    print(title, published, source, employer)
    return dict(
        web_url=url,
        web_html=markup,
        title=title,
        release_time=published,
        company=employer,
        position=roles,
        work_city=locations,
        message_source=source,
    )
def fetch():
    """Scrape the entries returned by ``get_message_title_and_url_list(1)``.

    Each entry is unpacked as ``(title, link, release_time)``; its detail
    page is then fetched through ``get_message_jobs`` and merged in.

    :return: list of record dicts, one per entry, in listing order
    """
    records = []
    for entry in get_message_title_and_url_list(1):
        tools.sleep_some_time()
        title, link, listed_time = entry
        page_url = url_host + link
        record = {
            'title': title,
            'web_url': page_url,
            'release_time': tools.get_real_time(listed_time),
            'message_source': "动物科学学院官网",
            'job_type': 0,
            'authentication': 0,
        }
        # Detail-page values overwrite listing values for any shared key.
        record.update(get_message_jobs(page_url))
        records.append(record)
        print(record['title'], record['company'], record['release_time'])
    return records
def fetch():
    """Return one record per entry of ``get_message_title_and_url_list(1)``.

    :return: list of dicts combining the listing fields (title, URL,
        release time, constant source/type/authentication values) with the
        fields returned by ``get_message_jobs`` for the entry's page
    """

    def _build(entry):
        # Turn one (title, link, release_time) entry into a full record.
        title, link, listed_time = entry
        target = url_host + link
        row = dict(
            title=title,
            web_url=target,
            release_time=tools.get_real_time(listed_time),
            message_source="动物科学学院官网",
            job_type=0,
            authentication=0,
        )
        # Keys also returned for the detail page replace the listing values.
        row.update(get_message_jobs(target))
        return row

    collected = []
    for entry in get_message_title_and_url_list(1):
        tools.sleep_some_time()
        row = _build(entry)
        collected.append(row)
        print(row['title'], row['company'], row['release_time'])
    return collected
def add_a_job(job_title, job_company, job_url, job_city, job_message_source,
              job_position, job_release_time, web_html):
    """Insert one job posting into the ``jobs`` table.

    :param job_title: title of the posting
    :param job_company: company name; its MD5 hex digest is stored as ``token``
    :param job_url: address of the page the posting links to
    :param job_city: work location; a list is joined with ``#``
    :param job_message_source: where the posting was found
    :param job_position: job position; a list is joined with ``#``
    :param job_release_time: release time, normalised via ``tools.get_real_time``
    :param web_html: HTML of the scraped page
    :return: the value returned by ``db_lib.insert`` on success, ``0`` when an
        ``IntegrityError`` is raised, ``-1`` (after logging) on any other
        exception
    """
    # isinstance also accepts list subclasses, which `type(x) == list` rejected.
    if isinstance(job_city, list):
        job_city = '#'.join(job_city)
    if isinstance(job_position, list):
        job_position = '#'.join(job_position)

    token = hashlib.md5(job_company.encode("UTF-8")).hexdigest()
    job_release_time = tools.get_real_time(job_release_time)

    # job_type, authentication and status are hard-coded to 0 in the statement.
    sql = """
        insert into jobs(title, company, position, web_url, work_city, message_source,
                         job_type, authentication, status, web_html, release_time, token, create_time)
        values(%s, %s, %s, %s, %s, %s, 0, 0, 0, %s, %s, %s, now());
    """
    try:
        insert_id = db_lib.insert(sql, [
            job_title, job_company, job_position, job_url, job_city,
            job_message_source, web_html, job_release_time, token
        ])
        # Kept inside the try block: a failure here is reported the same way
        # as a failed insert, exactly as before.
        add_company(job_company)
        return insert_id
    except IntegrityError:
        # NOTE(review): presumably a unique-constraint hit (duplicate posting);
        # confirm against the table schema.
        return 0
    except Exception as error:
        log.error("写入数据库失败(错误类型:%s), 信息地址:%s" % (str(error), job_url))
        return -1