def parse_item(self, response):
    """Build a job item from request metadata plus the page description.

    Most fields are copied straight from ``response.meta`` (keys ``city``,
    ``salary``, ``createTime``, ``companyName``, ``positionId`` and
    ``positionName``); only ``body`` is scraped from the page itself.

    Args:
        response: the downloaded page; must expose ``meta`` and ``xpath``.

    Yields:
        PythonJobItem: the populated item.

    Raises:
        KeyError: if one of the expected keys is absent from
            ``response.meta`` (assuming it behaves like a plain dict).
    """
    meta = response.meta
    item = PythonJobItem()
    item['address'] = meta['city']
    item['salary'] = meta['salary']
    item['create_time'] = meta['createTime']
    # Text nodes of every <p> directly under <div class="content">.
    item['body'] = response.xpath(
        '//div[@class="content"]/p/text()').extract()
    item['company_name'] = meta['companyName']
    item['position_id'] = meta['positionId']
    item['position_name'] = meta['positionName']
    # Hand the populated item back to the caller; without this the callback
    # would build the item and then silently discard it.
    yield item
def parse_detail(self, response):
    """Scrape one job-detail page into a ``PythonJobItem``.

    Each scraped field is the concatenation, without a separator, of all
    text nodes matched by that field's XPath. The page URL is stored
    under ``url``.

    Args:
        response: the downloaded page; must expose ``url`` and ``xpath``.

    Yields:
        PythonJobItem: the populated item.
    """
    item = PythonJobItem()
    print("response.url==", response.url)

    def joined_text(xpath):
        # Glue every matched text node together with no separator.
        return "".join(response.xpath(xpath).extract())

    # (item field, XPath) pairs, listed in the order they are stored.
    field_xpaths = (
        ("title", '//div[@class="cn"]/h1/text()'),
        ("location", '//div[@class="cn"]/span/text()'),
        ("salary", '//div[@class="cn"]/strong/text()'),
        ("company_name", '//div[@class="cn"]/p/a/text()'),
        ("company_info", '//div[@class="cn"]/p[@class="msg ltype"]/text()'),
        ("experience", '//div[@class="t1"]/span[1]/text()'),
        (
            "job_info",
            '//div[@class="bmsg job_msg inbox"]/p/text()|//div[@class="bmsg job_msg inbox"]/text()|//div[@class="bmsg job_msg inbox"]//p//span/text()',
        ),
        ("address", '//div[@class="bmsg inbox"]/p/text()'),
    )

    # Run every query before touching the item, so all extraction happens
    # first and all assignments happen afterwards.
    scraped = [(field, joined_text(xpath)) for field, xpath in field_xpaths]

    item["url"] = response.url
    for field, value in scraped:
        item[field] = value
    yield item
def parse_item(self, response):
    """Scrape a job posting page into a ``PythonJobItem``.

    Single-valued fields take the first node matched by their XPath; some
    of them fall back to a literal default when nothing matches. ``body``
    keeps every matched text node as a list. The posting id is derived
    from the last path segment of the URL, up to its first dot.

    Args:
        response: the downloaded page; must expose ``url`` and ``xpath``.

    Yields:
        PythonJobItem: the populated item.
    """
    def first(xpath, fallback=None):
        # First matched node, or ``fallback`` when the query is empty.
        return response.xpath(xpath).extract_first(fallback)

    item = PythonJobItem()
    item['address'] = first('//div[@class="jt"]/em/text()', '北京')
    item['salary'] = first('//p[@class="jp"]/text()', '面议')
    item['create_time'] = first('//div[@class="jt"]/span//text()')

    # e.g. ".../12345.html" -> "12345"
    last_segment = response.url.rsplit("/", 1)[-1]
    # NOTE(review): the key is spelled 'postion_id'; confirm it matches the
    # field declared on PythonJobItem.
    item['postion_id'] = last_segment.partition('.')[0]

    item['position_name'] = first('//div[@class="jt"]/p/text()')
    item['work_year'] = first('//span[@class="s_n"]/text()', '不限')
    item['company_name'] = first('//p[@class="c_444"]/text()')
    item['educational'] = first('//span[@class="s_x"]//text()', '不限')
    item['body'] = response.xpath(
        '//div[@class="ain"]/article//text()').extract()
    yield item
def parse_item(self, response):
    """Scrape a job posting page into a ``PythonJobItem``.

    ``address``, ``work_year`` and ``educational`` are the first, second
    and third text nodes of one summary paragraph. That paragraph is
    queried once, and any of the three that is missing is stored as
    ``None`` instead of raising ``IndexError``. The posting id is derived
    from the last path segment of the URL, up to its first dot.

    Args:
        response: the downloaded page; must expose ``url`` and ``xpath``.

    Yields:
        PythonJobItem: the populated item.
    """
    item = PythonJobItem()
    q = response.xpath

    # One query feeds three fields. Pad with None so a page with fewer
    # than three text nodes still yields an item, matching what
    # extract_first() gives for the other fields when nothing matches.
    summary = list(q(
        '/html/body/div[1]/div[2]/div[1]/div/div/div[2]/p/text()'
    ).extract())
    address, work_year, educational = (summary + [None] * 3)[:3]

    item['address'] = address
    item['work_year'] = work_year
    item['educational'] = educational
    item['salary'] = q('//span[@class="salary"]/text()').extract_first()
    item['company_name'] = q(
        '/html/body/div[1]/div[2]/div[2]/div/div[2]/div[3]/text()'
    ).extract_first()
    # NOTE(review): the key is spelled 'postion_id'; confirm it matches the
    # field declared on PythonJobItem.
    item['postion_id'] = response.url.split("/")[-1].split('.')[0]
    item['position_name'] = q(
        '//*[@id="main"]/div[2]/div/div[2]/div[1]/h1/text()'
    ).extract_first()
    item['body'] = q(
        '//div[1]/div[2]/div[3]/div/div[2]/div[2]/div[1]/div/text()'
    ).extract()
    yield item