예제 #1
0
파일: lagou.py 프로젝트: syntomic/Spider
 def parse_item(self, response):
     """Build a ``PythonJobItem`` from *response* and yield it.

     Every field except ``body`` is copied from ``response.meta``; a key
     missing from ``response.meta`` raises ``KeyError``.  ``body`` is the
     list of text nodes of the ``<p>`` children of ``div.content``.

     Yields:
         The populated ``PythonJobItem``.
     """
     item = PythonJobItem()
     q = response.xpath
     item['address'] = response.meta['city']
     item['salary'] = response.meta['salary']
     item['create_time'] = response.meta['createTime']
     item['body'] = q('//div[@class="content"]/p/text()').extract()
     item['company_name'] = response.meta['companyName']
     item['position_id'] = response.meta['positionId']
     item['position_name'] = response.meta['positionName']
     # Emit the item: without this the populated item was discarded and
     # the callback produced nothing.
     yield item
예제 #2
0
    def parse_detail(self, response):
        """Scrape one page into a ``PythonJobItem`` and yield it.

        ``url`` is taken from ``response.url``.  Every other field is the
        text of all nodes matched by that field's XPath, joined with no
        separator.
        """
        job = PythonJobItem()
        print("response.url==", response.url)

        def joined_text(query):
            # Concatenate every text node matched by *query*.
            return "".join(response.xpath(query).extract())

        # (item field, XPath) pairs, in the order they are evaluated and
        # stored on the item.
        field_queries = (
            ("title", '//div[@class="cn"]/h1/text()'),
            ("location", '//div[@class="cn"]/span/text()'),
            ("salary", '//div[@class="cn"]/strong/text()'),
            ("company_name", '//div[@class="cn"]/p/a/text()'),
            (
                "company_info",
                '//div[@class="cn"]/p[@class="msg ltype"]/text()',
            ),
            ("experience", '//div[@class="t1"]/span[1]/text()'),
            (
                "job_info",
                # Union of three node sets under the same container div.
                '//div[@class="bmsg job_msg inbox"]/p/text()'
                '|//div[@class="bmsg job_msg inbox"]/text()'
                '|//div[@class="bmsg job_msg inbox"]//p//span/text()',
            ),
            ("address", '//div[@class="bmsg inbox"]/p/text()'),
        )

        # Run all queries first, then fill the item, so the item is only
        # touched once every value has been extracted.
        scraped = [(field, joined_text(query))
                   for field, query in field_queries]

        job["url"] = response.url
        for field, text in scraped:
            job[field] = text

        yield job
예제 #3
0
 def parse_item(self, response):
     """Populate a ``PythonJobItem`` from *response* and yield it.

     Single-valued fields take the first XPath match; some of them fall
     back to a fixed Chinese default string when nothing matches.  The
     id is the last path segment of ``response.url`` up to its first
     dot.  ``body`` keeps every matched text node as a list.
     """
     job = PythonJobItem()

     def first(query, fallback=None):
         # First match of *query*, or *fallback* if there is none.
         return response.xpath(query).extract_first(fallback)

     job['address'] = first('//div[@class="jt"]/em/text()', '北京')
     job['salary'] = first('//p[@class="jp"]/text()', '面议')
     job['create_time'] = first('//div[@class="jt"]/span//text()')

     # NOTE(review): the key is spelled 'postion_id'; it presumably has
     # to match the field declared on PythonJobItem -- confirm before
     # renaming.
     last_segment = response.url.split("/")[-1]
     job['postion_id'] = last_segment.split('.')[0]

     job['position_name'] = first('//div[@class="jt"]/p/text()')
     job['work_year'] = first('//span[@class="s_n"]/text()', '不限')
     job['company_name'] = first('//p[@class="c_444"]/text()')
     job['educational'] = first('//span[@class="s_x"]//text()', '不限')
     job['body'] = response.xpath(
         '//div[@class="ain"]/article//text()').extract()
     yield job
예제 #4
0
 def parse_item(self, response):
     """Populate a ``PythonJobItem`` from *response* and yield it.

     ``address``, ``work_year`` and ``educational`` are the first,
     second and third text nodes matched by one shared XPath.  Any of
     the three that is missing is stored as ``None``, the same value
     ``extract_first()`` gives the other single-valued fields when
     nothing matches.  ``body`` keeps every matched text node as a list.

     Yields:
         The populated ``PythonJobItem``.
     """
     item = PythonJobItem()
     q = response.xpath

     # Evaluate the shared XPath once instead of three times, and pad
     # the result so a page with fewer than three text nodes yields
     # None for the missing fields rather than raising IndexError.
     summary = q(
         '/html/body/div[1]/div[2]/div[1]/div/div/div[2]/p/text()'
     ).extract()
     address, work_year, educational = (summary + [None] * 3)[:3]

     item['address'] = address
     item['work_year'] = work_year
     item['educational'] = educational
     item['salary'] = q('//span[@class="salary"]/text()').extract_first()
     item['company_name'] = q(
         '/html/body/div[1]/div[2]/div[2]/div/div[2]/div[3]/text()'
     ).extract_first()
     # NOTE(review): the key is spelled 'postion_id'; it presumably has
     # to match the field declared on PythonJobItem -- confirm before
     # renaming.
     item['postion_id'] = response.url.split("/")[-1].split('.')[0]
     item['position_name'] = q(
         '//*[@id="main"]/div[2]/div/div[2]/div[1]/h1/text()'
     ).extract_first()
     item['body'] = q(
         '//div[1]/div[2]/div[3]/div/div[2]/div[2]/div[1]/div/text()'
     ).extract()
     yield item