def parse1(self, response):
    """Parse a Lagou job-detail page into a LagouItem, then follow the
    next-page link.

    Yields:
        LagouItem: the scraped job posting.
        Request: a follow-up request for the next listing page.
    """
    x = HtmlXPathSelector(response)
    item = LagouItem()
    item['title'] = x.xpath("//dt[@class='clearfix']/h1/text()").extract()[2]
    item['companyname'] = x.xpath("//div[@class='s_txt_jobs']//a[@style='font-size:14px;font-weight:bold;color:#000000;']/text()").extract()[0]
    item['tag'] = x.xpath("//div[@class='s_txt_jobs']//div[@class='jobdetail_divRight_span']").extract()
    # Label cells (txt_1) and value cells (txt_2 ) appear in parallel order;
    # pair them up and dispatch on the label text.
    title = x.xpath("//div[@class='s_txt_jobs']//table[@class='jobs_1']//td[@class='txt_1']/text()").extract()
    nei = x.xpath("//div[@class='s_txt_jobs']//table[@class='jobs_1']//td[@class='txt_2 ']/text()").extract()
    for label, value in zip(title, nei):
        if label == u'发布日期:':
            item['day'] = value
        elif label == u'工作地点:':
            item['place'] = value
        elif label == u'招聘人数:':
            item['cnt'] = value
        elif label == u'工作年限:':
            item['jobyear'] = value
        elif label.replace(u'\xa0', u'') == u'学历:':
            # This label carries a non-breaking space; strip it before comparing.
            item['xueli'] = value
        elif label == u'薪水范围:':
            item['money'] = value
    # BUG FIX: the original took extract()[0] (a single string), so
    # '\t'.join(...) inserted a tab between every CHARACTER. Join the full
    # list of detail fragments instead.
    detail = x.xpath("//td[@class='txt_4 wordBreakNormal job_detail ']/div").extract()
    item['jobdetail'] = '\t'.join(detail)
    fu = x.xpath("//div[@class='s_txt_jobs']//div[@class='jobdetail_divRight_span']/span[@class='Welfare_label']/text()").extract()
    if len(fu) > 0:
        item['fuli'] = fu
    item['url'] = response.url
    yield item
    # BUG FIX: the original built the Request but never yielded it, so the
    # spider never followed the next-page link. (Also removed the debug
    # print that called the non-existent x.path(...) — AttributeError.)
    next_url = x.xpath("//a[@style='border:0px; width:auto;margin-left:5px;']/@href").extract()[2]
    yield Request(next_url, callback=self.parse)
def parse(self, response):
    """Extract title/link/description items from a directory listing page.

    Returns:
        list: one DmozItem per <li> under //fieldset/ul.
    """
    hxs = HtmlXPathSelector(response)
    # BUG FIX: selectors have no .path() method (AttributeError at runtime);
    # use .xpath() as the other spider callbacks in this file do.
    sites = hxs.xpath('//fieldset/ul/li')
    items = []
    for site in sites:
        item = DmozItem()
        item['title'] = site.xpath('a/text()').extract()
        item['link'] = site.xpath('a/@href').extract()
        item['desc'] = site.xpath('text()').extract()
        items.append(item)
    return items
def parse(self, response):
    """Collect title/href pairs from the anchors inside the target post cell.

    Returns:
        list: one x3cn_Item per <a> under td#postmessage_6511893.
    """
    items = []
    hxs = HtmlXPathSelector(response)
    # BUG FIX: selectors have no .path() method (AttributeError at runtime);
    # use .xpath() instead.
    sites = hxs.xpath("//td[@id='postmessage_6511893']/a")
    for site in sites:
        item = x3cn_Item()
        item['title'] = site.xpath("text()").extract()
        item['link'] = site.xpath("@href").extract()
        items.append(item)
    return items
def parse(self, response):
    """Collect title/href pairs from the anchors inside the target post cell.

    NOTE(review): this method duplicates the previous parse() verbatim —
    consider removing one copy.

    Returns:
        list: one x3cn_Item per <a> under td#postmessage_6511893.
    """
    items = []
    hxs = HtmlXPathSelector(response)
    # BUG FIX: selectors have no .path() method (AttributeError at runtime);
    # use .xpath() instead.
    sites = hxs.xpath("//td[@id='postmessage_6511893']/a")
    for site in sites:
        item = x3cn_Item()
        item['title'] = site.xpath("text()").extract()
        item['link'] = site.xpath("@href").extract()
        items.append(item)
    return items