コード例 #1
0
    def parse(self, response):
        """Scrape one listing page and follow the next-page link.

        Yields one ``MyfirstpjtItem`` per ``<li>`` found under the
        ``grid_view`` list, then, when a next-page link is present, a
        ``scrapy.Request`` for that page with this method as the callback.
        """
        movie_list = response.xpath(
            "//div[@class='article']//ol[@class='grid_view']/li")
        for i_item in movie_list:
            douban_item = MyfirstpjtItem()
            douban_item['serial_number'] = i_item.xpath(
                ".//div[@class='item']//em/text()").extract_first()
            douban_item['movie_name'] = i_item.xpath(
                ".//div[@class='info']/div[@class='hd']/a/span[1]/text()"
            ).extract_first()

            content = i_item.xpath(
                ".//div[@class='info']//div[@class='bd']/p[1]/text()"
            ).extract()
            # Keep the last text fragment with all whitespace removed, as
            # before, but always assign the field: when the paragraph has no
            # text the key used to be left unset, unlike every other field
            # here, which falls back to None.
            # NOTE(review): earlier fragments are discarded - confirm that
            # only the last one is wanted.
            douban_item['introduce'] = (
                "".join(content[-1].split()) if content else None)

            douban_item['star'] = i_item.xpath(
                ".//span[@class='rating_num']/text()").extract_first()
            douban_item['evaluate'] = i_item.xpath(
                ".//div[@class='star']//span[4]//text()").extract_first()
            douban_item['describe'] = i_item.xpath(
                ".//p[@class='quote']/span/text()").extract_first()
            yield douban_item

        next_link = response.xpath(
            "//span[@class='next']/link/@href").extract_first()
        if next_link:
            # Resolve the href against the current page URL instead of
            # concatenating it onto a hard-coded base address.
            yield scrapy.Request(response.urljoin(next_link),
                                 callback=self.parse)
コード例 #2
0
ファイル: weisuen.py プロジェクト: whyismefly/pythoncrawl
def parse(self, response):
    """Print the selection matched by the page ``<title>`` text XPath.

    The result of ``response.xpath`` is stored, without extraction, under
    ``"urlname"`` on a fresh ``MyfirstpjtItem``. Nothing is returned.
    """
    page_item = MyfirstpjtItem()
    title_selection = response.xpath("/html/head/title/text()")
    page_item["urlname"] = title_selection
    print("标题:")
    print(page_item["urlname"])


# 12.8: see myxml
コード例 #3
0
    def parse(self, response):
        """Extract the page ``<title>`` text, store it and print it.

        The extracted value is kept under ``'title'`` on a fresh
        ``MyfirstpjtItem``. Nothing is returned.
        """
        page_item = MyfirstpjtItem()
        title_selection = response.xpath('/html/head/title/text()')
        page_item['title'] = title_selection.extract()
        print(page_item['title'])
コード例 #4
0
 def parse(self, response):
     """Print the selection of text nodes of ``<a target="_blank">`` links.

     The result of ``response.xpath`` is stored, without extraction, under
     ``"urlname"`` on a fresh ``MyfirstpjtItem``. Nothing is returned.
     """
     link_item = MyfirstpjtItem()
     link_text_selection = response.xpath("//a[@target='_blank']/text()")
     link_item["urlname"] = link_text_selection
     print(link_item["urlname"])
コード例 #5
0
 def parse(self, response):
     """Print a heading line, then the selection for the page ``<title>``.

     The result of ``response.xpath`` is stored, without extraction, under
     ``'urlname'`` on a fresh ``MyfirstpjtItem``. Nothing is returned.
     """
     page_item = MyfirstpjtItem()
     title_selection = response.xpath('/html/head/title/text()')
     page_item['urlname'] = title_selection
     print("以下将显示网址标题:")
     print(page_item['urlname'])
コード例 #6
0
 def parse(self, response):
     """Print a heading line, then the selection for the page ``<title>``.

     The result of ``response.xpath`` is stored, without extraction, under
     ``"urlname"`` on a fresh ``MyfirstpjtItem``. Nothing is returned.
     """
     page_item = MyfirstpjtItem()
     title_selection = response.xpath("/html/head/title/text()")
     page_item["urlname"] = title_selection
     print("以下将显示爬取的网址的标题")
     print(page_item["urlname"])
コード例 #7
0
ファイル: scrapy_test.py プロジェクト: xiyouhujing/SpiderPro
 def parse(self, response):
     """Extract the page ``<title>`` text into an item and return it.

     The extracted value is stored under ``'urltitle'`` on a fresh
     ``MyfirstpjtItem``; a heading line is printed before the item is
     returned.
     """
     item = MyfirstpjtItem()
     item['urltitle'] = response.xpath("/html/head/title/text()").extract()
     # The parenthesised single-argument form works on both Python 2 and 3;
     # the bare ``print u"..."`` statement is a SyntaxError on Python 3.
     print(u"以下将显示爬取的网址的标题")
     return item
コード例 #8
0
 def parse(self, response):
     """Extract the page ``<title>`` text, print it and return the item.

     The extracted value is stored under ``'urlname'`` on a fresh
     ``MyfirstpjtItem``; a heading line and the stored value are printed
     before the item is returned.
     """
     page_item = MyfirstpjtItem()
     title_selection = response.xpath('/html/head/title/text()')
     page_item['urlname'] = title_selection.extract()
     print('以下将显示爬取的网址的标题')
     print(page_item['urlname'])
     return page_item