Beispiel #1
0
    def parse(self, response):
        """Extract one item per ``div.section`` node, then follow the next page.

        Args:
            response: The downloaded page; it must provide ``xpath()`` and
                ``urljoin()``.

        Yields:
            A ``PythondemoItem`` for every section, with ``title`` set to the
            first text node under the section's ``h2.section-title`` and,
            when the description query matches anything, ``desc`` set to all
            matched text with whitespace removed. After the items, a single
            ``scrapy.Request`` for the next page if a next link is found.
        """
        for section in response.xpath("//div[@class='section']"):
            douban_item = PythondemoItem()
            # The leading "." keeps the query relative to this section;
            # text() selects the text nodes.
            douban_item['title'] = section.xpath(
                ".//div//h2[@class='section-title']//text()").extract_first()

            # NOTE(review): ".//...." looks like a placeholder rather than a
            # real XPath -- replace it with the description query for the
            # target page.
            # extract() turns the selectors into strings; the whitespace
            # clean-up below needs str.split().
            fragments = section.xpath(".//....").extract()
            if fragments:
                # Keep every matched line (not only the last one) and strip
                # all whitespace inside each line.
                douban_item["desc"] = "".join(
                    "".join(fragment.split()) for fragment in fragments)

            # Yield the item so it can be passed on to the pipeline.
            yield douban_item

        # Pagination is handled once per page, after all items, instead of
        # once per item inside the loop.
        next_link = response.xpath(
            "//span[@class='next']/link/@href").extract_first()
        if next_link:
            # urljoin resolves the href against the current page URL, so no
            # hard-coded host prefix is required.
            yield scrapy.Request(response.urljoin(next_link),
                                 callback=self.parse)
Beispiel #2
0
 def parse(self, response):
     """Yield one ``PythondemoItem`` per ``div`` inside the panel body.

     Each matched node is printed, and the first text node found beneath it
     is stored under the item's ``firstname`` key.
     """
     for node in response.xpath("//div/div[@class='panel-body']/div"):
         print(node)
         first_text = node.xpath(".//text()").extract_first()
         # Build the item and yield it so it can be passed on to the pipeline.
         record = PythondemoItem()
         record['firstname'] = first_text
         yield record
Beispiel #3
0
 def parse(self, response):
     """Yield one ``PythondemoItem`` per list entry of the ``slist`` block.

     Each matched ``li`` is printed, and the ``src`` attribute of the first
     ``img`` found beneath it is stored under the item's ``imgsrc`` key.
     """
     for entry in response.xpath("//div[@class='slist']/ul/li"):
         print(entry)
         source_url = entry.xpath(".//img/@src").extract_first()
         # Build the item and yield it so it can be passed on to the pipeline.
         record = PythondemoItem()
         record['imgsrc'] = source_url
         yield record
Beispiel #4
0
 def parse(self, response):
     """Yield one ``PythondemoItem`` per list entry of the ``Left_bar`` block.

     Prints a marker value first, then prints each matched ``li`` and stores
     the ``data-original`` attribute of the first ``img`` beneath it under
     the item's ``imgsrc`` key.
     """
     print(111)
     entries = response.xpath("//div[@class='Left_bar']//ul/li")
     for entry in entries:
         print(entry)
         original_url = entry.xpath(".//img/@data-original").extract_first()
         # Build the item and yield it so it can be passed on to the pipeline.
         record = PythondemoItem()
         record['imgsrc'] = original_url
         yield record
Beispiel #5
0
 def parse(self, response):
     """Yield one ``PythondemoItem`` per table row on the page.

     The raw response text is printed first. For every ``tr`` directly under
     a ``table``, the row and a marker value are printed, the first link text
     in the row's cells is stored under ``firstname``, and the populated item
     is printed before being yielded.
     """
     print(response.text)
     for row in response.xpath("//table/tr"):
         print(row)
         print(2222)
         record = PythondemoItem()
         link_text = row.xpath("./td/a/text()").extract_first()
         record['firstname'] = link_text
         print(record)
         # Yield the item so it can be passed on to the pipeline.
         yield record
    def parse(self, response):
        """Yield an item per table row, then request the next page if any.

        For every body row of the ``table``-classed table, the row is printed
        and the text of its first cell is stored under ``firstname``. After
        the rows, the extracted next-page hrefs and a marker value are
        printed; when at least one href exists, a ``scrapy.Request`` for the
        first one (prefixed with the fixed site host) is yielded with this
        method as its callback.
        """
        row_query = "//div/table[@class='table']/tbody/tr"
        for row in response.xpath(row_query):
            print(row)
            record = PythondemoItem()
            record['firstname'] = row.xpath("./td[1]/text()").extract_first()
            # Yield the item so it can be passed on to the pipeline.
            yield record

        hrefs = response.xpath(
            "//div/ul[@class='pagination']/li[@class='nextpage']/a/@href"
        ).extract()
        print(hrefs)
        print(999)
        if not hrefs:
            # No next-page link: nothing more to schedule.
            return
        target_url = "http://www.resgain.net" + hrefs[0]
        yield scrapy.Request(target_url, callback=self.parse)