コード例 #1
0
    def parse(self, response):
        """Build one item per ranked row in every ``top-min`` section.

        For each ``<div class ="top-min">`` block the section heading is read
        once, then the title, rank and score columns of the
        ``top-list fn-clear`` list are extracted as parallel lists and
        combined row by row.

        The score is assembled from two text nodes per row (the ``<strong>``
        and the ``<span>`` inside the ``lasted-time fn-right`` cell); each is
        converted with ``float`` and the two are added.
        NOTE(review): presumably these are the integer and fractional parts
        of a rating -- confirm against the page markup.

        Args:
            response: the downloaded page; only its ``xpath`` method is used.

        Returns:
            list: ``Meiju100Item`` objects with the ``catgory``,
            ``storyname``, ``rank`` and ``point`` fields filled in.

        Raises:
            ValueError: if a score fragment cannot be parsed by ``float``.
        """
        row_xpath = './/ul[@class ="top-list fn-clear"]/li'
        items = []

        for section in response.xpath('//div[@class ="top-min"]'):
            headings = section.xpath(
                './/div[@class="tt fn-clear"]/h5/text()').extract()
            # A section without a heading yields items with no category
            # instead of aborting the whole page with an IndexError.
            category = headings[0] if headings else None

            names = section.xpath(row_xpath + '/h5/a/text()').extract()
            ranks = section.xpath(
                row_xpath + '/div[@class="lasted-num fn-left"]/i/text()'
            ).extract()
            score_heads = section.xpath(
                row_xpath + '/div[@class="lasted-time fn-right"]/strong/text()'
            ).extract()
            score_tails = section.xpath(
                row_xpath + '/div[@class="lasted-time fn-right"]/span/text()'
            ).extract()

            # Pair the two score fragments row by row. A nested comprehension
            # over both lists would build their cross product, so row i would
            # be scored with the first row's leading fragment.
            scores = [
                float(head) + float(tail)
                for head, tail in zip(score_heads, score_tails)
            ]

            # zip stops at the shortest column, so a row missing one of the
            # cells cannot trigger an out-of-range index.
            for name, rank, score in zip(names, ranks, scores):
                item = Meiju100Item()
                item['catgory'] = category
                item["storyname"] = name
                item['rank'] = rank
                item['point'] = score
                items.append(item)

        return items
コード例 #2
0
ファイル: meiju.py プロジェクト: Scoefield/ScrapyProject
    def parse(self, response):
        """Yield one ``Meiju100Item`` per ``<li>`` of the ``top-list`` list.

        Every field is read with ``extract_first()``, so each value is a
        single string or ``None``. Three fields have a fallback that is used
        when the primary lookup returns nothing (or an empty string):

        * ``movstatu``   -- text directly inside the first ``<span>`` when
          there is no nested ``<font>``.
        * ``movstation`` -- the placeholder ``u'未知'`` ("unknown").
        * ``movupdate``  -- text of a ``<font>`` inside the second ``<div>``
          when the ``<div>`` has no direct text.

        Args:
            response: the downloaded page; its ``xpath`` and ``urljoin``
                methods are used.

        Yields:
            Meiju100Item: with ``movname``, ``movurl``, ``movstatu``,
            ``movstation`` and ``movupdate`` set.
        """
        for row in response.xpath('//ul[@class="top-list  fn-clear"]/li'):
            item = Meiju100Item()
            item['movname'] = row.xpath('./h5/a/text()').extract_first()
            item['movurl'] = response.urljoin(
                row.xpath('./h5/a/@href').extract_first())

            # ``a or b`` only evaluates the fallback lookup when the primary
            # one produced nothing, exactly like the former if/else ladders.
            item['movstatu'] = (
                row.xpath('./span[1]/font/text()').extract_first()
                or row.xpath('./span[1]/text()').extract_first()
            )

            # The placeholder is a plain string so the field has the same
            # type as a value obtained from extract_first(); wrapping it in
            # a list made the field's type depend on whether the page had
            # the data.
            item['movstation'] = (
                row.xpath('./span[2]/text()').extract_first() or u'未知'
            )

            item['movupdate'] = (
                row.xpath('./div[2]/text()').extract_first()
                or row.xpath('./div[2]/font/text()').extract_first()
            )
            yield item
コード例 #3
0
    def parse(self, response):
        """Collect one ``Meiju100Item`` per ``<li>`` of the ``top-list`` list.

        Each field stores the complete list returned by ``extract()`` for its
        XPath (possibly empty), not a single string.

        Args:
            response: the downloaded page; only its ``xpath`` method is used.

        Returns:
            list: the items, in document order of the matched ``<li>`` nodes.
        """
        # (item field, XPath relative to the <li>) in the order they are set.
        field_paths = (
            ('storyName', './h5/a/text()'),
            ('storyState', './span[1]/font/text()'),
            ('tvStation', './span[2]/text()'),
            ('updateTime', './div[2]/text()'),
        )

        collected = []
        for row in response.xpath('//ul[@class="top-list  fn-clear"]/li'):
            entry = Meiju100Item()
            for field, path in field_paths:
                entry[field] = row.xpath(path).extract()
            collected.append(entry)
        return collected
コード例 #4
0
 def parse(self, response):
     """Collect one ``Meiju100Item`` per ``<li>`` of the ``top-list`` list.

     Sets ``storyName``, ``tvStation`` and ``storyLX`` to the first text
     node matched by their XPath and prints a dashed separator line for
     every row processed.

     Args:
         response: the downloaded page; only its ``xpath`` method is used.

     Returns:
         list: the items, in document order of the matched ``<li>`` nodes.

     Raises:
         IndexError: if any of the three lookups matches nothing for a row.
     """

     def first_text(node, path):
         # Indexing with [0] is what raises IndexError on an empty match.
         return node.xpath(path).extract()[0]

     rows = response.xpath('//ul[@class="top-list  fn-clear"]/li')
     results = []
     for row in rows:
         entry = Meiju100Item()
         entry['storyName'] = first_text(row, './h5/a/text()')
         # Currently disabled: 'storyState' from
         #   ./span[@class="state1 new100state1"]/text()
         entry['tvStation'] = first_text(row, './span[@class="mjtv"]/text()')
         # Currently disabled: 'updateTime' from
         #   ./div[@class="lasted-time new100time fn-right"]/font/text()
         entry['storyLX'] = first_text(row, './span[@class="mjjq"]/text()')
         print("----------------------------------")
         results.append(entry)
     return results
コード例 #5
0
 def parse(self, response):
     """Collect one ``Meiju100Item`` per ``lasted-num fn-left`` cell.

     The loop iterates over the ``<div class="lasted-num fn-left">`` nodes
     that are direct children of an ``<li>``; every field is then looked up
     relative to the cell's parent (``..``).

     ``storyName``, ``storyState`` and ``updateTime`` hold the first matched
     text node, whereas ``tvStation`` holds the whole (possibly empty) list
     returned by ``extract()``.

     Args:
         response: the downloaded page; only its ``xpath`` method is used.

     Returns:
         list: the items, in document order of the matched cells.

     Raises:
         IndexError: if the name, state or update-time lookup matches
             nothing for a row.
     """
     state_path = '../span[@class="state1 new100state1"]/text()'
     station_path = '../span[@class="mjtv"]/a/text()'
     updated_path = '../div[@class="lasted-time new100time fn-right"]/text()'

     output = []
     for cell in response.xpath('//li/div[@class="lasted-num fn-left"]'):
         record = Meiju100Item()
         record['storyName'] = cell.xpath('../h5/a/text()').extract()[0]
         record['storyState'] = cell.xpath(state_path).extract()[0]
         # Kept as a list: this is the only field not reduced to [0].
         record['tvStation'] = cell.xpath(station_path).extract()
         record['updateTime'] = cell.xpath(updated_path).extract()[0]
         output.append(record)
     return output