Ejemplo n.º 1
0
 def parse_tags(self, response):
     """Parse a tag page.

     Populates a ``TagItem`` through ``DefaultItemLoader`` using XPath
     selections evaluated against ``response`` and yields the single
     loaded item.
     """
     # Disabled: self.queue.put(response.meta.get("browser"))
     loader = DefaultItemLoader(item=TagItem(), response=response)

     # (item field, XPath) pairs, registered in this order.
     field_xpaths = (
         ("name", "//div[@class='top-text']/text()"),
         ("likes", "//div[@class='concern-num']/text()"),
         ("publish_nums", "//div[@class='pageInfo']/span[2]/text()"),
     )
     for field_name, xpath in field_xpaths:
         loader.add_xpath(field_name, xpath)

     yield loader.load_item()
Ejemplo n.º 2
0
 def parse_person(self, response):
     """Parse a user profile page.

     Populates a ``PersonItem`` through ``DefaultItemLoader`` using XPath
     selections evaluated against ``response`` and yields the single
     loaded item. Several fields are read from attributes (``@class``,
     ``@title``, ``@lvl``, ``@src``) rather than from text nodes.
     """
     # Disabled: self.queue.put(response.meta.get("browser"))
     loader = DefaultItemLoader(PersonItem(), response=response)

     # (item field, XPath) pairs, registered in this order.
     field_xpaths = (
         ("name", "//span[@id='h-name']/text()"),
         ("gender", "//span[@id='h-gender']/@class"),
         ("sign", "//*[@class='h-sign']/text()"),
         ("level", "//a[contains(@class, 'h-level')]/@lvl"),
         ("avatar", "//img[@id='h-avatar']/@src"),
         (
             "uid",
             "//div[contains(@class, 'uid')]/span[@class='text']/text()",
         ),
         (
             "birthday",
             "//div[contains(@class, 'birthday')]/span[@class='text']/text()",
         ),
         ("attention_nums", "//a[contains(@class, 'n-gz')]/@title"),
         ("fans_nums", "//a[contains(@class, 'n-fs')]/@title"),
         ("play_nums", "//div[contains(@class, 'n-bf')]/@title"),
         (
             "register_time",
             "//div[contains(@class, 'regtime')]/span[@class='text']/text()",
         ),
         ("member_level", "//a[contains(@class, 'h-vipType')]/@class"),
         (
             "play_game_list",
             "//div[contains(@class, 'game')]//div[@class='detail']/text()",
         ),
         ("tags", "//div[contains(@class, 'tag-list')]/a/text()"),
     )
     for field_name, xpath in field_xpaths:
         loader.add_xpath(field_name, xpath)

     yield loader.load_item()
Ejemplo n.º 3
0
    def parse_article(self, response):
        """Parse an article page.

        Populates an ``ArticleItem`` through ``DefaultItemLoader`` from
        XPath selections on ``response`` plus two direct values (the
        response URL and the result of ``get_id`` on that URL), then
        yields the loaded item. When ``PARSE_COMMENTS`` is truthy, every
        item produced by ``self.gen_comments_item(response)`` is yielded
        afterwards.
        """
        loader = DefaultItemLoader(ArticleItem(), response=response)

        # XPath-backed fields registered before the URL-derived values.
        leading_xpaths = (
            ("author", "//a[@class='up-name']/text()"),
            ("cover_img_url", "//div[@class='banner-img-holder']/@style"),
            ("title", "//h1[@class='title']/text()"),
            ("desc", "//div[contains(@class, 'article-holder')]/p/text()"),
        )
        for field_name, xpath in leading_xpaths:
            loader.add_xpath(field_name, xpath)

        # Values taken from the response URL rather than the page body.
        loader.add_value("url", response.url)
        loader.add_value("cid", get_id(response.url))

        # Remaining XPath-backed fields.
        trailing_xpaths = (
            ("img_box", "//figure[@class='img-box']/img/@data-src"),
            ("views", "//div[@class='article-data']/span[1]/text()"),
            ("likes", "//div[@class='article-data']/span[2]/text()"),
            ("comments", "//div[@class='article-data']/span[3]/text()"),
            ("coins", "//div[@class='coin-btn']/div/span/text()"),
            ("collections", "//div[@class='fav-btn']/div/span/text()"),
            ("shares", "//div[@class='share-btn']/div/span/text()"),
            ("publish_time", "//span[@class='create-time']/text()"),
            ("category", "//a[@class='category-link']/span/text()"),
            ("tags", "//li[@class='tag-item']/span[2]/text()"),
        )
        for field_name, xpath in trailing_xpaths:
            loader.add_xpath(field_name, xpath)

        yield loader.load_item()

        # Comment items are only emitted when the flag is enabled.
        if not PARSE_COMMENTS:
            return
        for comment_item in self.gen_comments_item(response):
            yield comment_item
Ejemplo n.º 4
0
    def parse_detail(self, response):
        """Parse a video detail page.

        Populates a ``VideoItem`` through ``DefaultItemLoader`` from XPath
        selections on ``response``, the response URL (and ``get_id`` of
        it), and the ``comments`` / ``likes`` entries of ``response.meta``
        (each defaulting to 0 when absent). Increments the
        ``detail_item`` crawler stat, then yields the loaded item. When
        ``PARSE_COMMENTS`` is truthy, every item produced by
        ``self.gen_comments_item(response)`` is yielded afterwards.
        """
        loader = DefaultItemLoader(VideoItem(), response=response)
        from_xpath = loader.add_xpath
        from_value = loader.add_value
        page_url = response.url
        meta = response.meta

        # Ordered (loader method, item field, XPath-or-value) steps.
        # The substring(..., N) expressions keep the @title attribute from
        # its N-th character onwards (XPath positions are 1-based);
        # presumably this skips a fixed label prefix -- confirm.
        steps = (
            (from_value, "vid", get_id(page_url)),
            (
                from_xpath,
                "author",
                "//div[contains(@class, 'user')]"
                "/a[contains(@class, 'name')]/text()",
            ),
            (from_xpath, "title", "//h1/@title"),
            (
                from_xpath,
                "desc",
                "//div[@id='v_desc']/div[contains(@class, 'info')]",
            ),
            (from_value, "url", page_url),
            (
                from_xpath,
                "play_nums",
                "substring(//span[contains(@class, 'v play')]/@title, 5)",
            ),
            (
                from_xpath,
                "danmu_nums",
                "substring(//span[contains(@class, 'v dm')]/@title, 5)",
            ),
            (
                from_xpath,
                "coins",
                "substring(//span[@report-id='coinbtn1']/@title, 6)",
            ),
            (
                from_xpath,
                "collections",
                "substring(//span[@report-id='collect1']/@title, 5)",
            ),
            (from_value, "comments", meta.get("comments", 0)),
            (
                from_xpath,
                "shares",
                "//div[@id='playpage_share']//span[@class='num']/text()",
            ),
            (from_value, "likes", meta.get("likes", 0)),
            (from_xpath, "publish_time", "//time/text()"),
            (
                from_xpath,
                "category",
                "//div[contains(@class, 'tminfo')]/span/a/text()",
            ),
            (
                from_xpath,
                "tags",
                "//ul[contains(@class, 'tag-area')]/li/a/text()",
            ),
        )
        for add, field_name, source in steps:
            add(field_name, source)

        video_item = loader.load_item()
        # Count the item once it has loaded, before handing it downstream.
        self.crawler.stats.inc_value("detail_item")
        yield video_item

        # Comment items are only emitted when the flag is enabled.
        if not PARSE_COMMENTS:
            return
        for comment_item in self.gen_comments_item(response):
            yield comment_item