Ejemplo n.º 1
0
    def parse_composer(self, response):
        """Build a ``ComposerItem`` from a composer profile page.

        The composer id is taken from ``response.meta['cid']``; every other
        field is scraped from the page with XPath.  ``location`` and
        ``career`` fall back to ``''`` when their node is missing.

        Args:
            response: Response for the profile page.  ``response.meta``
                must contain ``'cid'`` (``KeyError`` otherwise).

        Yields:
            The populated ``ComposerItem``.
        """
        # ``.get()`` returns None when the banner div is absent; fall back to
        # an empty string so ``re.findall`` returns [] instead of raising
        # TypeError.
        banner_style = response.xpath(
            '//div[@class="banner-wrap"]/@style').get() or ''

        composer = ComposerItem()
        composer['cid'] = response.meta['cid']
        # Stored as the list of matched URLs (unchanged from the previous
        # behaviour); an empty list means no banner was found.
        composer['banner'] = re.findall(
            r'background-image:url\((.+?)\)', banner_style)
        composer['avatar'] = response.xpath(
            '//span[@class="avator-wrap-s"]/img/@src').get()

        composer['name'] = response.xpath(
            '//p[contains(@class, "creator-name")]/text()').get()

        composer['intro'] = response.xpath(
            '//p[contains(@class, "creator-desc")]/text()').get()

        # ``ci`` is defined elsewhere in the project; presumably it converts
        # the displayed count text to an int -- TODO confirm.
        composer['like_counts'] = ci(response.xpath(
            '//span[contains(@class, "like-counts")]/text()').get())

        # Read from the ``data-counts`` attribute rather than the node text,
        # and stored as scraped (not passed through ``ci``).
        composer['fans_counts'] = response.xpath(
            '//span[contains(@class, "fans-counts")]/@data-counts').get()

        composer['follow_counts'] = ci(response.xpath(
            '//span[@class="follow-wrap"]/span[2]/text()').get())

        # Location and career text sit in the span right after their icon.
        composer['location'] = response.xpath(
            '//span[contains(@class, "icon-location")]/'
            'following-sibling::span[1]/text()').get() or ''

        composer['career'] = response.xpath(
            '//span[contains(@class, "icon-career")]/'
            'following-sibling::span[1]/text()').get() or ''
        yield composer
Ejemplo n.º 2
0
    def parse_composer(self, response):
        """Build a ``ComposerItem`` from a composer profile page.

        The composer id is taken from ``response.meta['cid']``.  The banner
        and avatar URLs are the first and second ``src="..."`` values found
        in the serialized ``banner-linear`` div; either is ``None`` when the
        div is missing or contains fewer images.

        Args:
            response: Response for the profile page.  ``response.meta``
                must contain ``'cid'`` (``KeyError`` otherwise).

        Yields:
            The populated ``ComposerItem``.
        """
        # Debug marker written to stdout each time a composer page is parsed.
        print('****创作人××××' * 5)
        composer = ComposerItem()

        banner_html = response.xpath(
            '//div[contains(@class,"banner-linear")]').get()
        # Raw string: the earlier non-raw pattern relied on invalid escape
        # sequences, which newer Python versions warn about.  The regex is
        # scanned once, and a missing div (None) is treated as empty text
        # instead of raising TypeError.
        image_urls = re.findall(r'src="(.+?)">', banner_html or '')

        composer['cid'] = response.meta['cid']
        # Guard the positional lookups so a page with fewer than two images
        # yields None instead of raising IndexError.
        composer['banner'] = image_urls[0] if image_urls else None
        composer['avatar'] = image_urls[1] if len(image_urls) > 1 else None

        name = response.xpath(
            '//p[contains(@class,"creator-name")]/text()').get()
        # Only strip when a name node was actually found.
        composer['name'] = name.strip() if name else name
        composer['intro'] = response.xpath(
            '//p[contains(@class,"creator-desc")]/text()').get()
        # ``convert_int`` is defined elsewhere in the project; presumably it
        # turns the displayed count text into an int -- TODO confirm.
        composer['like_counts'] = convert_int(
            response.xpath(
                '//span[contains(@class,"like-counts")]/text()').get())
        composer['fans_counts'] = convert_int(
            response.xpath(
                '//span[contains(@class,"fans-counts")]/text()').get())
        composer['follow_counts'] = convert_int(
            response.xpath('//span[@class="fw_600 v-center"]/text()').get())
        # Location and career text sit in the span right after their icon;
        # both default to '' when absent.
        composer['location'] = response.xpath(
            '//span[contains(@class,"icon-location")]'
            '/following-sibling::span[1]/text()').get() or ''
        composer['career'] = response.xpath(
            '//span[contains(@class,"icon-career")]'
            '/following-sibling::span[1]/text()').get() or ''
        yield composer
 def parse_composer(self, response):
     """Build a ``ComposerItem`` from a composer profile page.

     Unlike variants that rely on ``response.meta``, the composer id is
     read from the page itself (``data-userid`` inside ``creator-info``).

     Args:
         response: Response for the profile page.

     Yields:
         The populated ``ComposerItem``.  ``banner`` is ``None`` when the
         page has no banner style or it contains no background image URL.
     """
     composer = ComposerItem()
     banner_style = response.xpath(
         '//div[@class="banner-wrap"]/@style').get()
     composer['cid'] = response.xpath(
         '//div[@class="creator-info"]//span/@data-userid').get()
     # The earlier single-element unpacking of ``re.findall`` raised
     # ValueError on zero or multiple matches and TypeError when the style
     # attribute was missing.  Take the first match, or None.
     banner_match = re.search(r'background-image:url\((.+?)\)',
                              banner_style or '')
     composer['banner'] = banner_match.group(1) if banner_match else None
     composer['avatar'] = response.xpath(
         '//span[@class="avator-wrap-s"]/img/@src').get()
     # Raw class attribute of the badge span next to the avatar.
     composer['verified'] = response.xpath(
         '//span[@class="avator-wrap-s"]/span/@class').get()
     composer['name'] = response.xpath(
         '//p[contains(@class, "creator-name")]/text()').get()
     composer['intro'] = response.xpath(
         '//p[contains(@class, "creator-desc")]/text()').get()
     like_counts = response.xpath(
         '//span[contains(@class, "like-counts")]/text()').get()
     # Remove commas from the displayed count; '' when the node is missing.
     composer['like_counts'] = (
         like_counts.replace(',', '') if like_counts else '')
     composer['fans_counts'] = response.xpath(
         '//span[contains(@class, "fans-counts")]/@data-counts').get()
     composer['follow_counts'] = response.xpath(
         '//span[@class="follow-wrap"]/span[2]/text()').get()
     # Location and career text sit in the span right after their icon.
     composer['location'] = response.xpath(
         '//span[contains(@class,"icon-location")]'
         '/following-sibling::span[1]/text()').get()
     composer['career'] = response.xpath(
         '//span[contains(@class,"icon-career")]'
         '/following-sibling::span[1]/text()').get()
     yield composer
Ejemplo n.º 4
0
 def parse_composer(self, response):
     """Build a ``ComposerItem`` from a composer profile page.

     Args:
         response: Response for the profile page.  ``response.meta`` must
             contain ``'composer_id'`` (``KeyError`` otherwise).

     Yields:
         The populated ``ComposerItem``.  ``banner`` is ``None`` when the
         banner style attribute is missing or has no ``(...)`` part.
     """
     item = ComposerItem()
     item['cid'] = response.meta['composer_id']

     banner_style = response.xpath(
         '//div[@class="banner-wrap"]/@style').get()
     # Previously a missing attribute was stringified to 'None' and the
     # following ``split('(')[1]`` raised IndexError.  Only parse when an
     # opening parenthesis is actually present.
     if banner_style and '(' in banner_style:
         # Text after the first '(' with the final character dropped
         # (presumably the closing ')' of ``url(...)``).
         item['banner'] = banner_style.split('(')[1][:-1]
     else:
         item['banner'] = None

     item['avatar'] = response.xpath(
         '//span[@class="avator-wrap-s"]/img/@src').get()
     # Verified when any ``author-v`` badge span exists inside the avatar
     # wrapper; an empty selector list is falsy.
     verified_badges = response.xpath(
         '//span[@class="avator-wrap-s"]'
         '//span[contains(@class,"author-v")]/@class')
     item['verified'] = bool(verified_badges)
     item['name'] = response.xpath(
         '//p[contains(@class,"creator-name")]/text()').get()
     item['intro'] = response.xpath(
         '//p[contains(@class,"creator-desc")]/text()').get()
     # ``num_to_int`` is defined elsewhere in the project; presumably it
     # converts the displayed count text to an int -- TODO confirm.
     item['like_counts'] = num_to_int(
         response.xpath(
             '//span[contains(@class,"like-counts")]/text()').get())
     item['fans_counts'] = num_to_int(
         response.xpath(
             '//span[contains(@class,"fans-counts")]/text()').get())
     item['follow_counts'] = num_to_int(
         response.xpath(
             '//span[@class="follow-wrap"]'
             '/span[contains(@class,"fw_600")]/text()').get())
     yield item
Ejemplo n.º 5
0
 def parse_composer(self, response):
     """Build a ``ComposerItem`` from a composer profile page.

     Missing page nodes no longer abort parsing: the affected field is
     stored as ``None`` instead of raising TypeError / AttributeError.

     Args:
         response: Response for the profile page.  ``response.meta`` must
             contain ``'cid'`` (``KeyError`` otherwise).

     Yields:
         The populated ``ComposerItem``.
     """
     def first(query):
         # First match for the XPath query, or None when nothing matches.
         return response.xpath(query).get()

     def without_commas(text):
         # Remove commas from a displayed count (presumably thousands
         # separators); missing values pass through unchanged.
         return text.replace(',', '') if text else text

     composer = ComposerItem()
     composer['cid'] = response.meta['cid']
     composer['name'] = first(
         '//p[contains(@class,"creator-name")]/text()')
     banner_style = first('//div[@class="banner-wrap"]/@style')
     # Drops the first 21 characters (the length of
     # 'background-image:url(') and the final character of the style value.
     composer['banner'] = banner_style[21:-1] if banner_style else None
     composer['avatar'] = first('//span[@class="avator-wrap-s"]/img/@src')
     # 1 when a yellow "author-v" badge span exists on the page, else 0.
     badge = response.xpath('//span[@class="author-v yellow-v"]')
     composer['verified'] = 1 if badge else 0
     composer['intro'] = first(
         '//p[contains(@class,"creator-desc")]/text()')
     composer['like_counts'] = without_commas(first(
         '//span[contains(@class,"like-counts")]/text()'))
     composer['fans_counts'] = without_commas(first(
         '//span[contains(@class,"fans-counts")]/text()'))
     composer['follow_counts'] = without_commas(first(
         '//span[@class="follow-wrap"]/span[contains(@class,"fw")]/text()'))
     # Location and career are picked positionally from the detail line.
     composer['location'] = first(
         '//p[contains(@class,"creator-detail")]/span[5]/text()')
     composer['career'] = first(
         '//p[contains(@class,"creator-detail")]/span[last()]/text()')
     yield composer
Ejemplo n.º 6
0
 def parse_composer(self, response):
     """Build a ``ComposerItem`` from a composer profile page.

     Args:
         response: Response for the profile page.  ``response.meta`` must
             contain ``'cid'`` (``KeyError`` otherwise).

     Yields:
         The populated ``ComposerItem``.  ``banner`` is ``None`` when the
         page has no banner style attribute.
     """
     composer = ComposerItem()
     composer['cid'] = response.meta['cid']
     # Large background (banner) image.  ``.get()`` returns None when the
     # div is missing, which previously made the slice raise TypeError.
     banner_style = response.xpath(
         '//div[@class="banner-wrap"]/@style').get()
     # Drops the first 21 characters (the length of
     # 'background-image:url(') and the final character of the style value.
     composer['banner'] = banner_style[21:-1] if banner_style else None
     # User avatar.
     composer['avatar'] = response.xpath(
         '//span[@class="avator-wrap-s"]/img/@src').get()
     composer['name'] = response.xpath(
         '//p[contains(@class, "creator-name")]/text()').get()
     composer['intro'] = response.xpath(
         '//p[contains(@class, "creator-desc")]/text()').get()
     # ``clean`` is defined elsewhere in the project; presumably it
     # normalises the displayed count text -- TODO confirm.
     # Popularity (likes).
     composer['like_counts'] = clean(response.xpath(
         '//span[contains(@class, "like-counts")]/text()').get())
     # Number of fans.
     composer['fans_counts'] = clean(response.xpath(
         '//span[contains(@class, "fans-counts")]/text()').get())
     # Number of accounts followed.
     composer['follow_counts'] = clean(response.xpath(
         '//span[@class="follow-wrap"]/span[2]/text()').get())
     # Location: text of the span right after the location icon.
     composer['location'] = response.xpath(
         '//span[contains(@class, "icon-location")]'
         '/following-sibling::span[1]/text()').get()
     # Career: text of the span right after the career icon.
     composer['career'] = response.xpath(
         '//span[contains(@class, "icon-career")]'
         '/following-sibling::span[1]/text()').get()
     yield composer
Ejemplo n.º 7
0
    def parse_composer(self, response):
        """Scrape a composer profile page into a ``ComposerItem``.

        The composer id is read from ``response.meta['cid']``.  ``verified``
        is only set when the avatar wrapper contains a badge span.

        Yields:
            The populated ``ComposerItem``.
        """
        def first(query):
            # First match for the XPath query, or None when nothing matches.
            return response.xpath(query).get()

        composer = ComposerItem()
        composer['cid'] = response.meta['cid']
        composer['name'] = first(
            '//p[contains(@class,"creator-name")]/text()')
        # Introduction text.
        composer['intro'] = first(
            '//p[contains(@class,"creator-desc")]/text()')

        # Banner: keep only the image link embedded in the style value;
        # a missing or empty style is stored unchanged.
        banner_style = first('//div[@class="banner-wrap"]/@style')
        composer['banner'] = (
            banner_style[21:-1] if banner_style else banner_style)

        avatar_wrap = response.xpath('//span[@class="avator-wrap-s"]')
        # Avatar image.
        composer['avatar'] = avatar_wrap.xpath('./img/@src').get()
        # Verification: the last class token of the badge span is looked up
        # in ``vip_map`` (defined elsewhere in the project).
        badge_class = avatar_wrap.xpath('./span/@class').get()
        if badge_class:
            badge_kind = badge_class.split(" ")[-1]
            composer['verified'] = vip_map.get(badge_kind)

        # Popularity (likes).
        like_text = first('//span[contains(@class,"like-counts")]/text()')
        composer['like_counts'] = ci(like_text)
        # Fans, read from the ``data-counts`` attribute.
        fans_text = first(
            '//span[contains(@class,"fans-counts")]/@data-counts')
        composer['fans_counts'] = ci(fans_text)
        # Following.
        follow_text = first(
            '//span[@class="follow-wrap"]/span[last()]/text()')
        composer['follow_counts'] = ci(follow_text)

        yield composer
Ejemplo n.º 8
0
    def parse_composer(self, response):
        """Scrape a composer profile page into a ``ComposerItem``.

        The composer id is read from ``response.meta['cid']``; the other
        fields are extracted from the page with XPath.

        Yields:
            The populated ``ComposerItem``.
        """
        def first(query):
            # First match for the XPath query, or None when nothing matches.
            return response.xpath(query).get()

        composer = ComposerItem()
        composer['cid'] = response.meta['cid']
        composer['name'] = first(
            '//p[contains(@class, "creator-name")]/text()')
        composer['intro'] = first(
            '//p[contains(@class, "creator-desc")]/text()')

        # Slice the style value when present; a missing or empty style is
        # stored unchanged.
        banner_style = first('//div[@class="banner-wrap"]/@style')
        composer['banner'] = (
            banner_style[21:-1] if banner_style else banner_style)

        # The last class token of the badge span is looked up in ``vip_map``
        # (defined elsewhere in the project), defaulting to 0; a missing
        # badge is stored unchanged.
        badge_class = first('//span[@class="avator-wrap-s"]/span/@class')
        if badge_class:
            composer['verified'] = vip_map.get(badge_class.split(' ')[-1], 0)
        else:
            composer['verified'] = badge_class

        like_text = first('//span[contains(@class,"like-counts")]/text()')
        composer['like_counts'] = convert_int(like_text)
        follow_text = first(
            '//span[contains(@class,"follow-wrap")]/span[last()]/text()')
        composer['follow_counts'] = convert_int(follow_text)
        fans_text = first('//span[contains(@class,"fans-counts")]/text()')
        composer['fans_counts'] = convert_int(fans_text)
        composer['avatar'] = first('//span[@class="avator-wrap-s"]/img/@src')
        yield composer
Ejemplo n.º 9
0
 def parse_composer(self, response):
     """Build a ``ComposerItem`` from a composer profile page.

     Args:
         response: Response for the profile page.  ``response.meta`` must
             contain ``'cid'`` (``KeyError`` otherwise).

     Yields:
         The populated ``ComposerItem``.  ``banner`` is ``None`` when the
         page has no banner style attribute; ``verified`` is only set
         when the avatar wrapper contains a badge span.
     """
     def first(query):
         # First match for the XPath query, or None when nothing matches.
         # ``get()`` is used throughout in place of the ``extract_first()``
         # alias so the method is consistent.
         return response.xpath(query).get()

     composer = ComposerItem()
     composer['cid'] = response.meta['cid']
     composer['name'] = first(
         '//p[contains(@class,"creator-name")]/text()')
     # A missing banner div used to make the slice raise TypeError.
     banner_style = first('//div[@class="banner-wrap"]/@style')
     # Drops the first 21 characters (the length of
     # 'background-image:url(') and the final character of the style value.
     composer['banner'] = banner_style[21:-1] if banner_style else None
     avatar_wrap = response.xpath('//span[@class="avator-wrap-s"]')
     composer['avatar'] = avatar_wrap.xpath('./img/@src').get()
     # The last class token of the badge span is looked up in ``vip_map``
     # (defined elsewhere in the project), defaulting to 0.
     auth_style = avatar_wrap.xpath('./span/@class').get()
     if auth_style:
         composer['verified'] = vip_map.get(auth_style.split(' ')[-1], 0)
     # ``strip`` and ``ci`` are project helpers defined elsewhere;
     # presumably they trim text and convert count text to int -- TODO
     # confirm.
     composer['intro'] = strip(first(
         '//p[contains(@class,"creator-desc")]/text()'))
     composer['like_counts'] = ci(first(
         '//span[contains(@class,"like-counts")]/text()'))
     composer['fans_counts'] = ci(first(
         '//span[contains(@class,"fans-counts")]/text()'))
     composer['follow_counts'] = ci(first(
         '//span[@class="follow-wrap"]/span[2]/text()'))
     # Location and career text sit in the span right after their icon.
     composer['location'] = first(
         '//span[contains(@class, "icon-location")]'
         '/following-sibling::span[1]/text()')
     composer['career'] = first(
         '//span[contains(@class, "icon-career")]'
         '/following-sibling::span[1]/text()')
     yield composer
Ejemplo n.º 10
0
 def parse_composer(self, response):
     """Scrape a composer profile page into a ``ComposerItem``.

     The composer id is read from ``response.meta['cid']``.  ``banner`` and
     ``location`` are only set when the page provides them; ``career``
     falls back to ``''``.

     Yields:
         The populated ``ComposerItem``.
     """
     def first(query):
         # First match for the XPath query, or None when nothing matches.
         return response.xpath(query).get()

     composer = ComposerItem()
     composer['cid'] = response.meta['cid']

     # Large background image of the user's home page, cut out of the
     # style attribute value.
     banner_style = first('//div[@class="banner-wrap"]/@style')
     if banner_style:
         composer['banner'] = banner_style[21:-1]

     # User avatar.
     composer['avatar'] = first('//span[@class="avator-wrap-s"]/img/@src')
     # Whether the user is officially verified (raw badge class value).
     composer['verified'] = first(
         '//span[@class="avator-wrap-s"]/span/@class')
     # User name.
     composer['name'] = first(
         '//p[contains(@class, "creator-name")]/text()')
     # Self introduction.
     composer['intro'] = first(
         '//p[contains(@class, "creator-desc")]/text()')

     # Number of likes the user has received.
     like_text = first('//span[contains(@class, "like-counts")]/text()')
     composer['like_counts'] = ci(like_text)
     # Number of fans, taken from the ``data-counts`` attribute as scraped.
     composer['fans_counts'] = first(
         '//span[contains(@class, "fans-counts")]/@data-counts')
     # Number of accounts followed.
     follow_text = first('//span[@class="follow-wrap"]/span[2]/text()')
     composer['follow_counts'] = ci(follow_text)

     # User's region: locate the icon-location span, then take the span
     # immediately following it.
     raw_location = first(
         '//span[contains(@class, "icon-location")]'
         '/following-sibling::span[1]/text()')
     if raw_location:
         # Trim whitespace and replace non-breaking spaces with '-'.
         trimmed = raw_location.strip()
         composer['location'] = trimmed.replace('\xa0', '-')

     # User's occupation, located the same way via the icon-career span.
     career = first(
         '//span[contains(@class, "icon-career")]'
         '/following-sibling::span[1]/text()')
     composer['career'] = career or ''
     yield composer