def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page.
    """
    # ``.get()`` returns None when nothing matches and ``re.findall`` raises
    # TypeError on None, so search an empty string in that case.
    banner_style = response.xpath(
        '//div[@class="banner-wrap"]/@style').get() or ''

    composer = ComposerItem()
    composer['cid'] = response.meta['cid']
    # NOTE(review): this stores the list of all matches rather than a single
    # URL string; the list type is kept so consumers see what they saw before.
    composer['banner'] = re.findall(
        r'background-image:url\((.+?)\)', banner_style)
    composer['avatar'] = response.xpath(
        '//span[@class="avator-wrap-s"]/img/@src').get()
    composer['name'] = response.xpath(
        '//p[contains(@class, "creator-name")]/text()').get()
    composer['intro'] = response.xpath(
        '//p[contains(@class, "creator-desc")]/text()').get()
    # ``ci`` is defined elsewhere; presumably it converts the displayed count
    # text to an integer -- confirm against its definition.
    composer['like_counts'] = ci(response.xpath(
        '//span[contains(@class, "like-counts")]/text()').get())
    # Taken from the ``data-counts`` attribute and stored without ``ci``.
    composer['fans_counts'] = response.xpath(
        '//span[contains(@class, "fans-counts")]/@data-counts').get()
    composer['follow_counts'] = ci(response.xpath(
        '//span[@class="follow-wrap"]/span[2]/text()').get())
    # Location and career are the text of the span right after their icon
    # span; both default to an empty string when absent.
    composer['location'] = response.xpath(
        '//span[contains(@class, "icon-location")]/'
        'following-sibling::span[1]/text()').get() or ''
    composer['career'] = response.xpath(
        '//span[contains(@class, "icon-career")]/'
        'following-sibling::span[1]/text()').get() or ''
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    The banner and avatar URLs are taken, in that order, from the first two
    ``src`` attributes found in the serialized banner element. Either field
    is set to None when the corresponding ``src`` is not present.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page.
    """
    print('****创作人××××' * 5)
    composer = ComposerItem()
    # ``.get()`` on an element selector returns its serialized markup, or
    # None when nothing matches.
    banner_html = response.xpath(
        '//div[contains(@class,"banner-linear")]').get()
    composer['cid'] = response.meta['cid']

    # Raw string: the pattern text is unchanged, but the source no longer
    # contains invalid string escapes. Scan once and reuse the result.
    image_urls = re.findall(r'src\="(.+?)"\>', banner_html or '')
    composer['banner'] = image_urls[0] if image_urls else None
    composer['avatar'] = image_urls[1] if len(image_urls) > 1 else None

    name = response.xpath(
        '//p[contains(@class,"creator-name")]/text()').get()
    # Only strip when a name was actually found.
    composer['name'] = name.strip() if name else name
    composer['intro'] = response.xpath(
        '//p[contains(@class,"creator-desc")]/text()').get()
    # ``convert_int`` is defined elsewhere; presumably it turns the displayed
    # count text into an integer -- confirm against its definition.
    composer['like_counts'] = convert_int(
        response.xpath(
            '//span[contains(@class,"like-counts")]/text()').get())
    composer['fans_counts'] = convert_int(
        response.xpath(
            '//span[contains(@class,"fans-counts")]/text()').get())
    composer['follow_counts'] = convert_int(
        response.xpath('//span[@class="fw_600 v-center"]/text()').get())
    # Location and career are the text of the span right after their icon
    # span; both default to an empty string when absent.
    composer['location'] = response.xpath(
        '//span[contains(@class,"icon-location")]/following-sibling::span[1]/text()'
    ).get() or ''
    composer['career'] = response.xpath(
        '//span[contains(@class,"icon-career")]/following-sibling::span[1]/text()'
    ).get() or ''
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    The creator id is read from the page itself (a ``data-userid``
    attribute) rather than from the request meta.

    Args:
        response: Response for the profile page.

    Yields:
        ComposerItem: A single item populated from the page. ``banner`` is
        None when no banner URL can be extracted.
    """
    composer = ComposerItem()
    banner_style = response.xpath(
        '//div[@class="banner-wrap"]/@style').get()
    composer['cid'] = response.xpath(
        '//div[@class="creator-info"]//span/@data-userid').get()

    # A single-element unpack of the findall result raises when the style is
    # missing or does not contain exactly one URL; take the first match and
    # fall back to None instead.
    banner_urls = re.findall(
        r'background-image:url\((.+?)\)', banner_style or '')
    composer['banner'] = banner_urls[0] if banner_urls else None

    composer['avatar'] = response.xpath(
        '//span[@class="avator-wrap-s"]/img/@src').get()
    # Raw class attribute of the badge span inside the avatar wrapper.
    composer['verified'] = response.xpath(
        '//span[@class="avator-wrap-s"]/span/@class').get()
    composer['name'] = response.xpath(
        '//p[contains(@class, "creator-name")]/text()').get()
    composer['intro'] = response.xpath(
        '//p[contains(@class, "creator-desc")]/text()').get()

    # Drop thousands separators; an absent counter becomes ''.
    like_text = response.xpath(
        '//span[contains(@class, "like-counts")]/text()').get()
    composer['like_counts'] = like_text.replace(',', '') if like_text else ''

    composer['fans_counts'] = response.xpath(
        '//span[contains(@class, "fans-counts")]/@data-counts').get()
    composer['follow_counts'] = response.xpath(
        '//span[@class="follow-wrap"]/span[2]/text()').get()
    # Location and career are the text of the span right after their icon
    # span; None when absent.
    composer['location'] = response.xpath(
        '//span[contains(@class,"icon-location")]/following-sibling::span[1]/text()'
    ).get()
    composer['career'] = response.xpath(
        '//span[contains(@class,"icon-career")]/following-sibling::span[1]/text()'
    ).get()
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page.
            ``response.meta['composer_id']`` must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page. ``banner`` is
        None when the banner style is missing or contains no ``(``.
    """
    composer = ComposerItem()
    composer['cid'] = response.meta['composer_id']

    # The URL sits between the first '(' and the trailing character of the
    # style value. Guard the lookup: a missing style used to be stringified
    # to 'None', which then failed with IndexError.
    banner_style = response.xpath(
        '//div[@class="banner-wrap"]/@style').get()
    pieces = banner_style.split('(') if banner_style else []
    composer['banner'] = pieces[1][:-1] if len(pieces) > 1 else None

    composer['avatar'] = response.xpath(
        '//span[@class="avator-wrap-s"]/img/@src').get()
    # Verified when the avatar wrapper contains a span whose class includes
    # "author-v"; an empty selector list is falsy.
    composer['verified'] = bool(response.xpath(
        '//span[@class="avator-wrap-s"]//span[contains(@class,"author-v")]/@class'
    ))
    composer['name'] = response.xpath(
        '//p[contains(@class,"creator-name")]/text()').get()
    composer['intro'] = response.xpath(
        '//p[contains(@class,"creator-desc")]/text()').get()
    # ``num_to_int`` is defined elsewhere; presumably it turns the displayed
    # count text into an integer -- confirm against its definition.
    composer['like_counts'] = num_to_int(
        response.xpath(
            '//span[contains(@class,"like-counts")]/text()').get())
    composer['fans_counts'] = num_to_int(
        response.xpath(
            '//span[contains(@class,"fans-counts")]/text()').get())
    composer['follow_counts'] = num_to_int(
        response.xpath(
            '//span[@class="follow-wrap"]/span[contains(@class,"fw_600")]/text()'
        ).get())
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page. Fields whose
        node is missing are set to None instead of raising.
    """
    def first(query):
        # First match of the XPath query, or None when nothing matches.
        return response.xpath(query).get()

    def without_commas(query):
        # Counter text with thousands separators removed; None stays None.
        text = first(query)
        return text.replace(',', '') if text else text

    composer = ComposerItem()
    composer['cid'] = response.meta['cid']
    composer['name'] = first('//p[contains(@class,"creator-name")]/text()')

    # 21 == len('background-image:url('); the slice also drops the final
    # character. Assumes the style attribute has exactly that form -- confirm.
    banner_style = first('//div[@class="banner-wrap"]/@style')
    composer['banner'] = banner_style[21:-1] if banner_style else None

    composer['avatar'] = first('//span[@class="avator-wrap-s"]/img/@src')
    # 1 when a span with exactly this class attribute exists, otherwise 0.
    badge = response.xpath('//span[@class="author-v yellow-v"]')
    composer['verified'] = 1 if badge else 0
    composer['intro'] = first('//p[contains(@class,"creator-desc")]/text()')
    composer['like_counts'] = without_commas(
        '//span[contains(@class,"like-counts")]/text()')
    composer['fans_counts'] = without_commas(
        '//span[contains(@class,"fans-counts")]/text()')
    composer['follow_counts'] = without_commas(
        '//span[@class="follow-wrap"]/span[contains(@class,"fw")]/text()')
    # Positional lookups: the fifth and the last span of the detail line.
    composer['location'] = first(
        '//p[contains(@class,"creator-detail")]/span[5]/text()')
    composer['career'] = first(
        '//p[contains(@class,"creator-detail")]/span[last()]/text()')
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page. ``banner`` is
        None when the banner style attribute is missing.
    """
    composer = ComposerItem()
    composer['cid'] = response.meta['cid']

    # Large background image. 21 == len('background-image:url('); the slice
    # also drops the final character. Assumes the style attribute has exactly
    # that form -- confirm. Guarded because ``.get()`` returns None when the
    # node is absent.
    banner_style = response.xpath(
        '//div[@class="banner-wrap"]/@style').get()
    composer['banner'] = banner_style[21:-1] if banner_style else None

    # User avatar.
    composer['avatar'] = response.xpath(
        '//span[@class="avator-wrap-s"]/img/@src').get()
    composer['name'] = response.xpath(
        '//p[contains(@class, "creator-name")]/text()').get()
    composer['intro'] = response.xpath(
        '//p[contains(@class, "creator-desc")]/text()').get()
    # ``clean`` is defined elsewhere; presumably it normalizes the displayed
    # count text -- confirm against its definition.
    # Popularity (likes).
    composer['like_counts'] = clean(response.xpath(
        '//span[contains(@class, "like-counts")]/text()').get())
    # Number of fans.
    composer['fans_counts'] = clean(response.xpath(
        '//span[contains(@class, "fans-counts")]/text()').get())
    # Number of accounts followed.
    composer['follow_counts'] = clean(response.xpath(
        '//span[@class="follow-wrap"]/span[2]/text()').get())
    # Location: text of the span right after the location icon span.
    composer['location'] = response.xpath(
        '//span[contains(@class, "icon-location")]'
        '/following-sibling::span[1]/text()').get()
    # Career: text of the span right after the career icon span.
    composer['career'] = response.xpath(
        '//span[contains(@class, "icon-career")]'
        '/following-sibling::span[1]/text()').get()
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page. ``verified`` is
        only set when the avatar wrapper has a badge span with a class.
    """
    def first(query):
        # First match of the XPath query, or None when nothing matches.
        return response.xpath(query).get()

    composer = ComposerItem()
    composer['cid'] = response.meta['cid']
    composer['name'] = first('//p[contains(@class,"creator-name")]/text()')
    # Introduction text.
    composer['intro'] = first('//p[contains(@class,"creator-desc")]/text()')

    # Background: slice the image URL out of the inline style when one is
    # present; a falsy style value is stored unchanged.
    banner_style = first('//div[@class="banner-wrap"]/@style')
    composer['banner'] = banner_style[21:-1] if banner_style else banner_style

    avatar_wrap = response.xpath('//span[@class="avator-wrap-s"]')
    # Avatar image.
    composer['avatar'] = avatar_wrap.xpath('./img/@src').get()
    # Verification: the last class token of the badge span is looked up in
    # ``vip_map`` (defined elsewhere).
    badge_class = avatar_wrap.xpath('./span/@class').get()
    if badge_class:
        last_token = badge_class.split(" ")[-1]
        composer['verified'] = vip_map.get(last_token)

    # ``ci`` is defined elsewhere; presumably converts count text to an
    # integer -- confirm against its definition.
    # Popularity (likes).
    likes_text = first('//span[contains(@class,"like-counts")]/text()')
    composer['like_counts'] = ci(likes_text)
    # Fans.
    fans_text = first('//span[contains(@class,"fans-counts")]/@data-counts')
    composer['fans_counts'] = ci(fans_text)
    # Following.
    follow_text = first('//span[@class="follow-wrap"]/span[last()]/text()')
    composer['follow_counts'] = ci(follow_text)
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page.
    """
    select = response.xpath

    name_text = select('//p[contains(@class, "creator-name")]/text()').get()
    intro_text = select('//p[contains(@class, "creator-desc")]/text()').get()
    banner_style = select('//div[@class="banner-wrap"]/@style').get()
    badge_class = select('//span[@class="avator-wrap-s"]/span/@class').get()
    likes_text = select('//span[contains(@class,"like-counts")]/text()').get()
    follow_text = select(
        '//span[contains(@class,"follow-wrap")]/span[last()]/text()').get()
    fans_text = select('//span[contains(@class,"fans-counts")]/text()').get()
    avatar_url = select('//span[@class="avator-wrap-s"]/img/@src').get()

    composer = ComposerItem()
    composer['cid'] = response.meta['cid']
    composer['name'] = name_text
    composer['intro'] = intro_text
    # Slice the image URL out of the inline style when one is present; a
    # falsy style value is stored unchanged.
    composer['banner'] = banner_style[21:-1] if banner_style else banner_style
    # Map the last class token of the badge span through ``vip_map`` (defined
    # elsewhere), defaulting to 0; a falsy class value is stored unchanged.
    if badge_class:
        composer['verified'] = vip_map.get(badge_class.split(' ')[-1], 0)
    else:
        composer['verified'] = badge_class
    # ``convert_int`` is defined elsewhere; presumably converts count text to
    # an integer -- confirm against its definition.
    composer['like_counts'] = convert_int(likes_text)
    composer['follow_counts'] = convert_int(follow_text)
    composer['fans_counts'] = convert_int(fans_text)
    composer['avatar'] = avatar_url
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page. ``banner`` is
        None when the banner style attribute is missing; ``verified`` is only
        set when the avatar wrapper has a badge span with a class.
    """
    def first(query):
        # ``get`` and ``extract_first`` are the same selector call; use one
        # spelling throughout. Returns None when nothing matches.
        return response.xpath(query).get()

    composer = ComposerItem()
    composer['cid'] = response.meta['cid']
    composer['name'] = first('//p[contains(@class,"creator-name")]/text()')

    # 21 == len('background-image:url('); the slice also drops the final
    # character. Assumes the style attribute has exactly that form -- confirm.
    # Guarded because slicing None raises TypeError.
    banner_style = first('//div[@class="banner-wrap"]/@style')
    composer['banner'] = banner_style[21:-1] if banner_style else None

    avatar_wrap = response.xpath('//span[@class="avator-wrap-s"]')
    composer['avatar'] = avatar_wrap.xpath('./img/@src').get()
    # The last class token of the badge span is looked up in ``vip_map``
    # (defined elsewhere), defaulting to 0.
    badge_class = avatar_wrap.xpath('./span/@class').get()
    if badge_class:
        composer['verified'] = vip_map.get(badge_class.split(' ')[-1], 0)

    # ``strip`` and ``ci`` are helpers defined elsewhere; presumably they trim
    # whitespace and convert count text to an integer -- confirm.
    composer['intro'] = strip(
        first('//p[contains(@class,"creator-desc")]/text()'))
    composer['like_counts'] = ci(
        first('//span[contains(@class,"like-counts")]/text()'))
    composer['fans_counts'] = ci(
        first('//span[contains(@class,"fans-counts")]/text()'))
    composer['follow_counts'] = ci(
        first('//span[@class="follow-wrap"]/span[2]/text()'))
    # Location and career are the text of the span right after their icon
    # span; None when absent.
    composer['location'] = first(
        '//span[contains(@class, "icon-location")]/following-sibling::span[1]/text()')
    composer['career'] = first(
        '//span[contains(@class, "icon-career")]/following-sibling::span[1]/text()')
    yield composer
def parse_composer(self, response):
    """Scrape a creator profile page into a ``ComposerItem``.

    Args:
        response: Response for the profile page. ``response.meta['cid']``
            must carry the creator id.

    Yields:
        ComposerItem: A single item populated from the page. ``banner`` and
        ``location`` are only set when their source text is non-empty.
    """
    def first(query):
        # First match of the XPath query, or None when nothing matches.
        return response.xpath(query).get()

    composer = ComposerItem()
    composer['cid'] = response.meta['cid']

    # Large background image of the user's home page, sliced out of the
    # inline style.
    banner_style = first('//div[@class="banner-wrap"]/@style')
    if banner_style:
        composer['banner'] = banner_style[21:-1]

    # User avatar.
    composer['avatar'] = first('//span[@class="avator-wrap-s"]/img/@src')
    # Class attribute of the badge span (marks officially verified users).
    composer['verified'] = first('//span[@class="avator-wrap-s"]/span/@class')
    # User name.
    composer['name'] = first('//p[contains(@class, "creator-name")]/text()')
    # Self introduction.
    composer['intro'] = first('//p[contains(@class, "creator-desc")]/text()')

    # ``ci`` is defined elsewhere; presumably converts count text to an
    # integer -- confirm against its definition.
    # Number of likes the user has received.
    likes_text = first('//span[contains(@class, "like-counts")]/text()')
    composer['like_counts'] = ci(likes_text)
    # Number of fans, read from the data attribute and stored as-is.
    composer['fans_counts'] = first(
        '//span[contains(@class, "fans-counts")]/@data-counts')
    # Number of accounts followed.
    follow_text = first('//span[@class="follow-wrap"]/span[2]/text()')
    composer['follow_counts'] = ci(follow_text)

    # Region: find the icon-location span, then take the span right after it.
    region = first(
        '//span[contains(@class, "icon-location")]/following-sibling::span[1]/text()'
    )
    if region:
        # Trim and replace non-breaking spaces with '-'.
        trimmed = region.strip()
        composer['location'] = trimmed.replace('\xa0', '-')

    # Career: same lookup relative to the icon-career span; '' when absent.
    career_text = first(
        '//span[contains(@class, "icon-career")]/following-sibling::span[1]/text()'
    )
    composer['career'] = career_text or ''
    yield composer