def parse(self, response):
    """Scrape user fields out of ``response`` and yield one ``UserItem``.

    CSS selectors supply the name, membership type, join date, level and
    learned-course count; XPath queries against the
    ``userinfo-banner-status`` block supply status, job and school.
    The membership type falls back to '普通用户' when no icon title matches.
    """
    css = response.css
    xpath = response.xpath

    fields = {}
    fields['name'] = css('span.username::text').extract_first()
    fields['type'] = css(
        'a.member-icon img.user-icon::attr(title)'
    ).extract_first(default='普通用户')
    fields['status'] = xpath(
        '//div[@class="userinfo-banner-status"]/span[1]/text()'
    ).extract_first()
    fields['job'] = xpath(
        '//div[@class="userinfo-banner-status"]/span[2]/text()'
    ).extract_first()
    fields['school'] = xpath(
        '//div[@class="userinfo-banner-status"]/a/text()'
    ).extract_first()
    fields['join_date'] = css('span.join-date::text').extract_first()
    fields['level'] = css('span.user-level::text').extract_first()
    fields['learn_courses_num'] = css(
        'span.latest-learn-num::text'
    ).extract_first()

    yield UserItem(fields)
def parse(self, response):
    """Extract user fields from ``response`` and yield one ``UserItem``.

    The membership type defaults to "普通会员" when the avatar icon has no
    title. ``join_date`` is narrowed to the first ``YYYY-MM-DD``-shaped
    substring of the join-date text.
    """
    yield UserItem({
        'name': response.css('span.username::text').extract_first(),
        'type': response.css(
            'div.pull-left.userinfo-banner-avatar img.user-icon::attr(title)'
        ).extract_first(default="普通会员"),
        'status': response.xpath(
            '//div[@class="userinfo-banner-status"]/span[1]/text()'
        ).extract_first(),
        'job': response.xpath(
            '//div[@class="userinfo-banner-status"]/span[2]/text()'
        ).extract_first(),
        # 'school' used the very same span[2] query as 'job', so both fields
        # were always equal.
        # NOTE(review): assumes the school is the <a> inside the status
        # banner - confirm against the page markup.
        'school': response.xpath(
            '//div[@class="userinfo-banner-status"]/a/text()'
        ).extract_first(),
        'level': response.css('span.user-level::text').extract_first(),
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        'join_date': response.css('span.join-date::text').re_first(
            r'\d\d\d\d-\d\d-\d\d'),
        'learn_courses_num': response.css(
            'span.latest-learn-num::text'
        ).extract_first(),
    })
def parse(self, response):
    """Extract user fields from ``response`` via XPath and yield a ``UserItem``.

    The membership type defaults to '普通用户' when the member icon has no
    title. ``join_date`` keeps only the first run of digits and hyphens
    found in the join-date text.
    """
    yield UserItem({
        'name': response.xpath(
            '//span[@class="username"]/text()').extract_first(),
        # Raw string avoids the invalid '\d' escape. '+' instead of '*'
        # stops the pattern from matching the empty string at the start of
        # the text, which made re_first return '' whenever the text did not
        # begin with a digit or hyphen.
        'join_date': response.xpath(
            '//span[@class="join-date"]/text()').re_first(r'[\d-]+'),
        'status': response.xpath(
            '//div[@class="userinfo-banner-status"]/span[1]/text()'
        ).extract_first(),
        'job': response.xpath(
            '//div[@class="userinfo-banner-status"]/span[2]/text()'
        ).extract_first(),
        'school': response.xpath(
            '//div[@class="userinfo-banner-status"]/a/text()'
        ).extract_first(),
        'type': response.xpath(
            '//a[@class="member-icon"]/img[@class="user-icon"]/@title'
        ).extract_first('普通用户'),
        'level': response.xpath(
            '//span[@class="user-level"]/text()').extract_first(),
        'learn_courses_num': response.xpath(
            '//span[@class="latest-learn-num"]/text()').extract_first(),
    })
def parse(self, response):
    """Build a ``UserItem`` from ``response`` and yield it.

    Name and level are the first and second text nodes of the ``user-meta``
    spans; status and school/job default to '无' when absent. ``is_vip`` is
    set to ``True`` only when the avatar container holds an image under
    ``a/div``; otherwise the field is left unset.

    Raises:
        AttributeError: if the name or join-date text node is missing
            (``extract_first()`` gives ``None``, which has no ``strip``).
        IndexError: if fewer than two ``user-meta`` span texts exist.
    """
    # Name and level come from the same node set, so query it once.
    meta_spans = response.xpath(
        '//div[contains(@class,"user-meta")]/span/text()'
    )

    item = UserItem(
        name=meta_spans.extract_first().strip(),
        level=meta_spans.extract()[1].strip(),
        status=response.xpath(
            '//div[contains(@class,"user-status")]/span/text()'
        ).extract_first(default='无').strip(),
        school_job=response.xpath(
            '//div[contains(@class,"user-status")]/span[2]/text()'
        ).extract_first(default='无').strip(),
        # Raw string: '\D' and '\d' are invalid escapes in a normal literal.
        learn_courses_num=response.xpath(
            '//div[contains(@class,"tabs-left")]/span/text()'
        ).re_first(r'\D+(\d+)\D+'),
        join_date=response.xpath(
            '//span[contains(@class,"user-join-date")]/text()'
        ).extract_first().strip(),
    )

    if response.xpath(
        '//div[contains(@class,"avatar-container")]/a/div/img/@src'
    ).extract():
        item['is_vip'] = True

    yield item
def parse(self, response):
    """Yield a ``UserItem`` with the username, user level and join date.

    Each value is the first text match of its CSS selector in ``response``.
    """
    css = response.css

    username = css('span.username::text').extract_first()
    userlevel = css('span.user-level::text').extract_first()
    joindate = css('span.join-date::text').extract_first()

    yield UserItem({
        'username': username,
        'userlevel': userlevel,
        'joindate': joindate,
    })
def parse(self, response):
    """Build a ``UserItem`` from ``response`` and yield it.

    Name and level are the first and second text nodes of the ``user-meta``
    spans; status and school/job default to 'null' when absent. ``is_vip``
    is set to ``True`` only when exactly two ``img`` elements are found
    under ``div.user-avatar``; otherwise the field is left unset.

    Raises:
        IndexError: if fewer than two ``user-meta`` span texts exist.
        AttributeError: if the join-date text node is missing.
    """
    # Name and level share one query; extract it once instead of twice.
    meta_texts = response.xpath(
        '//div[@class="user-meta"]/span/text()').extract()

    item = UserItem(
        name=meta_texts[0].strip(),
        level=meta_texts[1].strip(),
        status=response.xpath(
            '//div[@class="user-status"]/span/text()'
        ).extract_first(default='null').strip(),
        school_job=response.xpath(
            '//div[@class="user-status"]/span[2]/text()'
        ).extract_first(default='null').strip(),
        join_date=response.css(
            'span.user-join-date::text').extract_first().strip(),
        # Raw string: '\D' and '\d' are invalid escapes in a normal literal.
        learn_courses_num=response.xpath(
            '//span[@class="tab-item"]/text()').re_first(r'\D+(\d+)\D+'),
    )

    if len(response.css('div.user-avatar img').extract()) == 2:
        item['is_vip'] = True

    yield item
def parse(self, response):
    """Yield a ``UserItem`` with name, join date and learned-course count.

    Each field is the first regex match within the text selected by its
    XPath: the first run of non-whitespace for the name, a
    digits-hyphen-digits-hyphen-digits pattern for the join date, and the
    first run of digits for the course count.
    """
    # Patterns are raw strings: '\S' and '\-' are invalid escape sequences
    # in ordinary string literals.
    yield UserItem({
        'name': response.xpath(
            '//*[@class="user-meta"]/span[1]/text()').re_first(r'\S+'),
        'join_date': response.xpath(
            '//*[@class="user-join-date"]/text()'
        ).re_first(r'[0-9]+\-[0-9]+\-[0-9]+'),
        'learn_courses_num': response.xpath(
            '//*[@class="user-courses-data"]/div/div/div/span[1]/text()'
        ).re_first(r'[0-9]+'),
    })
def parse(self, response):
    """Extract user fields from ``response`` and yield one ``UserItem``.

    The membership type defaults to '普通会员' when the member icon has no
    title; the other fields take the first match of their selector.
    """
    # The call previously ended with '})' although it was opened with '(',
    # which is a syntax error; it is now closed with a single ')'.
    yield UserItem(
        name=response.xpath('//div[@class="userinfo-banner-meta"]'
                            '/span[@class="username"]/text()').extract_first(),
        type=response.xpath('//a[@class="member-icon"]'
                            '/img/@title').extract_first(default='普通会员'),
        status=response.xpath('//div[@class="userinfo-banner-status"]'
                              '/span[1]/text()').extract_first(),
        school_job=response.xpath('//div[@class="userinfo-banner-status"]'
                                  '/span[2]/text()').extract_first(),
        join_date=response.xpath(
            '//span[@class="join-date"]/text()').extract_first(),
        level=response.css('span.user-level::text').extract_first(),
        learn_courses_num=response.css(
            'span.latest-learn-num::text').extract_first(),
    )