コード例 #1
0
 def parse(self, response):
     """Yield one ``UserItem`` built from the given response.

     Each field is the first result of a CSS or XPath query run against
     ``response``. The ``type`` query is given an explicit default, which
     is used when the member icon is not found.
     """
     name = response.css('span.username::text').extract_first()

     # Only this field supplies a fallback value for a missing match.
     member_type = response.css(
         'a.member-icon img.user-icon::attr(title)'
     ).extract_first(default='普通用户')

     # The status banner holds status and job in its first two <span>
     # children and the school in an <a> child.
     status = response.xpath(
         '//div[@class="userinfo-banner-status"]/span[1]/text()'
     ).extract_first()
     job = response.xpath(
         '//div[@class="userinfo-banner-status"]/span[2]/text()'
     ).extract_first()
     school = response.xpath(
         '//div[@class="userinfo-banner-status"]/a/text()'
     ).extract_first()

     join_date = response.css('span.join-date::text').extract_first()
     level = response.css('span.user-level::text').extract_first()
     learn_courses_num = response.css(
         'span.latest-learn-num::text'
     ).extract_first()

     fields = {
         'name': name,
         'type': member_type,
         'status': status,
         'job': job,
         'school': school,
         'join_date': join_date,
         'level': level,
         'learn_courses_num': learn_courses_num,
     }
     yield UserItem(fields)
コード例 #2
0
 def parse(self, response):
     """Yield one ``UserItem`` built from the given response.

     Each field is the first result of a CSS or XPath query run against
     ``response``. ``type`` is given an explicit default for when the
     avatar icon carries no title, and ``join_date`` is narrowed to the
     ``YYYY-MM-DD`` portion of the join-date text.
     """
     yield UserItem({
         'name':
         response.css('span.username::text').extract_first(),
         'type':
         response.css(
             'div.pull-left.userinfo-banner-avatar img.user-icon::attr(title)'
         ).extract_first(default="普通会员"),
         'status':
         response.xpath(
             '//div[@class="userinfo-banner-status"]/span[1]/text()'
         ).extract_first(),
         'job':
         response.xpath(
             '//div[@class="userinfo-banner-status"]/span[2]/text()'
         ).extract_first(),
         # The school is read from the banner's link element; querying
         # span[2] here as well would only duplicate the 'job' value.
         'school':
         response.xpath(
             '//div[@class="userinfo-banner-status"]/a/text()'
         ).extract_first(),
         'level':
         response.css('span.user-level::text').extract_first(),
         # Raw string so the backslashes reach the regex engine untouched.
         'join_date':
         response.css('span.join-date::text').re_first(
             r'\d\d\d\d-\d\d-\d\d'),
         'learn_courses_num':
         response.css('span.latest-learn-num::text').extract_first(),
     })
コード例 #3
0
 def parse(self, response):
     """Yield one ``UserItem`` built from the given response.

     Every field is the first result of an XPath query run against
     ``response``. ``join_date`` keeps only the leading run of digits and
     dashes from the join-date text, and ``type`` is given an explicit
     default for when no member icon title is found.
     """
     yield UserItem({
         'name':
         response.xpath('//span[@class="username"]/text()').extract_first(),
         # '+' instead of '*': a pattern that can match the empty string
         # yields '' as its first match whenever the text does not start
         # with a digit or dash (for example leading whitespace).
         'join_date':
         response.xpath('//span[@class="join-date"]/text()').re_first(
             r'[\d-]+'),
         'status':
         response.xpath(
             '//div[@class="userinfo-banner-status"]/span[1]/text()'
         ).extract_first(),
         'job':
         response.xpath(
             '//div[@class="userinfo-banner-status"]/span[2]/text()'
         ).extract_first(),
         'school':
         response.xpath(
             '//div[@class="userinfo-banner-status"]/a/text()'
         ).extract_first(),
         'type':
         response.xpath(
             '//a[@class="member-icon"]/img[@class="user-icon"]/@title'
         ).extract_first(default='普通用户'),
         'level':
         response.xpath(
             '//span[@class="user-level"]/text()').extract_first(),
         'learn_courses_num':
         response.xpath(
             '//span[@class="latest-learn-num"]/text()').extract_first(),
     })
コード例 #4
0
ファイル: users.py プロジェクト: perfect-circle/Python-LOU
    def parse(self, response):
        """Yield one ``UserItem`` built from the given response.

        Text fields are stripped of surrounding whitespace. ``status`` and
        ``school_job`` are given a default for a missing match; ``name``,
        ``level`` and ``join_date`` are not, so a response lacking those
        nodes makes this method raise (``AttributeError`` from calling
        ``strip`` on a missing value, or ``IndexError`` when there is no
        second user-meta text node). ``is_vip`` is set to ``True`` only
        when the avatar container holds a nested image; otherwise the
        field is left unset.
        """
        # Name and level are the first and second text nodes of the same
        # query, so evaluate the XPath once and reuse the result.
        meta_text = response.xpath(
            '//div[contains(@class,"user-meta")]/span/text()'
        )

        item = UserItem(
            name=meta_text.extract_first().strip(),
            level=meta_text.extract()[1].strip(),
            status=response.xpath(
                '//div[contains(@class,"user-status")]/span/text()'
            ).extract_first(default='无').strip(),
            school_job=response.xpath(
                '//div[contains(@class,"user-status")]/span[2]/text()'
            ).extract_first(default='无').strip(),
            # Raw string keeps the regex escapes intact; the capture group
            # is the digit run surrounded by non-digit text.
            learn_courses_num=response.xpath(
                '//div[contains(@class,"tabs-left")]/span/text()'
            ).re_first(r'\D+(\d+)\D+'),
            join_date=response.xpath(
                '//span[contains(@class,"user-join-date")]/text()'
            ).extract_first().strip(),
        )

        vip_images = response.xpath(
            '//div[contains(@class,"avatar-container")]/a/div/img/@src'
        ).extract()
        if vip_images:
            item['is_vip'] = True

        yield item
コード例 #5
0
 def parse(self, response):
     """Yield one ``UserItem`` holding the username, level and join date.

     Each field is the first result of its CSS query against ``response``.
     """
     # (item field, CSS query) pairs, in the order the fields are filled.
     field_queries = (
         ('username', 'span.username::text'),
         ('userlevel', 'span.user-level::text'),
         ('joindate', 'span.join-date::text'),
     )
     values = {
         field: response.css(query).extract_first()
         for field, query in field_queries
     }
     yield UserItem(values)
コード例 #6
0
ファイル: users.py プロジェクト: Achang0121/python-spider
 def parse(self, response):
     """Yield one ``UserItem`` built from the given response.

     Text fields are stripped of surrounding whitespace. ``status`` and
     ``school_job`` are given the default ``'null'`` for a missing match;
     ``name``, ``level`` and ``join_date`` have no default, so a response
     lacking those nodes makes this method raise. ``is_vip`` is set to
     ``True`` only when the avatar block contains exactly two images;
     otherwise the field is left unset.
     """
     # Name and level are the first two text nodes of the same query, so
     # run the XPath once instead of once per field.
     meta_texts = response.xpath(
         '//div[@class="user-meta"]/span/text()'
     ).extract()

     item = UserItem(
         name=meta_texts[0].strip(),
         level=meta_texts[1].strip(),
         status=response.xpath(
             '//div[@class="user-status"]/span/text()'
         ).extract_first(default='null').strip(),
         school_job=response.xpath(
             '//div[@class="user-status"]/span[2]/text()'
         ).extract_first(default='null').strip(),
         join_date=response.css(
             'span.user-join-date::text'
         ).extract_first().strip(),
         # Raw string keeps the regex escapes intact; the capture group is
         # the digit run surrounded by non-digit text.
         learn_courses_num=response.xpath(
             '//span[@class="tab-item"]/text()'
         ).re_first(r'\D+(\d+)\D+'),
     )
     if len(response.css('div.user-avatar img').extract()) == 2:
         item['is_vip'] = True
     yield item
コード例 #7
0
ファイル: users.py プロジェクト: fywest/python
 def parse(self, response):
     """Yield one ``UserItem`` with name, join date and course count.

     Each field is the first regex match within the text selected by its
     XPath query: the first run of non-whitespace characters for ``name``,
     a ``digits-digits-digits`` pattern for ``join_date``, and the first
     run of digits for ``learn_courses_num``.
     """
     # Regex literals are raw strings so backslash escapes are passed to
     # the regex engine unchanged.
     yield UserItem({
         'name':
         response.xpath('//*[@class="user-meta"]/span[1]/text()').re_first(
             r'\S+'),
         'join_date':
         response.xpath('//*[@class="user-join-date"]/text()').re_first(
             r'[0-9]+\-[0-9]+\-[0-9]+'),
         'learn_courses_num':
         response.xpath(
             '//*[@class="user-courses-data"]/div/div/div/span[1]/text()'
         ).re_first(r'[0-9]+'),
     })
コード例 #8
0
ファイル: user.py プロジェクト: xiaolitou/shiyanlou-001
 def parse(self, response):
     """Yield one ``UserItem`` built from the given response.

     Each field is the first result of an XPath or CSS query run against
     ``response``. ``type`` is given an explicit default for when no
     member icon title is found.
     """
     # The item is built from keyword arguments, so the call closes with a
     # plain ')' (there is no dict literal to close).
     yield UserItem(
         name=response.xpath(
             '//div[@class="userinfo-banner-meta"]'
             '/span[@class="username"]/text()').extract_first(),
         type=response.xpath(
             '//a[@class="member-icon"]'
             '/img/@title').extract_first(default='普通会员'),
         status=response.xpath(
             '//div[@class="userinfo-banner-status"]'
             '/span[1]/text()').extract_first(),
         school_job=response.xpath(
             '//div[@class="userinfo-banner-status"]'
             '/span[2]/text()').extract_first(),
         join_date=response.xpath(
             '//span[@class="join-date"]/text()').extract_first(),
         level=response.css('span.user-level::text').extract_first(),
         learn_courses_num=response.css(
             'span.latest-learn-num::text').extract_first(),
     )