def load_item(self, user_item):
     """Build a GithubUserItem from ``user_item`` and return it.

     ``user_item`` is queried with relative XPath expressions (presumably
     it is a selector for one entry of a user list -- confirm against the
     caller).  ``host`` is a name defined outside this function and is
     prefixed to the profile path.

     Fields written: url, _id, username, nickname, user_id, location,
     email, join_date and update_time (``str(datetime.now())``).
     """

     def joined(query):
         # Glue together every string the relative XPath query yields.
         return ''.join(user_item.xpath(query).extract())

     user = GithubUserItem()

     # The last path segment of the link is stored as both '_id' and
     # 'username'; the URL is then rebuilt from that segment.
     user['url'] = host + joined('a/@href')
     login = user['url'].rsplit('/', 1)[-1]
     user['_id'] = user['username'] = login
     user['url'] = host + "/" + user['_id']

     user['nickname'] = joined(
         'div[@class="user-list-info"]/a/following-sibling::text()[1]'
     ).strip()

     # Last path segment of the image URL, with any query string removed.
     image_src = joined('a/img/@src')
     user['user_id'] = image_src.rsplit("/", 1)[-1].partition("?")[0]

     user['location'] = joined(
         'div[@class="user-list-info"]/ul/li/span/text()'
     ).strip()
     user['email'] = joined(
         'div[@class="user-list-info"]/ul/li/span/a/text()')
     user['join_date'] = joined(
         'div[@class="user-list-info"]/ul/li/span/time/@datetime')

     user['update_time'] = str(datetime.now())
     return user
    def parse_user_all_info(self, response):
        """Scrape a profile page into a GithubUserItem and yield it.

        The last path segment of ``response.url`` is stored as both '_id'
        and 'username'.  When the page contains a ``div`` whose itemtype is
        ``http://schema.org/Organization`` the item gets ``type`` 1 and the
        organization fields; otherwise it gets ``type`` 0 and the user
        fields.

        Every text field is the concatenation of all XPath matches, so a
        field with no match is stored as ''.  The three counters follow the
        same rule: a page with fewer than three ``vcard-stats`` entries
        yields '' for the missing ones instead of raising ``IndexError``.

        Yields:
            One populated GithubUserItem.
        """
        selector = Selector(response)

        def joined(query):
            # Concatenate every string matched by the XPath query.
            return ''.join(selector.xpath(query).extract())

        user = GithubUserItem()
        user['_id'] = user['username'] = response.url.split('/')[-1]
        user['url'] = response.url
        user['update_time'] = str(datetime.now())

        if selector.xpath(
                '//div[@itemtype="http://schema.org/Organization"]').extract():
            user['type'] = 1
            user['nickname'] = joined(
                '//div[@class="org-header-info"]/h1/span/text()')
            # Last path segment of the image URL, without its query string.
            user['user_id'] = joined(
                '//div[@class="org-header-wrapper"]/img/@src'
            ).split("/")[-1].split("?")[0]

            user['location'] = joined(
                '//div[@class="org-header-info"]/ul/li[@class="meta-item"]/span[@itemprop="location"]/text()'
            )
            user['website'] = joined(
                '//div[@class="org-header-info"]/ul/li[@class="meta-item"]/a[@itemprop="url"]/text()'
            )
            user['email'] = joined(
                '//div[@class="org-header-info"]/ul/li[@class="meta-item"]/a[@itemprop="email"]/text()'
            )

            # NOTE(review): this query has no /text() step, so the stored
            # value is whatever extract() returns for the span element
            # itself -- confirm that is intended.
            user['member_num'] = joined(
                '//div[@class="org-module simple-box"]/h3[@class="org-module-title org-members-title"]/a/span[1]'
            )
        else:
            user['type'] = 0
            user['nickname'] = joined(
                '//div[@class="column one-fourth vcard"]/h1/span[@class="vcard-fullname"]/text()'
            )
            # Last path segment of the first link's href, without its
            # query string.
            user['user_id'] = joined(
                '//div[@class="column one-fourth vcard"]/a[1]/@href'
            ).split("/")[-1].split("?")[0]

            user['company'] = joined(
                '//ul[@class="vcard-details"]/li[@itemprop="worksFor"]/text()'
            )
            user['location'] = joined(
                '//ul[@class="vcard-details"]/li[@itemprop="homeLocation"]/text()'
            )
            user['email'] = urllib.unquote(joined(
                '//ul[@class="vcard-details"]/li[@class="vcard-detail"]/a[@class="email"]/text()'
            ))
            user['website'] = joined(
                '//ul[@class="vcard-details"]/li[@class="vcard-detail"]/a[@class="url"]/text()'
            )
            user['join_date'] = joined(
                '//ul[@class="vcard-details"]/li[@class="vcard-detail"]/time[@class="join-date"]/@datetime'
            )

            nums = selector.xpath(
                '//div[@class="column one-fourth vcard"]/div[@class="vcard-stats"]/a/strong/text()'
            ).extract()
            # Pad to three entries so a page with fewer counters stores ''
            # for the missing ones rather than failing on nums[i].
            follower_num, star_num, followee_num = (nums + [''] * 3)[:3]
            user['follower_num'] = follower_num
            user['star_num'] = star_num
            user['followee_num'] = followee_num
            user['organizations'] = selector.xpath(
                '//div[@class="clearfix"]/a[@itemprop="follows"]/@href'
            ).extract()

        yield user
    def parse_user(self, response):
        """Scrape a profile page into a GithubUserItem and yield it.

        The last path segment of ``response.url`` is stored as both '_id'
        and 'username'.  A page containing a ``div`` whose itemtype is
        ``http://schema.org/Organization`` is recorded with ``type`` "Org";
        any other page is recorded with ``type`` "User".  Text fields are
        the concatenation of all XPath matches, so a field with no match is
        stored as ''.

        Yields:
            One populated GithubUserItem.  The 'NEW:<username>' line is
            printed only after the consumer resumes the generator past the
            yield.
        """
        selector = Selector(response)

        def text_of(query):
            # Concatenate every string matched by the XPath query.
            return ''.join(selector.xpath(query).extract())

        user = GithubUserItem()
        login = response.url.rsplit('/', 1)[-1]
        user['_id'] = user['username'] = login
        user['url'] = response.url

        org_markers = selector.xpath(
            '//div[@itemtype="http://schema.org/Organization"]').extract()
        is_org = len(org_markers) > 0

        if is_org:
            user['type'] = "Org"
            user['nickname'] = text_of(
                '//div[@class="org-header-info"]/h1/span/text()')
            user['location'] = text_of(
                '//ul[@class="org-header-meta"]/li[@class="meta-item"]/span[@itemprop="location"]/text()'
            )
            user['email'] = urllib.unquote(text_of(
                '//ul[@class="org-header-meta"]/li[@class="meta-item"]/a[@itemprop="email"]/text()'
            ))
            user['website'] = text_of(
                '//ul[@class="org-header-meta"]/li[@class="meta-item"]/a[@itemprop="url"]/text()'
            )
            # NOTE(review): this query has no /text() step, so the stored
            # value is whatever extract() returns for the span element
            # itself -- confirm that is intended.
            user['member_num'] = text_of(
                '//div[@class="org-module simple-box"]/h3[@class="org-module-title org-members-title"]/a/span[1]'
            )
        else:
            user['type'] = "User"
            # Last path segment of the joined hrefs, without a query string.
            hrefs = text_of('//div[@class="column one-fourth vcard"]/a/@href')
            user['user_id'] = hrefs.rsplit("/", 1)[-1].partition("?")[0]
            user['nickname'] = text_of(
                '//div[@class="column one-fourth vcard"]/h1/span[@class="vcard-fullname"]/text()'
            )
            user['company'] = text_of(
                '//ul[@class="vcard-details"]/li/span[@itemprop="worksFor"]/text()'
            )
            user['location'] = text_of(
                '//ul[@class="vcard-details"]/li[@itemprop="homeLocation"]/text()'
            )
            user['email'] = urllib.unquote(text_of(
                '//ul[@class="vcard-details"]/li[@class="vcard-detail"]/a[starts-with(@href,"mailto")]/@data-email'
            ))
            user['website'] = text_of(
                '//ul[@class="vcard-details"]/li[@class="vcard-detail"]/a[@class="url"]/text()'
            )
            user['join_date'] = text_of(
                '//ul[@class="vcard-details"]/li[@class="vcard-detail"]/span[@class="join-date"]/text()'
            )

            # The three counters are read positionally from the stats links.
            user['follower_num'] = text_of(
                '//div[@class="vcard-stats"]/a[1]/strong/text()')
            user['star_num'] = text_of(
                '//div[@class="vcard-stats"]/a[2]/strong/text()')
            user['followee_num'] = text_of(
                '//div[@class="vcard-stats"]/a[3]/strong/text()')
            #user['repo_num'] = ''
            org_links = selector.xpath(
                '//div[@class="vcard-orgs"]/div/a').extract()
            user['org_num'] = str(len(org_links))

        # Both kinds of page are stamped last, once their fields are filled.
        user['update_time'] = str(datetime.now())

        yield user
        print('NEW:%s' % user['username'])