Python ZhihuspiderItem.items Examples

Programming Language: Python

Namespace/Package Name: DZhihuSpider.items

Class/Type: ZhihuspiderItem

Method/Function: items

Examples at hotexamples.com: 2

Python ZhihuspiderItem.items - 2 examples found. These are the top rated real world Python examples of DZhihuSpider.items.ZhihuspiderItem.items extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

items(2)

Frequently Used Methods

items (2)

Example #1

Show file

File: ZhihuNotGenRequest.py Project: fdx321/DZhihuSpider

    def parse_about(self, response):
        """Parse a user's "about" page into a ZhihuspiderItem.

        Yields the populated item first. Afterwards, while the queue at
        ``self.queue_key`` holds more than 2000 entries, entries are popped
        from its right end until one whose URL contains ``about`` is found;
        that request is yielded and the loop stops. Entries that are not
        ``about`` requests are pushed back onto the left end.

        :param response: response for a URL shaped like
            ``http://www.zhihu.com/people/<userid>/about``.
        """
        selector = Selector(response)
        item = ZhihuspiderItem()
        # Example URL: http://www.zhihu.com/people/jiang-xiao-fan-92-14/about
        # The second-to-last path segment (jiang-xiao-fan-92-14) is the userID.
        item['userid'] = response.url.split('/')[-2]
        item['name'] = selector.css('div>.name').xpath('text()').extract()
        item['location'] = selector.css('.location>a').xpath('text()').extract()
        item['business'] = selector.css('.business>a').xpath('text()').extract()
        item['gender'] = selector.css('.gender>i').xpath('@class').extract()
        item['employment'] = selector.css('.employment>a').xpath('text()').extract()
        item['position'] = selector.css('.position>a').xpath('text()').extract()
        item['education'] = selector.css('.education>a').xpath('text()').extract()
        item['major'] = selector.css('.education-extra>a').xpath('text()').extract()

        # Collapse each extracted list to its first match, or '' when nothing
        # matched. Iterate over a snapshot because the item is reassigned
        # inside the loop.
        for key, val in list(item.items()):
            if isinstance(val, list):
                item[key] = val[0] if val else ''

        # 'female' must be tested first because it contains 'male'.
        gender_class = item['gender']
        if 'female' in gender_class:
            item['gender'] = u'female'
        elif 'male' in gender_class:
            item['gender'] = u'male'
        else:
            item['gender'] = u'unknown'

        yield item

        # NOTE(review): if the queue stays above 2000 entries and contains no
        # 'about' requests, this loop keeps rotating the queue indefinitely --
        # confirm that producers guarantee otherwise.
        while self.server.llen(self.queue_key) > 2000:
            encoded_request = self.server.rpop(self.queue_key)
            if not encoded_request:
                # The queue was drained between llen() and rpop(); there is
                # nothing to schedule (previously this fell through to
                # ``yield request`` with ``request`` unbound).
                break
            # SECURITY: pickle.loads executes arbitrary code if the payload is
            # attacker-controlled; the queue contents must be trusted.
            request = self.request_from_dict(pickle.loads(encoded_request))
            if 'about' not in request.url:
                # Not an "about" request: put it back at the other end and
                # try the next entry.
                self.server.lpush(self.queue_key, encoded_request)
                continue
            yield request
            break

Example #2

Show file

File: Zhihu.py Project: fdx321/DZhihuSpider

    def parse_about(self, response):
        """Parse a user's "about" page and schedule the follow-list requests.

        Yields a FormRequest for the followees link and one for the followers
        link (each only when the link is found on the page), then yields the
        populated ZhihuspiderItem.

        :param response: response for a URL shaped like
            ``http://www.zhihu.com/people/<userid>/about``.
        """
        selector = Selector(response)

        item = ZhihuspiderItem()

        # Example URL: http://www.zhihu.com/people/jiang-xiao-fan-92-14/about
        # The second-to-last path segment (jiang-xiao-fan-92-14) is the userID.
        item['userid'] = response.url.split('/')[-2]
        item['name'] = selector.css('div>.name').xpath('text()').extract()
        item['location'] = selector.css('.location>a').xpath('text()').extract()
        item['business'] = selector.css('.business>a').xpath('text()').extract()
        item['gender'] = selector.css('.gender>i').xpath('@class').extract()
        item['employment'] = selector.css('.employment>a').xpath('text()').extract()
        item['position'] = selector.css('.position>a').xpath('text()').extract()
        item['education'] = selector.css('.education>a').xpath('text()').extract()
        item['major'] = selector.css('.education-extra>a').xpath('text()').extract()

        # Collapse each extracted list to its first match, or '' when nothing
        # matched. Iterate over a snapshot because the item is reassigned
        # inside the loop.
        for key, val in list(item.items()):
            if isinstance(val, list):
                item[key] = val[0] if val else ''

        # 'female' must be tested first because it contains 'male'.
        gender_class = item['gender']
        if 'female' in gender_class:
            item['gender'] = u'female'
        elif 'male' in gender_class:
            item['gender'] = u'male'
        else:
            item['gender'] = u'unknown'

        # After parsing, look up the links to the followees and followers
        # lists and generate new requests for them. The absolute XPaths are
        # layout-dependent, so a missing link is skipped instead of raising
        # IndexError and discarding the item parsed above.
        followees_href = selector.xpath('/html/body/div[3]/div[2]/div[1]/a[1]/@href').extract()
        followers_href = selector.xpath('/html/body/div[3]/div[2]/div[1]/a[2]/@href').extract()
        if followees_href:
            yield FormRequest(self.host + followees_href[0], meta={'cookiejar': 'followeesCookie'},
                              headers=self.headers, cookies=self.cookies, callback=self.parse_followees)
        if followers_href:
            yield FormRequest(self.host + followers_href[0], meta={'cookiejar': 'followersCookie'},
                              headers=self.headers, cookies=self.cookies, callback=self.parse_followers)

        yield item