コード例 #1
0
ファイル: baseSpider.py プロジェクト: Lewisch/rspid
    def parse(self, response):
        """Collect post fields from the ``siteTable`` container of a page.

        One ``redditItems`` instance is created per node matching
        ``//div[@id="siteTable"]``.  Each field is assigned the raw result
        of ``.extract()`` on an XPath evaluated relative to that node, so
        a field holds every match found beneath the container.

        NOTE(review): the loop iterates over the container(s), not over
        individual posts, so a page presumably yields a single aggregated
        item -- confirm this is intended.

        Args:
            response: The response to scrape; it is wrapped in ``Selector``
                and also queried directly through ``response.xpath``.

        Returns:
            A list with one populated ``redditItems`` per matched container.
        """
        # Parenthesised so the line parses on Python 3 while still printing
        # the same text under the Python 2 print statement.
        print("Processing data...")
        redditPosts = Selector(response).xpath('//div[@id="siteTable"]')
        items = []
        for post in redditPosts:
            item = redditItems()
            item['postTitle'] = post.xpath('div/div[2]/p[1]/a/text()').extract()
            item['postLink'] = post.xpath('div/div[2]/p[1]/a/@href').extract()
            item['postUpvote'] = post.xpath('div/div[1]/div[3]/text()').extract()
            item['commentLink'] = post.xpath('div/div[2]//ul/li[1]/a/@href').extract()
            # Absolute XPath on the whole response, not relative to ``post``.
            item['subcategory'] = response.xpath('//body/div[1]/div[2]/span/a/text()').extract()
            items.append(item)

        # Indented with spaces like the rest of the body (the original used
        # a tab here, which Python 3 rejects as inconsistent indentation).
        return items
コード例 #2
0
ファイル: baseSpider.py プロジェクト: zbef3825/rspid
	def parse(self, response):
		"""Collect post fields from the ``siteTable`` container of a page.

		One ``redditItems`` instance is created per node matching
		``//div[@id="siteTable"]``.  Each scraped field is assigned the raw
		result of ``.extract()`` on an XPath evaluated relative to that
		node, so a field holds every match found beneath the container.

		NOTE(review): the loop iterates over the container(s), not over
		individual posts, so a page presumably yields a single aggregated
		item -- confirm this is intended.

		Args:
			response: The response to scrape; it is wrapped in ``Selector``.

		Returns:
			A list with one populated ``redditItems`` per matched container.
		"""
		# Parenthesised so the line parses on Python 3 while still printing
		# the same text under the Python 2 print statement.
		print("Processing data...")
		redditPosts = Selector(response).xpath('//div[@id="siteTable"]')
		items = []
		for post in redditPosts:
			item = redditItems()
			item['postTitle'] = post.xpath('div/div[2]/p[1]/a/text()').extract()
			item['postLink'] = post.xpath('div/div[2]/p[1]/a/@href').extract()
			item['postUpvote'] = post.xpath('div/div[1]/div[3]/text()').extract()
			item['commentLink'] = post.xpath('div/div[2]//ul/li[1]/a/@href').extract()
			# Always stored as the string form of 0; nothing here updates it.
			item['rankingPosition'] = str(0)
			item['lastUpdate'] = post.xpath('div/div[2]/p[2]/time/@datetime').extract()
			item['postOrigin'] = post.xpath('div/div[2]/p[2]/a/text()').extract()
			items.append(item)

		return items
コード例 #3
0
ファイル: baseSpider.py プロジェクト: Lewisch/rspid
    def parse(self, response):
        """Collect post fields from the ``siteTable`` container of a page.

        One ``redditItems`` instance is created per node matching
        ``//div[@id="siteTable"]``.  Each field is assigned the raw result
        of ``.extract()`` on its XPath, so a field holds every match found.

        NOTE(review): the loop iterates over the container(s), not over
        individual posts, so a page presumably yields a single aggregated
        item -- confirm this is intended.

        Args:
            response: The response to scrape; it is wrapped in ``Selector``
                and also queried directly through ``response.xpath``.

        Returns:
            A list with one populated ``redditItems`` per matched container.
        """
        # (item field, XPath relative to the container), in assignment order.
        relative_fields = (
            ('postTitle', 'div/div[2]/p[1]/a/text()'),
            ('postLink', 'div/div[2]/p[1]/a/@href'),
            ('postUpvote', 'div/div[1]/div[3]/text()'),
            ('commentLink', 'div/div[2]//ul/li[1]/a/@href'),
        )

        # Parenthesised so the line parses on Python 3 while still printing
        # the same text under the Python 2 print statement.
        print("Processing data...")
        redditPosts = Selector(response).xpath('//div[@id="siteTable"]')
        items = []
        for post in redditPosts:
            item = redditItems()
            for field, path in relative_fields:
                item[field] = post.xpath(path).extract()
            # Absolute XPath on the whole response, not relative to ``post``.
            item['subcategory'] = response.xpath(
                '//body/div[1]/div[2]/span/a/text()').extract()
            items.append(item)

        return items
コード例 #4
0
    def parse(self, response):
        """Collect post fields from the ``siteTable`` container of a page.

        One ``redditItems`` instance is created per node matching
        ``//div[@id="siteTable"]``.  Each scraped field is assigned the raw
        result of ``.extract()`` on an XPath evaluated relative to that
        node, so a field holds every match found beneath the container.

        NOTE(review): the loop iterates over the container(s), not over
        individual posts, so a page presumably yields a single aggregated
        item -- confirm this is intended.

        Args:
            response: The response to scrape; it is wrapped in ``Selector``.

        Returns:
            A list with one populated ``redditItems`` per matched container.
        """
        # Parenthesised so the line parses on Python 3 while still printing
        # the same text under the Python 2 print statement.
        print("Processing data...")
        redditPosts = Selector(response).xpath('//div[@id="siteTable"]')
        items = []
        for post in redditPosts:
            item = redditItems()
            item['postTitle'] = post.xpath(
                'div/div[2]/p[1]/a/text()').extract()
            item['postLink'] = post.xpath('div/div[2]/p[1]/a/@href').extract()
            item['postUpvote'] = post.xpath(
                'div/div[1]/div[3]/text()').extract()
            item['commentLink'] = post.xpath(
                'div/div[2]//ul/li[1]/a/@href').extract()
            # Always stored as the string form of 0; nothing here updates it.
            item['rankingPosition'] = str(0)
            item['lastUpdate'] = post.xpath(
                'div/div[2]/p[2]/time/@datetime').extract()
            item['postOrigin'] = post.xpath(
                'div/div[2]/p[2]/a/text()').extract()
            items.append(item)

        return items