Exemple #1
0
    def parse(self, response):
        """Build one ``videoItem`` per video found in a tudou response.

        Four parallel lists (name, view count, publish time, length) are
        extracted with XPath queries and combined position by position.

        Returns:
            A list of ``videoItem`` objects with the keys ``name``,
            ``viewNum``, ``pubTime`` and ``length`` set.

        Raises:
            NameError: if the extracted lists do not all have the same
                length, i.e. the queries no longer line up with the page.
        """
        hxs = HtmlXPathSelector(response)

        names = hxs.select('//div/div/div/div/h1/a/text()').extract()
        view_nums = hxs.select('//ul[@class = "info"]/li[3]/text()').extract()
        pub_times = hxs.select('//ul[@class = "info"]/li[2]/text()').extract()
        lengths = hxs.select('//ul[@class = "info"]/li[1]/text()').extract()

        # Every list must describe the same set of videos. The publish-time
        # list is checked against its own length (the original code measured
        # the view-count list twice and never validated publish times).
        expected = len(names)
        columns = (view_nums, pub_times, lengths)
        if any(len(column) != expected for column in columns):
            raise NameError('fetch failed for tudou')

        # Lengths are equal at this point, so zip() drops nothing.
        item_list = []
        for name, view_num, pub_time, length in zip(
                names, view_nums, pub_times, lengths):
            item = videoItem()
            item['name'] = name
            item['viewNum'] = view_num
            item['pubTime'] = pub_time
            item['length'] = length
            item_list.append(item)
        return item_list
Exemple #2
0
    def parse(self, response):
        """Build one ``videoItem`` per video found in a tudou response.

        Four parallel lists (name, view count, publish time, length) are
        extracted with XPath queries and combined position by position.

        Returns:
            A list of ``videoItem`` objects with the keys ``name``,
            ``viewNum``, ``pubTime`` and ``length`` set.

        Raises:
            NameError: if the extracted lists do not all have the same
                length, i.e. the queries no longer line up with the page.
        """
        hxs = HtmlXPathSelector(response)

        info_xpath = '//ul[@class = "info"]/li[%d]/text()'
        names = hxs.select('//div/div/div/div/h1/a/text()').extract()
        view_nums = hxs.select(info_xpath % 3).extract()
        pub_times = hxs.select(info_xpath % 2).extract()
        lengths = hxs.select(info_xpath % 1).extract()

        # All lists must have one entry per video. The publish-time count
        # is taken from the publish-time list itself; previously the
        # view-count list was measured twice, leaving publish times
        # unvalidated.
        num_names = len(names)
        num_views = len(view_nums)
        num_pub_times = len(pub_times)
        num_lengths = len(lengths)
        if not (num_names == num_views == num_pub_times == num_lengths):
            raise NameError('fetch failed for tudou')

        # The lists are the same length here, so zip() loses no entries.
        item_list = []
        rows = zip(names, view_nums, pub_times, lengths)
        for name, view_num, pub_time, length in rows:
            item = videoItem()
            item['name'] = name
            item['viewNum'] = view_num
            item['pubTime'] = pub_time
            item['length'] = length
            item_list.append(item)
        return item_list
Exemple #3
0
	def parse(self, response):
		"""Build one ``videoItem`` per video found in a youku response.

		Three parallel lists (name, view count, publish time) are extracted
		with XPath queries and combined position by position.

		Returns:
			A list of ``videoItem`` objects with the keys ``name``,
			``viewNum`` and ``pubTime`` set.

		Raises:
			NameError: if the extracted lists do not all have the same
				length, i.e. the queries no longer line up with the page.
		"""
		hxs = HtmlXPathSelector(response)
		names = hxs.select('//div[2]/div/div[2]/div/ul/li/a/text()').extract()
		view_nums = hxs.select('//ul/li/span[2]/text()').extract()
		pub_times = hxs.select('//div[2]/div/ul/li[@class = "v_pub"]/span/text()').extract()
		# NOTE(review): the original comment here referred to characters from
		# the CJK Unified Ideographs block, so the publish-time text is
		# presumably CJK -- confirm against a live page.

		# Every list must describe the same set of videos. The publish-time
		# list is checked against its own length (the original code measured
		# the view-count list twice and never validated publish times).
		expected = len(names)
		if len(view_nums) != expected or len(pub_times) != expected:
			raise NameError('fetch failed for youku')

		# Lengths are equal at this point, so zip() drops nothing.
		item_list = []
		for name, view_num, pub_time in zip(names, view_nums, pub_times):
			item = videoItem()
			item['name'] = name
			item['viewNum'] = view_num
			item['pubTime'] = pub_time
			item_list.append(item)

		return item_list
Exemple #4
0
    def parse(self, response):
        """Build one ``videoItem`` per video found in a youku response.

        Three parallel lists (name, view count, publish time) are extracted
        with XPath queries and combined position by position.

        Returns:
            A list of ``videoItem`` objects with the keys ``name``,
            ``viewNum`` and ``pubTime`` set.

        Raises:
            NameError: if the extracted lists do not all have the same
                length, i.e. the queries no longer line up with the page.
        """
        hxs = HtmlXPathSelector(response)
        names = hxs.select(
            '//div[2]/div/div[2]/div/ul/li/a/text()').extract()
        view_nums = hxs.select('//ul/li/span[2]/text()').extract()
        pub_times = hxs.select(
            '//div[2]/div/ul/li[@class = "v_pub"]/span/text()').extract()
        # NOTE(review): the original comment here referred to characters from
        # the CJK Unified Ideographs block, so the publish-time text is
        # presumably CJK -- confirm against a live page.

        # All lists must have one entry per video. The publish-time count
        # is taken from the publish-time list itself; previously the
        # view-count list was measured twice, leaving publish times
        # unvalidated.
        num_names = len(names)
        num_views = len(view_nums)
        num_pub_times = len(pub_times)
        if not (num_names == num_views == num_pub_times):
            raise NameError('fetch failed for youku')

        # The lists are the same length here, so zip() loses no entries.
        item_list = []
        for name, view_num, pub_time in zip(names, view_nums, pub_times):
            item = videoItem()
            item['name'] = name
            item['viewNum'] = view_num
            item['pubTime'] = pub_time
            item_list.append(item)

        return item_list