Example #1
File: index.py Project: hellertime/leif
	def compressTermIdData(self,termId):
		return data.compressDocIdTermInstanceTable(self.termIdHash[termId])
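Example #2 below unpacks the return value of compressTermIdData as a (header, compressedData) pair, so the delegation to data.compressDocIdTermInstanceTable is taken to yield the table header (whose offset and length fields are used when writing to disk) together with the compressed bytes. A minimal sketch of that calling convention; the helper name compressOneTerm and its arguments are hypothetical, not part of the project:
	def compressOneTerm(partition, termId):
		# hypothetical helper (not from hellertime/leif); it only illustrates the
		# return convention that Example #2 relies on at its write sites
		header, compressedData = partition.compressTermIdData(termId)
		# 'header' carries the table metadata and 'compressedData' the serialized
		# DocIdTermInstanceTable bytes that get written into the partition file
		return header, compressedData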
Example #2
File: index.py Project: hellertime/leif
	def mergePartitions(self,termIdList,*partitions):
		"""Merge the data from partitions into self
		This must be a method on an ExternalPartition, MemoryPartitions have no concept of merging
		termIdList must contain the sorted list of all termIds in all partitions
		the resulting merged partition will contain one entry for each termId
		"""
		# seek constants... no need to import them from wherever...
		SEEK_END = 2
		SEEK_CUR = 1
		# internal functions to handle some preliminaries
		# makes the main merge code easier to understand, sacrificing overall function length
		def _growPartitionFile(howMuch):
			"""extend the disk partition by howMuch"""
			if os.path.exists(self.path): # prevent truncating existing file
				print >> sys.stderr, "Extending ExternalPartition %s" % self.path
				fp = open(self.path,"rb+")
			else:
				print >> sys.stderr, "Creating ExternalPartiton %s" % self.path
				fp = open(self.path,"wb")

			fp.seek(0,SEEK_END)
			previousSize = fp.tell()
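			# extend the file by seeking howMuch-1 bytes past the current end and
			# writing a single zero byte; everything in between becomes part of the file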
			fp.seek(howMuch - 1,SEEK_CUR)
			fp.write('\x00')
			newSize = fp.tell()
			print >> sys.stderr, "ExternalPartition grew %d bytes, has size %s" % (newSize-previousSize,newSize)
			fp.close()
			self.__mmap_init__()
		def _relocateDocIdTermInstanceTables():
			"""Relocates the existing tables to the end of the file maintaining proper offsets"""
			rp = open(self.path,"rb")
			wp = open(self.path,"rb+")
			wp.seek(0,SEEK_END)

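			# walk termIds from highest to lowest, copying each table to the slot just
			# below the previously relocated one, so the tables end up packed against
			# the new end of the file; assuming the tables were laid out in ascending
			# termId order, working backward avoids overwriting data not yet moved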
			for termId in reversed(sorted(self.termIdHash)):
				header = self.termIdHash[termId]
				rp.seek(header.offset)
				wp.seek(-header.length,SEEK_CUR)
				newOffset = wp.tell()
				wp.write(rp.read(header.length))
				wp.seek(newOffset)
				self.termIdHash[termId].offset = newOffset

			rp.close()
			wp.close()
		# Main Merge Logic
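		# three phases: grow the backing file to make room for the incoming data,
		# shift the existing tables to the end of the file, then write the merged
		# tables from the start of the file and truncate away the leftover slack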
		spaceNeeded = sum(map(lambda partition: partition.estimateSizeOnDisk(),partitions))
		_growPartitionFile(spaceNeeded)
		_relocateDocIdTermInstanceTables()
		wp = open(self.path,"rb+")

		for termId in termIdList:
			partitionsHoldingTermId = list()
			for partition in partitions:
				if termId in partition:
					partitionsHoldingTermId.append(partition)

			# always add self last
			if termId in self: partitionsHoldingTermId.append(self)
			if len(partitionsHoldingTermId) == 0: continue
			else:
				newOffset = wp.tell()

			if len(partitionsHoldingTermId) == 1:
				partition = partitionsHoldingTermId[0]
				#print >> sys.stderr, "Merge single instance of termId %d from %s" % (termId,partition.name)
				header,compressedData = partition.compressTermIdData(termId)
				wp.write(compressedData)
				header.offset = newOffset
				self.termIdHash[termId] = header

				if partition is not self: partition.deleteTermId(termId)
			else:
				table = data.DocIdTermInstanceTable()
				for partition in partitionsHoldingTermId:
					#print >> sys.stderr, "Merge multi instance termId %s from %s" % (termId,partition.name)
					for docIdTermInstanceVector in partition.lookupTermId(termId):
						docId = docIdTermInstanceVector.docId
						for termInstance in docIdTermInstanceVector.termInstancesGenerator:
							table.insertTermInstanceRecord(docId,termInstance)

					if partition is not self: partition.deleteTermId(termId)

				header,compressedData = data.compressDocIdTermInstanceTable(table) 
				wp.write(compressedData)
				header.offset = newOffset
				self.termIdHash[termId] = header

		wp.truncate()
		print >> sys.stderr, "ExternalPartition was truncated to size %d" % wp.tell()
		wp.close()
		self.__mmap_init__()
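A minimal sketch of how mergePartitions might be driven when flushing in-memory partitions into the on-disk one. None of this is from hellertime/leif: the name flushMemoryPartitions and its arguments are hypothetical, and it assumes each partition exposes the termIdHash mapping the snippets above use. It only illustrates the calling convention from the docstring: a sorted termIdList covering every termId in every partition, with the partitions to absorb passed as *partitions.
	def flushMemoryPartitions(externalPartition, memoryPartitions):
		# hypothetical driver (not from the project): build the sorted termIdList
		# that mergePartitions requires, then hand over the in-memory partitions
		allTermIds = set()
		for partition in memoryPartitions:
			allTermIds.update(partition.termIdHash.keys())
		# include the target's own termIds; the merge loop re-emits an entry for
		# any termId already present in self
		allTermIds.update(externalPartition.termIdHash.keys())
		termIdList = sorted(allTermIds)
		externalPartition.mergePartitions(termIdList, *memoryPartitions)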