Python Partition.getDirPartitionNames 예제들

프로그래밍 언어: Python

네임스페이스/패키지 이름: partition

클래스/타입: Partition

메소드/함수: getDirPartitionNames

hotexamples.com에서의 예제들: 4

Python Partition.getDirPartitionNames - 4개의 예제가 발견되었습니다. 다음은 오픈소스 프로젝트에서 추출한 Python partition.Partition.getDirPartitionNames의 실제 사용 예제 중 평가가 가장 높은 것들입니다. 예제를 평가하면 예제의 품질을 높이는 데 도움이 됩니다.

자주 사용되는 메소드들

보기 숨기기

filesystem(5)

type(5)

start(5)

size(5)

getDirTestNames(4)

bootable(4)

getDirPartitionNames(4)

components(4)

Partition(3)

union(2)

is_hook(2)

find(2)

findHash(2)

findShiftedValue(2)

is_one_dimensional(2)

is_self_conjugate(2)

getDirPercentName(2)

getList(2)

run(1)

segment(1)

read_file(1)

sets(1)

start_partition(1)

split_partition(1)

num_row_perms(1)

update(1)

updateDiskHashBackupTable1(1)

updateDiskHashTable(1)

updateDiskHashTable1(1)

wait_partition_destination(1)

process(1)

is_2special(1)

merge(1)

extend(1)

add_state(1)

clip_id_to_side(1)

conjugate(1)

decode_partition_type(1)

discrete(1)

evaluate(1)

finish_partition(1)

make_group(1)

generate_partitioned_queries_learning(1)

get_content(1)

get_file(1)

get_spec(1)

grid(1)

__len__(1)

length(1)

wrap_partitions_from_dict_arr(1)

예제 #1

파일 보기

파일: bilboTrain.py 프로젝트: OpenEdition/bilbo

class bibloTrain():
	"""Run a Bilbo training pass over every partition directory of a corpus.

	The partition directories are obtained once, at construction time, from
	``Partition.getDirPartitionNames()``.
	"""

	def __init__(self, bilboOptions, dirCorpus, testPercentage, numberOfPartition=10, prefix=''):
		"""Store the options and list the partition directories.

		``bilboOptions`` is modified in place: ``T`` is set to True, ``L`` to
		False and ``t`` to ``'bibl'``, so the caller's object changes too.
		The remaining arguments are forwarded unchanged to ``Partition``.
		"""
		self.bilboOptions = bilboOptions
		self.bilboOptions.T = True
		self.bilboOptions.L = False
		self.bilboOptions.t = 'bibl'
		# NOTE: the ``k`` option is intentionally not forced to 'all' here.
		self.partitions = Partition(dirCorpus, testPercentage, numberOfPartition, prefix)
		self.dirPartitions = self.partitions.getDirPartitionNames()

	def train(self):
		"""Train one "crf_model_simple" model per partition directory."""
		for dirPartition in self.dirPartitions:
			# Single pre-formatted argument: prints the same text on Python 2 and 3.
			print("dirPartition %s" % (dirPartition,))
			# Only the train and model directories are needed; the 5-way
			# unpacking is kept so an unexpected tuple shape still fails loudly.
			_, _, trainDir, modelDir, _ = self.partitions.getDirTestNames(dirPartition)

			# tmp files of the test data are stored in trainDir: clear them first
			self._del_tmp_file(trainDir)
			# tmp files are saved in modelDir when the "-k all" option is used
			bilbo = Bilbo(modelDir, self.bilboOptions, "crf_model_simple")
			bilbo.train(trainDir, modelDir, 1)

	def _del_tmp_file(self, resultDir):
		"""Delete every entry directly inside ``resultDir`` named ``tmp*``.

		Real directories are removed recursively. Plain files and symbolic
		links are unlinked, because ``shutil.rmtree`` raises on both.
		Nothing happens when no entry matches.
		"""
		for tmpPath in glob.glob(os.path.join(resultDir, 'tmp*')):
			if os.path.isdir(tmpPath) and not os.path.islink(tmpPath):
				shutil.rmtree(tmpPath)
			else:
				os.remove(tmpPath)

예제 #2

파일 보기

파일: bilboEval.py 프로젝트: OpenEdition/bilbo

class bilboEval():
	"""Evaluate the annotation output of every partition and write a summary.

	For each partition an ``evaluation.txt`` file is written in the partition
	directory. A global ``evaluation.tsv`` is written in the directory
	returned by ``Partition.getDirPercentName()``.
	"""

	def __init__(self, dirCorpus, testPercentage, numberOfPartition=10, prefix=''):
		"""Build the ``Partition`` helper and list its partition directories."""
		self.partitions = Partition(dirCorpus, testPercentage, numberOfPartition, prefix)
		self.dirPartitions = self.partitions.getDirPartitionNames()

	def eval(self):
		"""Evaluate all partitions, then write per-partition and averaged results.

		The final ``evaluation.tsv`` has one header line with the labels, one
		line of values per partition, and a last line with the column-wise
		average. With no partition at all only an empty header line is
		written.
		"""
		# Pre-bound so that an empty partition list does not leave it undefined.
		labels = []
		allValues = []
		for dirPartition in self.dirPartitions:
			_, _, trainDir, _, resultDir = self.partitions.getDirTestNames(dirPartition)

			labeledContent = self._getFile(resultDir, 'testEstCRF_Wapiti.txt')
			# tmp files from the training of testDir are saved in trainDir
			desiredContent = self._getFile(trainDir, 'evaldata_CRF_Wapiti.txt')

			# harmonize the two contents, they are not tokenized the same way
			desiredHarmonized, labeledHarmonized = prepareEval.prepareEval(desiredContent, labeledContent)

			self._saveFile(labeledHarmonized, resultDir, 'annotatedEval.txt')
			self._saveFile(desiredHarmonized, resultDir, 'desiredEval.txt')

			evalText, labels, values = TokenAccuracyEval.evaluate(labeledHarmonized, desiredHarmonized)
			allValues.append(values)
			self._saveFile(evalText, dirPartition, 'evaluation.txt')

		# column-wise average over all partitions, stored as the last row
		average = [float(sum(col)) / len(col) for col in zip(*allValues)]
		allValues.append(average)

		# header line with the labels, then one tab-separated line per row
		rows = ["\t".join('{:f}'.format(v) for v in row) for row in allValues]
		finalEval = "\t".join(labels) + "\n" + "\n".join(rows)
		self._saveFile(finalEval, self.partitions.getDirPercentName(), 'evaluation.tsv')

	def _getFile(self, fileDir, pattern):
		"""Return the text of the first ``fileDir/tmp*/<pattern>`` match.

		Matches are sorted so the choice is deterministic when several
		``tmp*`` directories exist.

		Raises:
			IndexError: when no file matches (same exception type as before,
				now with the searched pattern in the message).
		"""
		fullPattern = os.path.join(fileDir, 'tmp*', pattern)
		files = sorted(glob.glob(fullPattern))
		if not files:
			raise IndexError("no file matching %r" % (fullPattern,))
		with open(files[0], 'r', encoding='utf-8') as content_file:
			return content_file.read()

	def _saveFile(self, content, dirName, fileName):
		"""Write ``content`` as UTF-8 text to ``dirName/fileName`` (overwriting)."""
		with open(os.path.join(dirName, fileName), 'w', encoding='utf-8') as content_file:
			content_file.write(content)

예제 #3

파일 보기

파일: bilboAnnotate.py 프로젝트: ansdma/bilbo

class bilboAnnotate():
	"""Annotate the test data of every partition and prepare evaluation data.

	The partition directories are obtained once, at construction time, from
	``Partition.getDirPartitionNames()``.
	"""

	def __init__(self, bilboOptions, dirCorpus, testPercentage, numberOfPartition=10, prefix=''):
		"""Store the options and list the partition directories.

		``bilboOptions`` is modified in place: ``L`` is set to True, ``T`` to
		False, ``t`` to ``'bibl'``, ``k`` to ``'all'`` and ``o`` to
		``'simple'``, so the caller's object changes too. The remaining
		arguments are forwarded unchanged to ``Partition``.
		"""
		self.bilboOptions = bilboOptions
		self._setBilboAnnotate()
		self.bilboOptions.t = 'bibl'
		self.bilboOptions.k = 'all'
		self.bilboOptions.o = 'simple'

		self.partitions = Partition(dirCorpus, testPercentage, numberOfPartition, prefix)
		self.dirPartitions = self.partitions.getDirPartitionNames()

	def annotate(self):
		"""Annotate each partition, then extract CRF data from its test set.

		The options are left with ``L`` False and ``T`` True after the last
		partition has been processed.
		"""
		for dirPartition in self.dirPartitions:
			(annotateDir, testDir, trainDir, modelDir, resultDir) = self.partitions.getDirTestNames(dirPartition)

			# annotation of the test data stripped of its tags
			self._setBilboAnnotate()
			self._del_tmp_file(resultDir)
			annotator = Bilbo(resultDir, self.bilboOptions, "crf_model_simple")
			annotator.annotate(annotateDir, modelDir, 1)

			# train with the test data, to produce the evaluation reference
			self._setBilboTrain()
			self._del_tmp_file(trainDir)
			# built on trainDir, so the tmp files end up in trainDir
			trainer = Bilbo(trainDir, self.bilboOptions, "crf_model_simple")
			corpus = Corpus(testDir, self.bilboOptions)
			corpus.extract(1, "bibl")
			# CRF training data extraction
			trainer.crf.prepareTrain(corpus, 1, "evaldata_CRF.txt", 1, 1)

	def _setBilboAnnotate(self):
		"""Set the option flags used for annotation: ``L`` True, ``T`` False."""
		self.bilboOptions.L = True
		self.bilboOptions.T = False

	def _setBilboTrain(self):
		"""Set the option flags used for training: ``L`` False, ``T`` True."""
		self.bilboOptions.L = False
		self.bilboOptions.T = True

	def _del_tmp_file(self, resultDir):
		"""Delete every entry directly inside ``resultDir`` named ``tmp*``.

		Real directories are removed recursively. Plain files and symbolic
		links are unlinked, because ``shutil.rmtree`` raises on both.
		Nothing happens when no entry matches.
		"""
		for tmpPath in glob.glob(os.path.join(resultDir, 'tmp*')):
			if os.path.isdir(tmpPath) and not os.path.islink(tmpPath):
				shutil.rmtree(tmpPath)
			else:
				os.remove(tmpPath)

예제 #4

파일 보기

파일: bilboEval.py 프로젝트: ansdma/bilbo

class bilboEval():
	"""Evaluate the annotation output of every partition and write a summary.

	For each partition an ``evaluation.txt`` file is written in the partition
	directory. A global ``evaluation.tsv`` is written in the directory
	returned by ``Partition.getDirPercentName()``.
	"""

	def __init__(self, dirCorpus, testPercentage, numberOfPartition=10, prefix=''):
		"""Build the ``Partition`` helper and list its partition directories."""
		self.partitions = Partition(dirCorpus, testPercentage, numberOfPartition, prefix)
		self.dirPartitions = self.partitions.getDirPartitionNames()

	def eval(self):
		"""Evaluate all partitions, then write per-partition and averaged results.

		The final ``evaluation.tsv`` has one header line with the labels, one
		line of values per partition, and a last line with the column-wise
		average. With no partition at all only an empty header line is
		written.
		"""
		# Pre-bound so that an empty partition list does not leave it undefined.
		labels = []
		allValues = []
		for dirPartition in self.dirPartitions:
			_, _, trainDir, _, resultDir = self.partitions.getDirTestNames(dirPartition)

			annotatedLines = self._formatEval(self._getFile(resultDir, 'testEstCRF_Wapiti.txt'))
			desiredLines = self._formatEval(self._getFile(trainDir, 'evaldata_CRF_Wapiti.txt'))

			# the two outputs are not tokenized the same way: realign them
			desiredLines, annotatedLines = self._harmonizeList(desiredLines, annotatedLines)

			annotatedText = "\n".join(annotatedLines)
			desiredText = "\n".join(desiredLines)
			self._saveFile(annotatedText, resultDir, 'annotatedEval.txt')
			self._saveFile(desiredText, resultDir, 'desiredEval.txt')

			evalText, labels, values = TokenAccuracyEval.evaluate(annotatedText, desiredText)
			allValues.append(values)
			self._saveFile(evalText, dirPartition, 'evaluation.txt')

		# column-wise average over all partitions, stored as the last row
		average = [float(sum(col)) / len(col) for col in zip(*allValues)]
		allValues.append(average)

		# header line with the labels, then one tab-separated line per row
		rows = ["\t".join('{:f}'.format(v) for v in row) for row in allValues]
		finalEval = "\t".join(labels) + "\n" + "\n".join(rows)
		self._saveFile(finalEval, self.partitions.getDirPercentName(), 'evaluation.tsv')

	def _getFeatureAndName(self, token):
		"""Split a ``"<feature>\\t<name>"`` entry into ``(feature, name)``.

		``name`` is ``""`` when the entry has no tab. When the name consists
		of several whitespace-separated pieces they are glued together, which
		works around non-printing UTF-8 characters splitting a token.
		"""
		words = token.split("\t")
		feature = words[0]
		name = words[1] if len(words) > 1 else ""
		pieces = name.split()
		if len(pieces) > 1:
			name = "".join(pieces)
		return feature, name

	def _harmonizeList(self, shortList, longList):
		"""Realign two differently tokenized ``"<feature>\\t<name>"`` lists.

		Both lists are walked in parallel. While the current names differ,
		the side whose accumulated name compares smaller (plain string
		comparison) absorbs the name of its next entry. Once the names match,
		one entry is emitted on each side, keeping the feature of the first
		entry of the merged run; an empty name yields an empty entry.
		Processing stops as soon as either list is exhausted, so both
		returned lists have the same length. Two empty lists are returned
		when either input is empty.

		Raises:
			IndexError: when the names never match before one list runs out
				(the two token streams diverge).
		"""
		lengthShort = len(shortList)
		lengthLong = len(longList)
		newShortList = []
		newLongList = []
		indexShort = 0
		indexLong = 0
		while indexShort < lengthShort and indexLong < lengthLong:
			featureShort, partShort = self._getFeatureAndName(shortList[indexShort])
			featureLong, partLong = self._getFeatureAndName(longList[indexLong])

			# grow the lagging side until both sides cover the same text
			while partShort != partLong:
				if partShort < partLong:
					indexShort += 1
					_, extra = self._getFeatureAndName(shortList[indexShort])
					partShort += extra
				else:
					indexLong += 1
					_, extra = self._getFeatureAndName(longList[indexLong])
					partLong += extra

			newShortList.append(featureShort + "\t" + partShort if partShort else '')
			newLongList.append(featureLong + "\t" + partLong if partLong else '')

			indexShort += 1
			indexLong += 1

		return newShortList, newLongList

	def _getFile(self, fileDir, pattern):
		"""Return the text of the first ``fileDir/tmp*/<pattern>`` match.

		Matches are sorted so the choice is deterministic when several
		``tmp*`` directories exist.

		Raises:
			IndexError: when no file matches (same exception type as before,
				now with the searched pattern in the message).
		"""
		fullPattern = os.path.join(fileDir, 'tmp*', pattern)
		files = sorted(glob.glob(fullPattern))
		if not files:
			raise IndexError("no file matching %r" % (fullPattern,))
		with open(files[0], 'r', encoding='utf-8') as content_file:
			return content_file.read()

	def _saveFile(self, content, dirName, fileName):
		"""Write ``content`` as UTF-8 text to ``dirName/fileName`` (overwriting)."""
		with open(os.path.join(dirName, fileName), 'w', encoding='utf-8') as content_file:
			content_file.write(content)

	def _formatEval(self, content):
		"""Convert space-separated lines into ``"<last field>\\t<first field>"``.

		One output entry is produced per input line. Lines without any space
		(blank lines included) become empty strings.
		"""
		formated = []
		for line in content.split("\n"):
			words = line.split(" ")
			if len(words) > 1:
				formated.append(words[-1].strip() + "\t" + words[0].strip())
			else:
				formated.append('')
		return formated