Python GenerateXml Examples

Programming Language: Python

Namespace/Package Name: bilbo.output.GenerateXml

Class/Type: GenerateXml

Examples at hotexamples.com: 2

Python GenerateXml - 2 examples found. These are the top-rated real-world Python examples of bilbo.output.GenerateXml.GenerateXml, extracted from open-source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

simpleComp(1)

Example #1

Show file

File: CRF.py Project: Unaah/bilbo

	def __init__(self, dirResult, options=None):
		"""
		Initialise the object with its result directory and options.

		Parameters
		----------
		dirResult : string
			directory for result files
		options : dict, optional
			option container stored on the instance; when omitted (None) a new
			empty dict is created for this instance

		Attributes
		----------
		generateXml : GenerateXml
		dirResult : string
			directory for result files
		options : dict
		dirModel : string
			initialised to the empty string
		rootDir : string
			real path of this file with its last four '/'-separated components
			removed (the file name and three enclosing directories)
		"""
		self.generateXml = GenerateXml()
		self.dirResult = dirResult
		# A literal {} default would be created once and shared by every
		# instance built without options; build a fresh dict per instance.
		self.options = {} if options is None else options
		self.dirModel = ""
		main = os.path.realpath(__file__).split('/')
		self.rootDir = "/".join(main[:len(main)-4])

Example #2

Show file

File: CRF.py Project: Unaah/bilbo

class CRF(object):
	"""
	CRF object is created in a Bilbo object
	CRF model learning and test

	The methods prepare CRF input files through Extract_crf, call the Wapiti
	binary for training and labeling, and post-process the labeling output.
	"""

	def __init__(self, dirResult, options=None):
		"""
		Parameters
		----------
		dirResult : string
			directory for result files; it is concatenated directly with file
			names by the other methods
		options : dict, optional
			options handed to Extract_crf; when omitted (None) a new empty
			dict is created for this instance

		Attributes
		----------
		generateXml : GenerateXml
		dirResult : string
			directory for result files
		options : dict
		dirModel : string
			initialised to the empty string, see setDirModel
		rootDir : string
			real path of this file with its last four '/'-separated components
			removed (the file name and three enclosing directories)
		"""
		self.generateXml = GenerateXml()
		self.dirResult = dirResult
		# A literal {} default would be created once and shared by every
		# instance built without options; build a fresh dict per instance.
		self.options = {} if options is None else options
		self.dirModel = ""
		main = os.path.realpath(__file__).split('/')
		self.rootDir = "/".join(main[:len(main)-4])

	def setDirModel(self, dirModel):
		"""
		Store the model directory on the instance.

		Parameters
		----------
		dirModel : string
		"""
		self.dirModel = dirModel

	def prepareTrain(self, corpus, typeCorpus, fileRes, tr=-1, extOption=-1, optsvm=True):
		"""
		Prepare CRF training data

		Parameters
		----------
		corpus : Corpus
		typeCorpus : int, {1, 2, 3}
			1 : corpus 1, 2 : corpus 2...
		fileRes : string
			output file name (written under dirResult)
		tr : int, {1, 0, -1, -2} (default -1)
			check if training or test data
			1 : train, 0 : test without label, -1 : test with label, -2 : test only label
		extOption : int, {-1, 1, ...} (default -1)
			extra option for crf training/test data format
			check if data is internal data, if yes we'll use a modified index for corpus type 2
			-1 : data format for SVM
			1 : data format for normal CRF training/test data
			2-5 : (not yet provided)
		optsvm : bool (default True)
			only consulted when typeCorpus == 2 and extOption == 1; when it equals
			True the indices are modified from the "svm_predictions_training" file
		"""
		listReferences = corpus.getListReferences(typeCorpus)
		newListReferences = ListReferences(listReferences, typeCorpus)
		extractor = Extract_crf(self.options)
		nbRef = corpus.nbReference(typeCorpus)

		# generation of training index for each reference
		extractor.randomgen(newListReferences, 1)

		# If corpus type 2 and extOption == 1, we use a modified index list:
		# the indices are changed to eliminate (or not print) the references
		# classified as non-bibl BY SVM. If optsvm is not True, nothing is modified.
		if typeCorpus == 2 and extOption == 1 and optsvm == True:
			extractor.extractIndices(self.dirResult+"svm_predictions_training", newListReferences)

		# The extraction call itself is the same for every corpus type / option.
		extractor.extract(typeCorpus, nbRef, self.dirResult+fileRes, newListReferences, tr, extOption)

	def prepareTest(self, corpus, typeCorpus, indiceSvm = 0):
		"""
		Prepare CRF test data

		Parameters
		----------
		corpus : Corpus
		typeCorpus : int, {1, 2, 3}
			1 : corpus 1, 2 : corpus 2...
		indiceSvm : int, {0, -1, 2}
			0 : normal(corpus 1)
			-1 : data04SVM (corpus2),
			2 : external data => svm isn't called

		Returns
		-------
		ListReferences
			a new ListReferences built from the references of the corpus
		"""
		listReferences = corpus.getListReferences(typeCorpus)
		listReferencesObj = ListReferences(listReferences, typeCorpus)

		extractor = Extract_crf(self.options)
		nbRef = corpus.nbReference(typeCorpus)

		def freshRefs():
			# Every extractor call receives its own newly built ListReferences wrapper.
			return ListReferences(listReferencesObj.getReferences(), typeCorpus)

		# generation of test index for each reference
		extractor.randomgen(freshRefs(), 0)

		if indiceSvm == -1:
			extractor.extract(typeCorpus, nbRef, self.dirResult+"data04SVM_ori.txt", freshRefs())
		else:
			# files for CRF
			if typeCorpus == 2 and indiceSvm != 2:
				extractor.extractIndices4new(self.dirResult+"svm_predictions_new", freshRefs())

			extractor.extract(typeCorpus, nbRef, self.dirResult+"testdatawithlabel_CRF.txt", freshRefs(), -1, 1)
			extractor.extract(typeCorpus, nbRef, self.dirResult+"testdata_CRF.txt", freshRefs(), 0, 1)

		return freshRefs()

	def _runWapiti(self, command):
		"""
		Run a Wapiti command line through the shell and block until it ends.

		Parameters
		----------
		command : string
			complete shell command
		"""
		process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE)
		# communicate() drains the stdout pipe while waiting. A bare wait() with
		# stdout=PIPE can block forever once the child fills the pipe buffer.
		process.communicate()

	def runTrain(self, directory, fichier, modelname, penalty=0.00001) :
		"""
		Run CRF training module from Wapiti software

		Parameters
		----------
		directory : string
			directory where we save the model
		fichier : string
			filename that has been generated by prepareTrain
		modelname : string
			model file name, appended to directory
		penalty : float (default 0.00001)
			value passed to Wapiti's "-2" option
		"""
		dependencyDir = os.path.join(self.rootDir, 'dependencies')
		command = dependencyDir+"/wapiti-1.4.0/wapiti train -p "+self.rootDir+"/KB/config/wapiti/pattern_ref -2 "+str(penalty)+" "+self.dirResult+fichier+" "+directory+modelname
		self._runWapiti(command)

	def runTest(self, directory, fichier, modelname, addStr="") :
		"""
		Run CRF test module from Wapiti software to label new data

		Parameters
		----------
		directory : string
			directory where we save the model
		fichier : string
			filename that has been generated by prepareTest
		modelname : string
			model file name, appended to directory
		addStr : string (default "")
			infix inserted in the names of the produced "testEstCRF" files;
			the XML result is generated only when it is empty
		"""
		dependencyDir = os.path.join(self.rootDir, 'dependencies')
		wapitiResult = self.dirResult+"testEstCRF"+addStr+"_Wapiti.txt"
		command = dependencyDir+"/wapiti-1.4.0/wapiti label -m "+directory+modelname+" "+self.dirResult+fichier+" "+wapitiResult
		self._runWapiti(command)

		# Create testEstCRF.txt keeping only predicted labels (last column of each line).
		# Both handles are closed even if reading or writing fails.
		with open(self.dirResult+"testEstCRF"+addStr+".txt", 'w') as fafter, open(wapitiResult, 'r') as fbefore:
			for line in fbefore:
				columns = line.split()
				if columns:
					fafter.write(str(columns[-1]))
				# blank lines (separators) are kept as blank lines
				fafter.write("\n")

		if addStr == "" :
			self.generateXml.simpleComp(self.dirResult+"testdata_CRF.txt", self.dirResult+'testEstCRF.txt', 2, self.dirResult+'testEstCRF.xml')

	@staticmethod
	def _readBlock(handle):
		"""
		Yield the lines of `handle` up to the next blank (whitespace-only) line
		or end of file. The terminating blank line is consumed but not yielded.
		"""
		line = handle.readline()
		while line.split():
			yield line
			line = handle.readline()

	def postProcessTest(self, fnameCRFresult, fnameCRFtoAdd, refsAfterSVM):
		"""
		Post-processing of labeling result. After a normal CRF labeling, we return to the SVM classification result,
		then check which notes should be annotated as non-bibliographic ones, then actually modify the labeling result.

		Parameters
		----------
		fnameCRFresult : string
			name of the file (under dirResult) holding the CRF labeling result to read
		fnameCRFtoAdd : string
			name of the file (under dirResult) where the modified result is written
		refsAfterSVM : list of 'Reference' objects
			reference list containing SVM classification result
		"""
		with open(self.dirResult+fnameCRFresult, 'r') as fbefore, open(self.dirResult+fnameCRFtoAdd, 'w') as fafter:
			for reference in refsAfterSVM :
				if reference.train != -1 :
					# kept reference: copy its block of lines unchanged
					for line in self._readBlock(fbefore):
						fafter.write(str(line))
					fafter.write("\n")
				elif len(reference.getWord()) > 0 :
					# if there is no word in the reference, it was already ignored in printing before;
					# otherwise every line of its block is replaced by the non-bibliographic label
					for _ in self._readBlock(fbefore):
						fafter.write("nonbibl \n")
					fafter.write("\n")

		self.generateXml.simpleComp(self.dirResult+"testdata_CRF.txt", self.dirResult+fnameCRFtoAdd, 2, self.dirResult+'testEstCRF.xml')