Example #1
	def div(self,N):
		"""Divide every stored evaluation metric and confusion-matrix count by N."""

		if not isNumber(N):
			raise Exception("ERROR","mod_svm.%s.div: The value to divide by is not a number" % (self.__class__.__name__))

		if N == 0:
			raise Exception("ERROR","mod_svm.%s.div: The number to divide by is zero" % (self.__class__.__name__))

		self.accuracy  = self.accuracy/N
		self.precision = self.precision/N
		self.recall    = self.recall/N
		self.fscore    = self.fscore/N
		self.roc_area  = self.roc_area/N
		self.confusion_matrix['TP'] = self.confusion_matrix['TP']/N
		self.confusion_matrix['TN'] = self.confusion_matrix['TN']/N
		self.confusion_matrix['FP'] = self.confusion_matrix['FP']/N
		self.confusion_matrix['FN'] = self.confusion_matrix['FN']/N
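
A minimal sketch of how div might be used to average the metrics accumulated over several evaluation runs. The c_result class name, its add method, and the l_fold_results list are illustrative assumptions, not part of the module excerpted above:

	# Hypothetical driver code, assuming a results class c_result that exposes
	# an add() accumulator alongside the div() method shown in Example #1.
	avg_result = c_result()
	for fold_result in l_fold_results:        # l_fold_results: one c_result per evaluation run
		avg_result.add(fold_result)           # accumulate accuracy, precision, recall, fscore, roc_area, confusion_matrix
	avg_result.div(len(l_fold_results))       # divide every accumulated metric by the number of runs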
Example #2
	def train_K_fold(self,hash_filename_training=None,model_filename=None,alpha_filename=None):
		
		"""
			In K-fold training the input is not a single pattern file but a set of K files.
			The pattern file is splitted into K pieces and the training is composed by K sub-trainings. 
			In each round the Kth piece is retained and the training is performed with the remaining pieces. The Kth piece is used to validate the model
			
			The hash structure that is needed as input is composed by K keys each of one corresponds to a differerent partition.
			The entry with k key is composed by a tuple. The first element is a file with the training patterns and the other with the validation pattern
		"""
		
		if hash_filename_training is None and model_filename is None and alpha_filename is None:
			# Fall back to the filenames configured on the instance.
			hash_filename_training = self.train_filename
			model_filename    = self.modelfilename
			alpha_filename    = self.alpha_filename

			if hash_filename_training is None and model_filename is None and alpha_filename is None:
				raise Exception('ERROR','mod_svm.c_svm_light.train_K_fold: The files needed for training have not been provided')
		
		l_folds = sorted(hash_filename_training.keys()) 
		
		if any(not isNumber(k) for k in l_folds):
			raise Exception('ERROR','c_svm_light.train_K_fold: The input hash for training is not properly built')
		
		(root,ext) = os.path.splitext(model_filename)
		
		log_filename        = root + '.log'
		
		l_results        = []
		l_results_fscore = []
		
		for k in l_folds:
			
			(filename_training,filename_validation) = hash_filename_training[k]
			
			model_filename_k      = root + '.%d.svm' % (k)
			filename_prediction_k = root + '.%d.out' % (k)
			
			# Train on the K-1 training pieces of fold k and evaluate on its held-out piece.
			self.train(filename_training, model_filename_k, alpha_filename)

			val_result = self.test(model_filename_k, filename_validation, filename_prediction_k, log_filename)
			
			l_results_fscore.append((k,val_result.get_fscore()))
			l_results.append((k,val_result))
			
		# The fold whose f-score equals the median f-score is taken as representative
		# (note: with an even number of folds the median may not match any single fold).
		median_fscore = numpy_median([fs for (k,fs) in l_results_fscore])

		k_median = [k for (k,fs) in l_results_fscore if fs == median_fscore][0]

		result = [res for (k,res) in l_results if k == k_median][0]
		
		# Retrain on the partition of the median fold to produce the final model,
		# and reuse that fold's prediction file as the overall prediction output.
		(filename_training,filename_validation) = hash_filename_training[k_median]

		self.train(filename_training, model_filename, alpha_filename)

		filename_prediction          = root + '.out'
		filename_prediction_k_median = root + '.%d.out' % (k_median)

		shutil.copyfile(filename_prediction_k_median, filename_prediction)
						
		return result
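
A minimal usage sketch for the hash structure described in the docstring of train_K_fold. The class name c_svm_light comes from the error messages above; the constructor call, the file names, and the choice of K = 3 folds are illustrative assumptions:

	# Hypothetical driver code: build the per-fold hash and run K-fold training.
	# Each key is a fold index; each value is (training-pattern file, validation-pattern file).
	hash_filename_training = {
		1: ('patterns.fold1.train', 'patterns.fold1.val'),
		2: ('patterns.fold2.train', 'patterns.fold2.val'),
		3: ('patterns.fold3.train', 'patterns.fold3.val'),
	}

	svm = c_svm_light()                                      # assumed default construction
	result = svm.train_K_fold(hash_filename_training,
	                          'model.svm',                   # model_filename: final model written here
	                          'model.alpha')                 # alpha_filename passed through to train()
	print(result.get_fscore())                               # result object of the median-f-score fold

An odd number of folds keeps the median f-score equal to one of the measured per-fold values, which the fold-selection step in train_K_fold relies on.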