Ejemplo n.º 1
0
	def evaluate_performance(self,filename_out,l_labels):
		"""
			Evaluate the scores stored in a file against the given labels.

			filename_out: path of a text file with one numeric score per line.
			l_labels:     labels passed, together with the scores, to
			              get_confusion_matrix() and calculate_roc_values().

			Returns a c_result holding the confusion matrix, accuracy, precision
			and recall (these three scaled by 100), the f-score, the ROC points
			and the ROC area.

			Raises IOError/OSError if the file cannot be opened and ValueError if
			a line cannot be parsed as a float.
		"""

		# The context manager closes the file even when a line fails to parse.
		# A real list is built because the scores are consumed twice below
		# (confusion matrix and ROC values).
		with open(filename_out,'r') as file_out:
			l_scores = [float(line.strip()) for line in file_out]

		conf_matrix = self.get_confusion_matrix(l_scores,l_labels)

		# The matrix is read through the keys 'TP', 'TN', 'FP', 'P' and 'N'
		# (presumably P and N are the totals of positive and negative items;
		# confirm against get_confusion_matrix).
		sensitivity = div_sec(conf_matrix['TP'],conf_matrix['P'])
		acc         = 100*div_sec((conf_matrix['TP']+conf_matrix['TN']),(conf_matrix['P']+conf_matrix['N']))
		rec         = 100*sensitivity
		prec        = 100*div_sec(conf_matrix['TP'],(conf_matrix['TP']+conf_matrix['FP']))

		l_fpr_tpr = self.calculate_roc_values(l_scores, l_labels)
		roc_area  = self.calculate_roc_area(l_fpr_tpr)

		fscore = self.get_f_score(acc, prec, rec)

		result = c_result()

		result.set_confusion_matrix(conf_matrix)
		result.set_accuracy(acc)
		result.set_precision(prec)
		result.set_recall(rec)
		result.set_fscore(fscore)
		result.set_roc_values(l_fpr_tpr)
		result.set_roc_area(roc_area)

		return result
Ejemplo n.º 2
0
	def calculate_log_likelihood(self,N_pos,N_neg):
		"""
			Compute the log-likelihood of the feature and store it in
			self.significance. Nothing is returned.

			N_pos: total number of items in the positive set.
			N_neg: total number of items in the negative set.

			The calculation is taken from the paper:
				"Comparing Corpora using Frequency Profiling". Paul Rayson and Roger Garside.
				WCC '00 Proceedings of the workshop on Comparing corpora - Volume 9. 2000

			The contingency table is:

			                  Positive_set       Negative_set
			                  ----------------------------
			feature           n11=no_pos         n12=no_neg
			not_feature       n21=Npos-no_pos    n22=Nneg-no_neg

			no_pos = Number of items of the positive set where the feature appears.
			no_neg = Number of items of the negative set where the feature appears.
			Npos   = Total number of items in the positive set
			Nneg   = Total number of items in the negative set

			Only the "feature" row (n11, n12) enters the formula used here:

				E1 = Npos*(n11+n12)/(Npos+Nneg)
				E2 = Nneg*(n11+n12)/(Npos+Nneg)
				LL = 2*(n11*log(n11/E1) + n12*log(n12/E2))

			The log-likelihood (LL) measures the relative frequency difference between
			the positive and negative sets. The higher the value, the more significant
			the difference is.

			On-line calculator: http://ucrel.lancs.ac.uk/llwizard.html

			Any error raised by the helpers propagates to the caller unchanged.
		"""

		n11 = float(self.no_positives)
		n12 = float(self.no_negatives)

		# Share of items, over both sets, in which the feature appears.
		# div_sec/log_sec are helpers defined elsewhere (presumably guarded
		# division and logarithm; confirm against their definitions).
		coeff = div_sec((n11+n12),(N_pos+N_neg))

		# Expected counts for each set under that pooled share.
		E1 = N_pos*coeff
		E2 = N_neg*coeff

		self.significance = 2*(n11*log_sec(div_sec(n11,E1))+n12*log_sec(div_sec(n12,E2)))