def evaluate_performance(self, filename_out, l_labels):
    """
    Score a classifier output file against the expected labels.

    The file ``filename_out`` is read as one numeric score per line; each
    line is stripped and converted with ``float``.  The scores and
    ``l_labels`` are handed to ``self.get_confusion_matrix``, and the
    returned mapping is read through the keys 'TP', 'TN', 'FP', 'P' and 'N'.

    Accuracy, precision and recall are stored as percentages (ratio * 100).
    All ratios go through the ``div_sec`` helper (defined elsewhere;
    presumably a division that tolerates a zero denominator -- confirm).

    Parameters:
        filename_out -- path of the file holding the scores.
        l_labels     -- expected labels, passed unchanged to
                        ``self.get_confusion_matrix`` and
                        ``self.calculate_roc_values``.

    Returns:
        A ``c_result`` object populated with the confusion matrix,
        accuracy, precision, recall, F-score, ROC values and ROC area.

    Raises:
        ValueError if a line of the file cannot be converted to float
        (including an empty line).
    """
    # The context manager guarantees the file is closed even when a line
    # fails to parse.
    with open(filename_out, 'r') as file_out:
        l_scores = [float(line.strip()) for line in file_out]

    conf_matrix = self.get_confusion_matrix(l_scores, l_labels)
    true_pos = conf_matrix['TP']
    true_neg = conf_matrix['TN']

    # NOTE: a previous version also computed an unused "specificity" as
    # TN / (TN + TP), which is not specificity (that would be TN / N).
    # It was never stored in the result, so it has been dropped.
    acc = 100 * div_sec(true_pos + true_neg,
                        conf_matrix['P'] + conf_matrix['N'])
    rec = 100 * div_sec(true_pos, conf_matrix['P'])
    prec = 100 * div_sec(true_pos, true_pos + conf_matrix['FP'])

    l_fpr_tpr = self.calculate_roc_values(l_scores, l_labels)
    roc_area = self.calculate_roc_area(l_fpr_tpr)
    fscore = self.get_f_score(acc, prec, rec)

    result = c_result()
    result.set_confusion_matrix(conf_matrix)
    result.set_accuracy(acc)
    result.set_precision(prec)
    result.set_recall(rec)
    result.set_fscore(fscore)
    result.set_roc_values(l_fpr_tpr)
    result.set_roc_area(roc_area)
    return result
def calculate_log_likelihood(self, N_pos, N_neg):
    """
    Compute the log-likelihood (LL) of this feature and store it in
    ``self.significance``.

    The calculus is taken from the paper:
    "Comparing Corpora using Frequency Profiling".
    Paul Rayson and Roger Garside.
    WCC '00 Proceedings of the workshop on Comparing corpora - Volume 9. 2000

    The contingency table is:

                     Positive_set      Negative_set
                     ------------------------------
        feature      n11=no_pos        n12=no_neg
        not_feature  n21=Npos-no_pos   n22=Nneg-no_neg

    no_pos = Number of items of the positive set where the feature appears.
    no_neg = Number of items of the negative set where the feature appears.
    Npos   = Total number of items in the positive set
    Nneg   = Total number of items in the negative set

    The log-likelihood (LL) measures the relative frequency difference
    between the positive and negative sets. The higher the value, the more
    significant the difference is.

    On-line calculator: http://ucrel.lancs.ac.uk/llwizard.html

    Parameters:
        N_pos -- total number of items in the positive set.
        N_neg -- total number of items in the negative set.

    The feature counts are read from ``self.no_positives`` and
    ``self.no_negatives``.  Nothing is returned; the result is written to
    ``self.significance``.

    ``div_sec`` and ``log_sec`` are helpers defined elsewhere (presumably
    division and logarithm variants that tolerate zero -- confirm).  Any
    exception they raise propagates to the caller.
    """
    n11 = float(self.no_positives)
    n12 = float(self.no_negatives)

    # Expected counts if the feature were spread evenly across both sets.
    coeff = div_sec(n11 + n12, N_pos + N_neg)
    expected_pos = N_pos * coeff
    expected_neg = N_neg * coeff

    # Only the two "feature present" cells (n11, n12) enter the formula;
    # n21 and n22 from the table above are not needed.
    #
    # No try/except here: the old bare ``except`` printed a debug marker and
    # then failed with an unbound ``LL``, hiding the real error.
    self.significance = 2 * (
        n11 * log_sec(div_sec(n11, expected_pos))
        + n12 * log_sec(div_sec(n12, expected_neg))
    )