def get_log_likelihood(contingency_table):
    """
    Return the G2 log-likelihood score of a 2x2 contingency table.

    ``contingency_table`` is unpacked as ``(n11, n12, n21, n22)``; the
    digits are taken here to be row and column indices of the cells.

    Every quantity ``v`` contributes ``v * log_sec(v)``: the four cells are
    added, the four two-cell marginal sums are subtracted and the sum of
    all four cells is added. The result is ``2 * abs(...)`` of that sum.
    """
    def weighted_log(value):
        # A single v * log_sec(v) term of the statistic.
        return value * log_sec(value)

    (c11, c12, c21, c22) = contingency_table
    score = (
        weighted_log(c11)
        + weighted_log(c21)
        + weighted_log(c12)
        + weighted_log(c22)
        - weighted_log(c11 + c21)
        - weighted_log(c11 + c12)
        - weighted_log(c21 + c22)
        - weighted_log(c12 + c22)
        + weighted_log(c11 + c21 + c12 + c22)
    )
    return 2 * abs(score)
def get_log_likelihood(contingency_table):
    """
    Compute the G2 log-likelihood statistic for a 2x2 contingency table.

    The argument must unpack into exactly four values, read in the order
    ``(n11, n12, n21, n22)``.

    The statistic sums ``x * log_sec(x)`` over the four cells, subtracts
    the same quantity for each of the four marginal totals, and adds it
    for the grand total. The function returns twice the absolute value
    of that sum.
    """
    cell_11, cell_12, cell_21, cell_22 = contingency_table

    # Marginal totals, written with the same operand order as the formula.
    first_column = cell_11 + cell_21
    first_row = cell_11 + cell_12
    second_row = cell_21 + cell_22
    second_column = cell_12 + cell_22
    grand_total = cell_11 + cell_21 + cell_12 + cell_22

    cells_part = (
        cell_11 * log_sec(cell_11)
        + cell_21 * log_sec(cell_21)
        + cell_12 * log_sec(cell_12)
        + cell_22 * log_sec(cell_22)
    )
    statistic = (
        cells_part
        - first_column * log_sec(first_column)
        - first_row * log_sec(first_row)
        - second_row * log_sec(second_row)
        - second_column * log_sec(second_column)
        + grand_total * log_sec(grand_total)
    )
    return 2 * abs(statistic)
def calculate_log_likelihood(self, N_pos, N_neg):
    """
    Compute the log-likelihood (LL) of this feature and store it in
    ``self.significance``.

    The calculation is taken from the paper:
    "Comparing Corpora using Frequency Profiling".
    Paul Rayson and Roger Garside.
    WCC '00 Proceedings of the workshop on Comparing corpora - Volume 9. 2000

    The contingency table is:

                      Positive_set           Negative_set
                      ----------------------------------------
        feature       n11 = no_pos           n12 = no_neg
        not_feature   n21 = Npos - no_pos    n22 = Nneg - no_neg

    no_pos = Number of items of the positive set where the feature appears.
    no_neg = Number of items of the negative set where the feature appears.
    Npos = Total number of items in the positive set
    Nneg = Total number of items in the negative set

    Only the "feature" row enters the formula used here:

        coeff = div_sec(n11 + n12, Npos + Nneg)
        E1 = Npos * coeff
        E2 = Nneg * coeff
        LL = 2 * (n11 * log_sec(div_sec(n11, E1))
                  + n12 * log_sec(div_sec(n12, E2)))

    The log-likelihood (LL) measures the relative frequency difference
    between the positive and negative sets. The higher the value, the more
    significant the difference is.

    On-line calculator: http://ucrel.lancs.ac.uk/llwizard.html

    Parameters:
        N_pos: total number of items in the positive set.
        N_neg: total number of items in the negative set.

    Reads ``self.no_positives`` and ``self.no_negatives`` (converted to
    float). Returns nothing; the result is written to
    ``self.significance``.

    Raises:
        Any exception raised by ``div_sec`` or ``log_sec`` propagates
        unchanged and ``self.significance`` is left untouched.
    """
    observed_pos = float(self.no_positives)
    observed_neg = float(self.no_negatives)

    # Expected counts if the feature were spread evenly over both sets.
    coeff = div_sec(observed_pos + observed_neg, N_pos + N_neg)
    expected_pos = N_pos * coeff
    expected_neg = N_neg * coeff

    # No try/except here on purpose: the previous bare ``except`` only
    # printed a debug marker and then failed on an unbound ``LL``, which
    # hid the real error. Letting the original exception surface keeps
    # the failure but makes its cause visible.
    self.significance = 2 * (
        observed_pos * log_sec(div_sec(observed_pos, expected_pos))
        + observed_neg * log_sec(div_sec(observed_neg, expected_neg))
    )