def get_log_likelihood(contingency_table):
	"""Return the G2 log-likelihood statistic of a 2x2 contingency table.

	contingency_table must unpack into exactly four counts, in the order
	(n11, n12, n21, n22).

	The value is 2 * |S|, where S adds x * log_sec(x) for each of the four
	cells, subtracts it for each of the four pairwise marginal sums and adds
	it for the grand total. log_sec is defined outside this function
	(presumably a logarithm that tolerates zero -- confirm against its
	definition).
	"""
	n11, n12, n21, n22 = contingency_table

	def weighted_log(count):
		# One "x * log(x)" term of the statistic.
		return count * log_sec(count)

	# Pairwise marginal sums and the grand total.
	sum_11_21 = n11 + n21
	sum_11_12 = n11 + n12
	sum_21_22 = n21 + n22
	sum_12_22 = n12 + n22
	grand_total = n11 + n21 + n12 + n22

	# Terms are combined strictly left to right; the floating-point result
	# depends on this order.
	cell_part = weighted_log(n11) + weighted_log(n21) + weighted_log(n12) + weighted_log(n22)
	without_margins = cell_part - weighted_log(sum_11_21) - weighted_log(sum_11_12) - weighted_log(sum_21_22) - weighted_log(sum_12_22)
	g2 = without_margins + weighted_log(grand_total)

	return 2 * abs(g2)
Exemple #2
0
def get_log_likelihood(contingency_table):
    """Compute the log-likelihood (G2) score for a 2x2 contingency table.

    Args:
        contingency_table: iterable unpacking to (n11, n12, n21, n22).

    Returns:
        Twice the absolute value of a signed sum of ``x * log_sec(x)``
        terms: added for the four cells and for the grand total,
        subtracted for the four pairwise marginal sums.

    ``log_sec`` comes from elsewhere in the module; presumably a logarithm
    that is safe for zero counts (TODO confirm).
    """
    n11, n12, n21, n22 = contingency_table

    remaining_cells = (n21, n12, n22)
    marginal_sums = (n11 + n21, n11 + n12, n21 + n22, n12 + n22)
    grand_total = n11 + n21 + n12 + n22

    # Accumulate one term at a time in a fixed order, because the
    # floating-point result depends on the order of the additions.
    score = n11 * log_sec(n11)
    for count in remaining_cells:
        score = score + count * log_sec(count)
    for margin in marginal_sums:
        score = score - margin * log_sec(margin)
    score = score + grand_total * log_sec(grand_total)

    return 2 * abs(score)
	def calculate_log_likelihood(self,N_pos,N_neg):
		"""
		Compute the log-likelihood (LL) of this feature and store it in
		self.significance.

		The calculation is taken from the paper:
			"Comparing Corpora using Frequency Profiling". Paul Rayson and Roger Garside.
			WCC '00 Proceedings of the workshop on Comparing corpora - Volume 9. 2000

		The contingency table is:

		                  Positive_set       Negative_set
		                  ----------------------------
		  feature           n11=no_pos       n12=no_neg
		  not_feature       n21=Npos-no_pos  n22=Nneg-no_neg

		no_pos = Number of items of the positive set where the feature appears.
		no_neg = Number of items of the negative set where the feature appears.
		Npos   = Total number of items in the positive set
		Nneg   = Total number of items in the negative set

		Only the feature row (n11, n12) enters the formula used here:

		  coeff = (n11 + n12) / (Npos + Nneg)
		  E1    = Npos * coeff
		  E2    = Nneg * coeff
		  LL    = 2 * (n11 * log(n11 / E1) + n12 * log(n12 / E2))

		The log-likelihood measures the relative frequency difference between
		the positive and negative sets. The higher the value, the more
		significant the difference is.

		On-line calculator: http://ucrel.lancs.ac.uk/llwizard.html

		Parameters:
			N_pos: total number of items in the positive set.
			N_neg: total number of items in the negative set.

		Reads self.no_positives and self.no_negatives, writes
		self.significance, and returns None.

		Divisions and logarithms go through div_sec / log_sec, which are
		defined elsewhere (presumably zero-safe variants -- confirm against
		their definitions). Any exception they raise propagates to the caller.
		"""

		n11 = float(self.no_positives)
		n12 = float(self.no_negatives)

		# Share of all items (both sets together) in which the feature appears.
		coeff = div_sec((n11+n12),(N_pos+N_neg))

		# Expected feature counts in each set if the feature were evenly spread.
		E1 = N_pos*coeff
		E2 = N_neg*coeff

		# Deliberately not wrapped in try/except: a failure in log_sec/div_sec
		# should surface as itself rather than leave self.significance unset.
		LL = 2*(n11*log_sec(div_sec(n11,E1))+n12*log_sec(div_sec(n12,E2)))

		self.significance = LL