Example #1
0
    def from_counts(cls, alphabet, counts, prior= None):
        """Build a LogoData object from counts.

        Arguments:
            alphabet -- handed to the constructor unchanged.
            counts   -- 2D array; ``counts.shape`` is read as
                        (sequence length, alphabet size) and row ``i`` holds
                        the symbol counts for position ``i``.
            prior    -- optional vector that is added to every row of
                        ``counts`` to form the parameters given to
                        ``Dirichlet``. ``None`` or an all-zero-sum prior
                        selects the plain entropy calculation.

        Returns:
            ``cls(seq_length, alphabet, counts, ent, entropy_interval, weight)``
            where ``ent`` has one value per position, ``entropy_interval`` is
            ``None`` without a usable prior and a (seq_length, 2) array
            otherwise, and ``weight`` holds each position's total count
            divided by the largest total count.
        """
        seq_length, A = counts.shape

        if prior is not None:
            prior = array(prior, float64)

        # Every position starts at zero; both branches fill this in.
        ent = zeros(seq_length, float64)

        if prior is None or sum(prior) == 0.0:
            # No usable prior: per-position value is log(A) - entropy(row).
            R = log(A)
            entropy_interval = None
            for i in range(seq_length):
                #FIXME: fixup corebio.moremath.entropy()?
                # Positions without any counts keep the initial 0.0 instead
                # of being passed to entropy().
                if sum(counts[i]) != 0:
                    ent[i] = R - entropy(counts[i])
        else:
            entropy_interval = zeros((seq_length, 2), float64)

            # Normalized prior; identical for every position, so it is
            # computed once rather than twice per loop iteration.
            background = prior / sum(prior)

            for i in range(seq_length):
                alpha = array(counts[i], float64)
                alpha += prior

                posterior = Dirichlet(alpha)
                ent[i] = posterior.mean_relative_entropy(background)
                # 0.95 is forwarded to interval_relative_entropy; presumably
                # the probability mass of the interval -- confirm in Dirichlet.
                entropy_interval[i][0], entropy_interval[i][1] = \
                    posterior.interval_relative_entropy(background, 0.95)

        weight = array(na.sum(counts, axis=1), float)
        heaviest = max(weight)
        # When no position has any counts the largest total is zero; dividing
        # by it would turn every weight into NaN, so the zeros are kept as-is.
        if heaviest != 0:
            weight /= heaviest

        return cls(seq_length, alphabet, counts, ent, entropy_interval, weight)
    def from_counts(cls, alphabet, counts, prior= None):
        """Build a LogoData object from counts.

        Arguments:
            alphabet -- handed to the constructor unchanged.
            counts   -- 2D array; ``counts.shape`` is read as
                        (sequence length, alphabet size) and row ``i`` holds
                        the symbol counts for position ``i``.
            prior    -- optional vector that is added to every row of
                        ``counts`` to form the parameters given to
                        ``Dirichlet``. ``None`` or an all-zero-sum prior
                        selects the plain entropy calculation.

        Returns:
            ``cls(seq_length, alphabet, counts, ent, entropy_interval, weight)``
            where ``ent`` has one value per position, ``entropy_interval`` is
            ``None`` without a usable prior and a (seq_length, 2) array
            otherwise, and ``weight`` holds each position's total count
            divided by the largest total count.
        """
        seq_length, A = counts.shape

        if prior is not None:
            prior = array(prior, float64)

        # Every position starts at zero; both branches fill this in.
        ent = zeros(seq_length, float64)

        if prior is None or sum(prior) == 0.0:
            # No usable prior: per-position value is log(A) - entropy(row).
            R = log(A)
            entropy_interval = None
            for i in range(seq_length):
                #FIXME: fixup corebio.moremath.entropy()?
                # Positions without any counts keep the initial 0.0 instead
                # of being passed to entropy().
                if sum(counts[i]) != 0:
                    ent[i] = R - entropy(counts[i])
        else:
            entropy_interval = zeros((seq_length, 2), float64)

            # Normalized prior; identical for every position, so it is
            # computed once rather than twice per loop iteration.
            background = prior / sum(prior)

            for i in range(seq_length):
                alpha = array(counts[i], float64)
                alpha += prior

                posterior = Dirichlet(alpha)
                ent[i] = posterior.mean_relative_entropy(background)
                # 0.95 is forwarded to interval_relative_entropy; presumably
                # the probability mass of the interval -- confirm in Dirichlet.
                entropy_interval[i][0], entropy_interval[i][1] = \
                    posterior.interval_relative_entropy(background, 0.95)

        weight = array(na.sum(counts, axis=1), float)
        heaviest = max(weight)
        # When no position has any counts the largest total is zero; dividing
        # by it would turn every weight into NaN, so the zeros are kept as-is.
        if heaviest != 0:
            weight /= heaviest

        return cls(seq_length, alphabet, counts, ent, entropy_interval, weight)
Example #3
0
    def from_counts(cls, alphabet, counts, stats_func=None, prior=None, composition=None,
                    ngdata=None, pvalue=None):
        """Build a LogoData object from counts.

        Arguments:
            alphabet    -- handed to the constructor; when it is one of the
                           codon alphabets the dimensions are taken from
                           ``len(counts)`` and ``len(alphabet)`` instead of
                           ``counts.shape``.
            counts      -- per-position symbol counts; row ``i`` is the
                           counts for position ``i``.
            stats_func  -- optional callable invoked once per position as
                           ``stats_func(posterior.alpha, reference)`` when a
                           usable prior is given and no ``pvalue`` was
                           supplied.
            prior       -- optional vector added to every row of ``counts``
                           to form the parameters given to ``Dirichlet``.
                           ``None`` or a zero-sum prior selects the plain
                           entropy calculation.
            composition -- passed to ``posterior.odds_ratio`` and used as the
                           reference for ``stats_func`` when ``ngdata`` is
                           absent; also handed to the constructor.
            ngdata      -- optional object exposing ``counts`` and ``prior``;
                           its per-position posterior replaces
                           ``composition`` as the ``stats_func`` reference.
            pvalue      -- optional precomputed values; converted to a
                           float64 array in the prior branch, passed through
                           untouched otherwise.

        Returns:
            ``cls(seq_length, alphabet, counts, ent, entropy_interval, weight,
            pvalue, composition, odds, max_value, prior)``.

        Raises:
            ValueError -- if ``ngdata.counts`` has a different number of
                          positions than ``counts``.
        """
        if alphabet in [codon_dna_alphabet, codon_rna_alphabet]:
            seq_length, A = len(counts), len(alphabet)
        else:
            seq_length, A = counts.shape

        if prior is not None:
            prior = array(prior, float64)

        if ngdata is not None and ngdata.counts.shape[0] != seq_length:
            raise ValueError("Sequence length in negative dataset should be the same as the input's.")

        # Every position starts at zero; both branches fill this in.
        ent = zeros(seq_length, float64)

        if prior is None or sum(prior) == 0.0:
            # No usable prior: per-position value is log(A) - entropy(row).
            R = log(A)
            odds = None
            entropy_interval = None
            max_value = 0.0
            for i in range(seq_length):
                # Positions without any counts keep the initial 0.0.
                if sum(counts[i]) != 0:
                    ent[i] = R - entropy(counts[i])
                # Track the largest per-position value seen so far.
                if max_value < ent[i]:
                    max_value = ent[i]
        else:
            odds = []
            entropy_interval = zeros((seq_length, 2), float64)
            max_value_ent = 0.0
            max_value_int = 0.0
            if pvalue is not None or stats_func is None:
                # NOTE(review): when both pvalue and stats_func are None this
                # converts None to an array; kept as-is because the consumer
                # of pvalue is not visible here -- confirm this is intended.
                pvalue = array(pvalue, float64)
                pvalue_calc = False
            else:
                pvalue = []
                pvalue_calc = True

            # Normalized prior; identical for every position, so it is
            # computed once rather than twice per loop iteration.
            background = prior / sum(prior)

            for i in range(seq_length):
                alpha = array(counts[i], float64)
                alpha += prior
                posterior = Dirichlet(alpha)
                ent[i] = posterior.mean_relative_entropy(background)
                odds.append(posterior.odds_ratio(composition))
                # 0.95 is forwarded to interval_relative_entropy; presumably
                # the probability mass of the interval -- confirm in Dirichlet.
                entropy_interval[i][0], entropy_interval[i][1] = \
                    posterior.interval_relative_entropy(background, 0.95)
                if pvalue_calc:
                    if ngdata is not None:
                        # Reference composition comes from the negative
                        # dataset's own posterior at this position.
                        ngalpha = array(ngdata.counts[i], float64)
                        ngalpha += ngdata.prior
                        ngposterior = Dirichlet(ngalpha)
                        ngcompos = ngposterior.alpha / sum(ngposterior.alpha)
                        pvalue.append(stats_func(posterior.alpha, ngcompos))
                    else:
                        pvalue.append(stats_func(posterior.alpha, composition))
                if max_value_ent < ent[i]:
                    max_value_ent = ent[i]
                if max_value_int < entropy_interval[i][1]:
                    max_value_int = entropy_interval[i][1]

            # Largest mean value plus largest upper interval bound.
            max_value = max_value_ent + max_value_int

        weight = array(na.sum(counts, axis=1), float)
        heaviest = max(weight)
        # When no position has any counts the largest total is zero; dividing
        # by it would turn every weight into NaN, so the zeros are kept as-is.
        if heaviest != 0:
            weight /= heaviest
        return cls(seq_length, alphabet, counts, ent, entropy_interval,
                   weight, pvalue, composition, odds, max_value, prior)