def plot_motif(motif, file_name, title, fout=None): print 'Generating %s ...' % file_name # convert probabilities to counts by simply scaling the pobabilities, # this is ok, since the logo is scaled by the discrete entropy and not # the differential entropy of the Dirichlet-compound posterior distribution counts = [ [ int(round(motif[j][i]*1000)) for j in range(len(motif)) ] for i in range(len(motif[0])) ] alphabet = Alphabet(DNA.letters, zip(DNA.letters.lower(), DNA.letters)) data = LogoData.from_counts(alphabet, np.array(counts)) options = LogoOptions() options.color_scheme = nucleotide options.logo_title = title options.creator_text = '' options.fineprint = '' options.stacks_per_line = 60 options.yaxis_scale = 1.0 options.scale_width = False # use the natural logarithm as unit options.unit_name = "nats" options.yaxis_label = "p" # use discrete entropy to scale the logo # note: somehow the weblogo library requires that the entropy is inverted: # Log(|A|) - H(X) # that is, the stack height is simply defined by what is given by data.entropy, # since we use nats, the entropy should also be in the interval [0, 1]! data.entropy = map(lambda x: 1.0 - x/math.log(4), information.entropy(motif, len(motif), len(motif[0]))) data.entropy_interval = None format = LogoFormat(data, options) if not fout: fout = open(file_name, 'w') pdf_formatter(data, format, fout) fout.close() else: pdf_formatter(data, format, fout)
# NOTE(review): this is the interior of a larger function -- the free names
# (i, j, X, distribution, N_hypercolumns, units_per_hypercolumn, low_noise,
# and the helpers p_independent/joint/entropy/...) are defined outside this
# view.  Formatting below is reconstructed; confirm the if-block extent
# against the original file.
# Marginal distributions of units i and j under independence.
p = p_independent(N_hypercolumns, units_per_hypercolumn, X)
p_i = p[i, :]
p_j = p[j, :]
# Empirical joint distribution of units i and j.
p_joint = joint(i, j, X, distribution, units_per_hypercolumn)
# Keep an unsmoothed copy (aux appears unused here -- TODO confirm).
aux = np.copy(p_joint)
if np.any(p_joint == 0):
    # Joint
    # Floor zero cells at low_noise, renormalize, and rebuild the marginals
    # from the smoothed joint so entropies stay finite.
    p_joint[p_joint < low_noise] = low_noise
    # NOTE(review): `sum` shadows the builtin -- harmless here but worth renaming.
    sum = p_joint.sum()
    p_joint = p_joint / sum
    p_i = p_joint.sum(axis=1)
    p_j = p_joint.sum(axis=0)
# Calculate the entropies
x1 = entropy(p_i)
x2 = entropy(p_j)
x3 = joint_entropy(p_joint)
# Two mutual-information estimates plus the identity I = H(i) + H(j) - H(i,j).
MI = mutual_information(p_i, p_j, p_joint)
MI2 = mutual_information2(p_i, p_j, p_joint)
MI_alt = x1 + x2 - x3
# Variation-of-information style distances D = H(i,j) - I.
D1 = x3 - MI
D2 = x3 - MI2
print 'MI', MI
print 'MI2', MI2
print 'MI_alt', MI_alt
print np.isclose(MI, MI2)
print 'distances 1, 2', D1, D2
def entropy(self):
    """Compute the entropy of this object's motif via information.entropy."""
    current_motif = self.motif()
    return information.entropy(current_motif)
def entropy(self):
    """Compute the entropy of this object's motif.

    Delegates to information.entropy, passing the stored dimensions
    self.n and self.m alongside the motif.
    """
    return information.entropy(self.motif(), self.n, self.m)