def read(handle, format):
    """Read a single motif from a JASPAR file.

    Arguments:
     - handle - an iterator over text lines (e.g. an open file)
     - format - "pfm" for a JASPAR .pfm count matrix, or "sites" for a
       JASPAR .sites file

    Returns a Motif over the unambiguous DNA alphabet whose mask is "*"
    repeated motif.length times.

    Raises ValueError if format is neither "pfm" nor "sites".
    """
    # Reject unknown formats up front; nothing is read from the handle.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)

    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file
        counts = {}
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # Build a real list: on Python 3 map() would leave a one-shot
            # iterator in the counts dictionary.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    else:
        # reads the motif from Jaspar .sites file
        instances = []
        for line in handle:
            if not line.startswith(">"):
                break
            # line contains the header ">...."
            # now read the actual sequence; the next() builtin works on
            # both Python 2 and Python 3 iterators, unlike handle.next()
            line = next(handle)
            # keep only the characters that are unchanged by upper()
            instance = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(instance, alphabet))
        motif = Motif(alphabet, instances=instances)
    motif.mask = "*" * motif.length
    return motif
def read(handle, format):
    """Read a single motif from a JASPAR file.

    Arguments:
     - handle - an iterator over text lines (e.g. an open file)
     - format - "pfm" for a JASPAR .pfm count matrix, or "sites" for a
       JASPAR .sites file

    Returns a Motif over the unambiguous DNA alphabet; its mask is set
    through set_mask() to "*" repeated motif.length times.

    Raises ValueError if format is neither "pfm" nor "sites".
    """
    # Reject unknown formats up front; nothing is read from the handle.
    if format not in ("pfm", "sites"):
        raise ValueError("Unknown format %s" % format)

    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file
        counts = {}
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # Build a real list: on Python 3 map() would leave a one-shot
            # iterator in the counts dictionary.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    else:
        # reads the motif from Jaspar .sites file
        instances = []
        for line in handle:
            if not line.startswith(">"):
                break
            # line contains the header ">...."
            # now read the actual sequence; the next() builtin works on
            # both Python 2 and Python 3 iterators, unlike handle.next()
            line = next(handle)
            # keep only the characters that are unchanged by upper()
            instance = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(instance, alphabet))
        motif = Motif(alphabet, instances=instances)
    motif.set_mask("*" * motif.length)
    return motif
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner (DEPRECATED).

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    This class is DEPRECATED; please use the read() function in this module
    instead.
    """

    def __init__(self):
        """Emit the deprecation warning and start with no motifs."""
        import warnings
        warnings.warn(
            "Bio.Motif.Parsers.AlignAce.AlignAceConsumer is deprecated; please use the read() function in this module instead.",
            Bio.BiopythonDeprecationWarning)
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start a fresh parameter dictionary (line is ignored)."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict."""
        fields = line.split("=")
        par_name = fields[0].strip()
        par_value = fields[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Start a fresh list of sequence names (line is ignored)."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the second tab-separated field of line as a sequence name."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Begin a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field of line as a motif instance."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from line, minus its line ending."""
        # "\r\n" replaces the invalid escape "\c", which made strip()
        # remove backslashes and the letter "c" while leaving "\r" behind.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Remember the version line as-is."""
        self.ver = line

    def command_line(self, line):
        """Remember the command line as-is."""
        self.cmd_line = line
class AlignAceConsumer(object):
    """
    The general purpose consumer for the AlignAceScanner (DEPRECATED).

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    This class is DEPRECATED; please use the read() function in this module
    instead.
    """

    def __init__(self):
        """Emit the deprecation warning and start with no motifs."""
        import warnings
        warnings.warn(
            "Bio.Motif.Parsers.AlignAce.AlignAceConsumer is deprecated; please use the read() function in this module instead.",
            Bio.BiopythonDeprecationWarning)
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start a fresh parameter dictionary (line is ignored)."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict."""
        fields = line.split("=")
        par_name = fields[0].strip()
        par_value = fields[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Start a fresh list of sequence names (line is ignored)."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the second tab-separated field of line as a sequence name."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Begin a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field of line as a motif instance."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from line, minus its line ending."""
        # "\r\n" replaces the invalid escape "\c", which made strip()
        # remove backslashes and the letter "c" while leaving "\r" behind.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Remember the version line as-is."""
        self.ver = line

    def command_line(self, line):
        """Remember the command line as-is."""
        self.cmd_line = line
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner.

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.
    """

    def __init__(self):
        """Start with no motifs, no current motif and no parameters."""
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start a fresh parameter dictionary (line is ignored)."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict."""
        fields = line.split("=")
        par_name = fields[0].strip()
        par_value = fields[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Start a fresh list of sequence names (line is ignored)."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the second tab-separated field of line as a sequence name."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Begin a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field of line as a motif instance."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from line, minus its line ending."""
        # "\r\n" replaces the invalid escape "\c", which made strip()
        # remove backslashes and the letter "c" while leaving "\r" behind.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Remember the version line as-is."""
        self.ver = line

    def command_line(self, line):
        """Remember the command line as-is."""
        self.cmd_line = line
def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
    """Draw weblogos for self.kmer_lst in both orientations.

    The base names of both output paths are stored on the instance. A DNA
    motif is built from the k-mers and passed, followed by its reverse
    complement, to construct_weblogo_helper. self.logowidth becomes the
    largest of its previous value and the two values the helper returned.
    """
    self.weblogo_basename = os.path.basename(weblogo_filename)
    self.weblogo_revcompl_basename = os.path.basename(
        weblogo_revcompl_filename)

    forward = Motif(alphabet=IUPAC.unambiguous_dna)
    for kmer in self.kmer_lst:
        forward.add_instance(Seq(kmer, forward.alphabet))
    forward_width = self.construct_weblogo_helper(weblogo_filename, forward)

    # same procedure for the reverse complement
    backward = forward.reverse_complement()
    backward_width = self.construct_weblogo_helper(
        weblogo_revcompl_filename, backward)

    self.logowidth = max(self.logowidth, forward_width, backward_width)
def read(handle):
    """Parse AlignACE output from handle and return a Record.

    The first two lines are stored unmodified as record.ver and
    record.cmd_line. Every following line is dispatched on its content:
    parameter lines fill record.param_dict, "#" lines add sequence names to
    record.seq_dict, and "Motif"/hit/mask/"MAP" lines build up the motifs
    appended to record.motifs.

    Raises ValueError (carrying the offending line) for a line that matches
    none of the known patterns.
    """
    record = Record()
    record.ver = next(handle)
    record.cmd_line = next(handle)
    for line in handle:
        if line.strip() == "":
            pass
        elif line[:4] == "Para":
            record.param_dict = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.seq_dict.append(seq_name)
        elif "=" in line:
            # split once and reuse the pieces for name and value
            fields = line.split("=")
            par_name = fields[0].strip()
            par_value = fields[1].strip()
            record.param_dict[par_name] = par_value
        elif line[:5] == "Input":
            record.seq_dict = []
        elif line[:5] == "Motif":
            record.current_motif = Motif()
            record.motifs.append(record.current_motif)
            record.current_motif.alphabet = IUPAC.unambiguous_dna
        elif line[:3] == "MAP":
            record.current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            record.current_motif.add_instance(seq)
        elif "*" in line:
            # "\r\n" replaces the invalid escape "\c", which made strip()
            # remove backslashes and the letter "c" while leaving "\r".
            record.current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
    """Draw weblogos for self.kmer_lst in both orientations.

    The base names of both output paths are stored on the instance. A DNA
    motif is built from the k-mers and passed, followed by its reverse
    complement, to construct_weblogo_helper. self.logowidth becomes the
    largest of its previous value and the two values the helper returned.
    """
    self.weblogo_basename = os.path.basename(weblogo_filename)
    self.weblogo_revcompl_basename = os.path.basename(
        weblogo_revcompl_filename)

    forward = Motif(alphabet=IUPAC.unambiguous_dna)
    for kmer in self.kmer_lst:
        forward.add_instance(Seq(kmer, forward.alphabet))
    forward_width = self.construct_weblogo_helper(weblogo_filename, forward)

    # same procedure for the reverse complement
    backward = forward.reverse_complement()
    backward_width = self.construct_weblogo_helper(
        weblogo_revcompl_filename, backward)

    self.logowidth = max(self.logowidth, forward_width, backward_width)
def read(handle):
    """Parse AlignACE output from handle and return a Record.

    The first two lines become record.version and record.command (stripped).
    Every following line is stripped and dispatched on its content:
    parameter lines fill record.parameters, "#" lines add sequence names to
    record.sequences, and each "Motif" ... "MAP" group yields one Motif
    (with score, number and mask set) appended to record.motifs.

    Raises ValueError for a line that matches none of the known patterns,
    or for a parameter line that does not contain exactly one "=".
    """
    record = Record()
    # The next() builtin works on both Python 2 and Python 3 iterators,
    # whereas the handle.next() method only exists on Python 2.
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            par_name, par_value = line.split("=")
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            words = line.split()
            assert words[0] == "Motif"
            number = int(words[1])
            instances = []
        elif line[:3] == "MAP":
            # The MAP line closes a motif: the instances, number and mask
            # collected since the "Motif" line are combined here.
            motif = Motif(IUPAC.unambiguous_dna, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.set_mask(mask)
            record.motifs.append(motif)
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            instances.append(seq)
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record
def read(handle):
    """Parse AlignACE output from handle and return a Record.

    The first two lines become record.version and record.command (stripped).
    Every following line is stripped and dispatched on its content:
    parameter lines fill record.parameters, "#" lines add sequence names to
    record.sequences, and each "Motif" ... "MAP" group yields one Motif
    (with score, number and mask set) appended to record.motifs.

    Raises ValueError for a line that matches none of the known patterns,
    or for a parameter line that does not contain exactly one "=".
    """
    record = Record()
    # The next() builtin works on both Python 2 and Python 3 iterators,
    # whereas the handle.next() method only exists on Python 2.
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            par_name, par_value = line.split("=")
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            words = line.split()
            assert words[0] == "Motif"
            number = int(words[1])
            instances = []
        elif line[:3] == "MAP":
            # The MAP line closes a motif: the instances, number and mask
            # collected since the "Motif" line are combined here.
            motif = Motif(IUPAC.unambiguous_dna, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.mask = mask
            record.motifs.append(motif)
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            instances.append(seq)
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record
def read(handle):
    """Parse AlignACE output from handle and return a Record.

    The first two lines become record.version and record.command (stripped).
    Every following line is stripped and dispatched on its content:
    parameter lines fill record.parameters, "#" lines add sequence names to
    record.sequences, and "Motif"/hit/mask/"MAP" lines build up the motifs
    appended to record.motifs.

    Raises ValueError for a line that matches none of the known patterns,
    or for a parameter line that does not contain exactly one "=".
    """
    record = Record()
    # The next() builtin works on both Python 2 and Python 3 iterators,
    # whereas the handle.next() method only exists on Python 2.
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            par_name, par_value = line.split("=")
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif line[:3] == "MAP":
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # "\r\n" replaces the invalid escape "\c", which made strip()
            # target backslashes and the letter "c" instead of "\r".
            current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
def read(handle):
    """Parse AlignACE output from handle and return a Record.

    The first two lines become record.version and record.command (stripped).
    Every following line is stripped and dispatched on its content:
    parameter lines fill record.parameters, "#" lines add sequence names to
    record.sequences, and "Motif"/hit/mask/"MAP" lines build up the motifs
    appended to record.motifs.

    Raises ValueError for a line that matches none of the known patterns,
    or for a parameter line that does not contain exactly one "=".
    """
    record = Record()
    # The next() builtin works on both Python 2 and Python 3 iterators,
    # whereas the handle.next() method only exists on Python 2.
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            par_name, par_value = line.split("=")
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif line[:3] == "MAP":
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # "\r\n" replaces the invalid escape "\c", which made strip()
            # target backslashes and the letter "c" instead of "\r".
            current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
def __init__(self):
    """Run the BaseMotif initialiser, then start with no references."""
    BaseMotif.__init__(self)
    # filled in later; a new list per instance
    self.references = list()
def __init__(self):
    """Run the Motif initialiser, then start with an e-value of 0.0."""
    Motif.__init__(self)
    # placeholder until a real e-value is assigned
    self.evalue = 0.0
def motif(self, line):
    """Begin a new motif and make it the current one.

    The new Motif is appended to self.motifs and then given the
    unambiguous DNA alphabet. The line argument is not used.
    """
    fresh = Motif()
    self.current_motif = fresh
    self.motifs.append(fresh)
    fresh.alphabet = IUPAC.unambiguous_dna
def __init__(self):
    """Run the Motif initialiser, then start with an e-value of 0.0."""
    Motif.__init__(self)
    # placeholder until a real e-value is assigned
    self.evalue = 0.0
def motif(self, line):
    """Begin a new motif and make it the current one.

    The new Motif is appended to self.motifs and then given the
    unambiguous DNA alphabet. The line argument is not used.
    """
    fresh = Motif()
    self.current_motif = fresh
    self.motifs.append(fresh)
    fresh.alphabet = IUPAC.unambiguous_dna
def __init__(self, alphabet=None, instances=None):
    """Forward alphabet and instances to Motif, then set evalue to 0.0."""
    Motif.__init__(self, alphabet, instances)
    # placeholder until a real e-value is assigned
    self.evalue = 0.0
def __init__(self, alphabet=None, instances=None):
    """Forward alphabet and instances to Motif, then set evalue to 0.0."""
    Motif.__init__(self, alphabet, instances)
    # placeholder until a real e-value is assigned
    self.evalue = 0.0
def graph_logo(
    alignment,
    columns,
    filename=None,
    dpi=None,
    edgecolor='k',
    figsize=None,
    format='pdf',
    labels=None,
    linewidth=0.,
    transparent=True,
    refidx=-1
):
    """Draw a sequence logo for selected alignment columns and save it.

    Arguments:
     - alignment - the aligned records; it is sliced by row, indexed as
       alignment[:, i] by column, and each record's .seq is used
       (presumably a Biopython MultipleSeqAlignment -- confirm with callers)
     - columns - the column indices to include, in drawing order
     - filename - output path; a temporary file is created when None
     - dpi - passed to plt.figure
     - edgecolor - edge colour applied to every letter glyph
     - figsize - passed to plt.figure; defaults to (3, 3)
     - format - passed to fig.savefig
     - labels - x tick labels, one per column; defaults to the 1-based
       column indices as strings
     - linewidth - line width applied to every letter glyph
     - transparent - make figure and axes backgrounds fully transparent
     - refidx - when >= 0, the row at this index is left out of the logo

    Returns the filename the figure was saved to.

    Raises RuntimeError if none of the DNA, RNA or amino acid alphabets
    verifies for all sequences.

    Side effect: the .seq.alphabet of every record in the alignment used
    for the logo is overwritten during alphabet detection.
    """
    if filename is None:
        # only the path is needed, so the open descriptor is closed at once
        fd, filename = mkstemp(); close(fd)

    if figsize is None:
        figsize = (3, 3)

    if labels is None:
        labels = ['%d' % (idx + 1) for idx in columns]

    if refidx >= 0:
        # rebuild the alignment without the reference row
        msa = alignment
        alignment = msa[:refidx]
        alignment.extend(msa[refidx + 1:])

    M = len(alignment)  # number of sequences
    N = len(columns)  # number of logo positions

    # try each candidate alphabet in turn and keep the first one that
    # verifies for every sequence
    alph = None
    for _alph in (_DNA_ALPHABET, _RNA_ALPHABET, _AMINO_ALPHABET):
        for r in alignment:
            r.seq.alphabet = _alph
        if all([_verify_alphabet(r.seq.upper()) for r in alignment]):
            alph = _alph
            break
    if alph is None:
        raise RuntimeError("sequences with indeterminable alphabet provided")

    motif = Motif(alphabet=alph)

    # one instance per sequence, made of its residues at the chosen columns
    instances = (''.join(z).upper() for z in zip(*[alignment[:, i] for i in columns]))
    for instance in instances:
        motif.add_instance(Seq(instance, alph))

    # set laplace = True to include the backgrounds
    pwm = _fix_ambigs(motif.pwm(laplace=False), alph)

    # heuristic to determine whether nucleotide or protein alphabet
    # need to use either base 4 or 20 depending
    alphlen, _alphkeys = max(((len(pwm[i]), pwm[i].keys()) for i in range(N)), key=itemgetter(0))
    s, colors = (4, _DNA_COLORS) if alphlen < 20 else (20, _AMINO_COLORS)
    # index 0 is reserved for "no letter" so that a zero entry in
    # identities can be tested for truthiness below
    alphkeys = ['']
    alphkeys.extend(_alphkeys)
    alphmap = dict(zip(alphkeys, range(len(alphkeys))))

    # compute the information content at each position
    maxbits = np.log2(s)
    # correction term that shrinks as the number of sequences M grows
    # (presumably the usual small-sample correction -- confirm)
    e_n = (s - 1) / (2. * np.log(2) * M)
    R = maxbits * np.ones((N,), dtype=float)
    # subtract the entropy of each column
    R -= [-sum(v * np.log2(v) for _, v in pwm[i].items() if v > 0.)
          for i in range(N)]
    R -= e_n

    heights = np.zeros((alphlen, N), dtype=float)
    identities = np.zeros((alphlen, N), dtype=int)

    # per column, stack the letters from least to most frequent
    for j in range(N):
        i = 0
        for k, v in sorted(pwm[j].items(), key=itemgetter(1)):
            heights[i, j] = R[j] * v
            identities[i, j] = alphmap[k]
            i += 1

    font = Basefont(join(_HY454_FONT_PATHS[0], 'Roboto-Black.ttf'))

    fig = plt.figure(figsize=figsize, dpi=dpi)

    # make each column a vertical golden rect
    rect = 0.2, 0.2, 0.382 * N, 0.618
    ax = fig.add_axes(rect)

    _adjust_spines_outward(ax, ('left',), 9)

    ax.set_ylabel('bits', fontproperties=_ROBOTO_REGULAR)

    # NOTE(review): figsize was defaulted to (3, 3) above, so this branch
    # can never run -- confirm whether the width was meant to follow N.
    if figsize is None:
        fig.set_figwidth(N)

    if transparent:
        fig.patch.set_alpha(0.)
        ax.patch.set_alpha(0.)

    # remove the top and right ticks
    for tick in ax.xaxis.get_major_ticks() + ax.yaxis.get_major_ticks():
        tick.tick2On = False

    # remove the bottom ticks
    for tick in ax.xaxis.get_major_ticks():
        tick.tick1On = False

    # rotate the x-axis labels by 45 degrees to enhance packing
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(45)

    # set font properties
    for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels():
        label.set_fontproperties(_ROBOTO_REGULAR)

    # disable the bottom, top and right spines, we don't need them
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    def format_xlabel(x, pos=None):
        # map a tick position to its label, clamped to the valid range
        idx = np.clip(int(x)-1, 0, N-1)
        return labels[idx]

    ax.xaxis.set_major_formatter(FuncFormatter(format_xlabel))
    # avoid too much precision
    ax.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))

    # set the ticks
    ysep = 0.5 if alphlen < 20 else 1.0
    yticks = np.arange(0, maxbits, ysep, dtype=float)
    # make the last tick land exactly on maxbits: move it if it is closer
    # than one step, otherwise append an extra tick
    if maxbits - yticks[-1] < ysep:
        yticks[-1] = maxbits
    else:
        yticks = np.append(yticks, maxbits)
    ax.set_yticks(yticks)
    # ticks sit at the middle of each unit-wide column
    ax.set_xticks(np.arange(1, N+1, dtype=float) + 0.5)

    # set the axes limits here AFTER the ticks, otherwise borkage
    ax.set_xlim((1, N+1))
    ax.set_ylim((0, maxbits))

    idxs = np.arange(1, N+1)
    bottoms = np.zeros((N,), dtype=float)
    for i in range(alphlen):
        # the bars only provide the geometry; each one that carries a
        # letter is hidden and replaced by a glyph using its transform
        bars = ax.bar(idxs, heights[i, :], width=1., bottom=bottoms)
        bottoms += heights[i, :]
        for j, bar in enumerate(bars):
            if identities[i, j]:
                l = alphkeys[identities[i, j]]
                glyph = font[l]
                ax.add_patch(glyph)
                glyph.set_transform(bar.get_transform())
                bar.set_visible(False)
                glyph.set_edgecolor(edgecolor)
                glyph.set_facecolor(colors[l])
                glyph.set_linewidth(linewidth)
                glyph.set_zorder(-1)

    # set the remaining spine to show the maximum value
    ax.spines['left'].set_bounds(0, max(bottoms))

    fig.savefig(filename, format=format, transparent=transparent, bbox_inches='tight', pad_inches=0.25)

    return filename