예제 #1
0
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner (DEPRECATED).

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Each method below is an event callback that receives the raw text line
    that triggered the event.

    Attributes populated while consuming:
        motifs        -- list of Motif objects, in order of appearance
        current_motif -- the motif most recently started (None before any)
        param_dict    -- dict of parameter name -> value (None until the
                         parameters section starts)
        seq_dict      -- list of input sequence names (a list despite the
                         name)
        ver           -- the version line (None until seen)
        cmd_line      -- the command line (None until seen)

    This class is DEPRECATED; please use the read() function in this module
    instead.
    """
    def __init__(self):
        import warnings
        warnings.warn(
            "Bio.Motif.Parsers.AlignAce.AlignAceConsumer is deprecated; please use the read() function in this module instead.",
            Bio.BiopythonDeprecationWarning)
        self.motifs = []
        self.current_motif = None
        self.param_dict = None
        # Initialised up front so the attributes always exist, even when the
        # corresponding event never fires for a given file.
        self.seq_dict = []
        self.ver = None
        self.cmd_line = None

    def parameters(self, line):
        """Start of the parameters section: reset the parameter dict."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one 'name = value' line in param_dict.

        Only the first '=' separates name from value, so values that
        themselves contain '=' are kept whole. Raises IndexError when the
        line has no '=' at all.
        """
        fields = line.split("=", 1)
        par_name = fields[0].strip()
        par_value = fields[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Start of the input sequences section: reset the name list."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the sequence name found in the second tab-separated field."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Start a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field as an instance of the current motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last whitespace-separated token."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from the given line."""
        # Strips newline, backslash and "c" characters from both ends. The
        # original literal was "\n\c", whose invalid "\c" escape Python reads
        # as backslash + "c"; it is spelled out here to keep the exact same
        # behaviour without the invalid-escape warning.
        # NOTE(review): "\n\r" may have been the intent -- confirm.
        self.current_motif.set_mask(line.strip("\n\\c"))

    def noevent(self, line):
        """Ignore lines that carry no information."""
        pass

    def version(self, line):
        """Remember the program version line."""
        self.ver = line

    def command_line(self, line):
        """Remember the command line used to produce the output."""
        self.cmd_line = line
예제 #2
0
class AlignAceConsumer(object):
    """
    The general purpose consumer for the AlignAceScanner (DEPRECATED).

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Every method except __init__ is an event callback taking the text line
    that triggered it. Results accumulate on the instance: motifs,
    current_motif, param_dict, seq_dict (a list of names), ver and cmd_line.

    This class is DEPRECATED; please use the read() function in this module
    instead.
    """
    def __init__(self):
        import warnings
        warnings.warn("Bio.Motif.Parsers.AlignAce.AlignAceConsumer is deprecated; please use the read() function in this module instead.", Bio.BiopythonDeprecationWarning)
        self.motifs = []
        self.current_motif = None
        self.param_dict = None
        # Defined here so these attributes exist even if the matching
        # events are never delivered.
        self.seq_dict = []
        self.ver = None
        self.cmd_line = None

    def parameters(self, line):
        """Begin the parameters section with an empty dict."""
        self.param_dict = {}

    def parameter(self, line):
        """Parse a 'name = value' line into param_dict.

        The split happens on the first '=' only, so a value containing '='
        is stored in full. A line without '=' raises IndexError.
        """
        pieces = line.split("=", 1)
        par_name = pieces[0].strip()
        par_value = pieces[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Begin the input sequences section with an empty list."""
        self.seq_dict = []

    def sequence(self, line):
        """Append the second tab-separated field (the sequence name)."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Create a new motif with the unambiguous DNA alphabet."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the site in the first tab-separated field to the current motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Parse the last whitespace-separated token as the motif score."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Pass the mask line to the current motif."""
        # "\n\\c" is the runtime value of the original "\n\c" literal
        # (newline, backslash, "c"); writing it explicitly avoids Python's
        # invalid-escape warning without changing what gets stripped.
        # NOTE(review): stripping "\n\r" may have been intended -- confirm.
        self.current_motif.set_mask(line.strip("\n\\c"))

    def noevent(self, line):
        """Do nothing for uninteresting lines."""
        pass

    def version(self, line):
        """Store the version line."""
        self.ver = line

    def command_line(self, line):
        """Store the command line."""
        self.cmd_line = line
예제 #3
0
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner.

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Each method other than __init__ is an event callback that receives the
    text line associated with the event.

    Attributes:
        motifs        -- list of motifs created so far
        current_motif -- motif currently being filled in, or None
        param_dict    -- parameter name -> value, or None before the
                         parameters section
        seq_dict      -- list of input sequence names
        ver           -- version line, or None
        cmd_line      -- command line, or None
    """

    def __init__(self):
        self.motifs = []
        self.current_motif = None
        self.param_dict = None
        # Give every attribute a defined starting value so that reading the
        # consumer after an incomplete file does not raise AttributeError.
        self.seq_dict = []
        self.ver = None
        self.cmd_line = None

    def parameters(self, line):
        """Start of the parameters section: reset the parameter dict."""
        self.param_dict = {}

    def parameter(self, line):
        """Store a 'name = value' line in param_dict.

        Splits on the first '=' only so that '=' inside the value is
        preserved. Raises IndexError if the line contains no '='.
        """
        name_and_value = line.split("=", 1)
        par_name = name_and_value[0].strip()
        par_value = name_and_value[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Start of the input sequences section: reset the name list."""
        self.seq_dict = []

    def sequence(self, line):
        """Append the sequence name (second tab-separated field)."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Open a new motif using the unambiguous DNA alphabet."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field as a motif instance."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Read the score from the last whitespace-separated token."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the mask of the current motif."""
        # The original literal "\n\c" contains an invalid escape that Python
        # interprets as newline + backslash + "c". "\n\\c" is the same set
        # of characters, written without the invalid escape.
        # NOTE(review): "\n\r" may have been the intent -- confirm.
        self.current_motif.set_mask(line.strip("\n\\c"))

    def noevent(self, line):
        """Ignore the line."""
        pass

    def version(self, line):
        """Keep the version line."""
        self.ver = line

    def command_line(self, line):
        """Keep the command line."""
        self.cmd_line = line
예제 #4
0
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner.

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    The event methods each take the text line for that event and update
    the instance: motifs / current_motif for motif data, param_dict for
    parameters, seq_dict (a list) for sequence names, ver and cmd_line for
    the header lines.
    """
    def __init__(self):
        self.motifs = []
        self.current_motif = None
        self.param_dict = None
        # Pre-set so the attributes are always present on the instance.
        self.seq_dict = []
        self.ver = None
        self.cmd_line = None

    def parameters(self, line):
        """Reset param_dict at the start of the parameters section."""
        self.param_dict = {}

    def parameter(self, line):
        """Record one 'name = value' parameter line.

        The value is everything after the first '=', so embedded '='
        characters are not lost. Raises IndexError when '=' is missing.
        """
        parts = line.split("=", 1)
        par_name = parts[0].strip()
        par_value = parts[1].strip()
        self.param_dict[par_name] = par_value

    def sequences(self, line):
        """Reset seq_dict at the start of the input sequences section."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the name in the second tab-separated field."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Start a new motif and append it to motifs."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the site from the first tab-separated field to the motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Store the trailing numeric token as the motif score."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Store the mask line on the current motif."""
        # Equivalent to the original "\n\c" literal (newline, backslash and
        # "c" are stripped from both ends) but without the invalid "\c"
        # escape sequence that newer Python versions warn about.
        # NOTE(review): "\n\r" may have been the intent -- confirm.
        self.current_motif.set_mask(line.strip("\n\\c"))

    def noevent(self, line):
        """Nothing to do for this line."""
        pass

    def version(self, line):
        """Store the version line."""
        self.ver = line

    def command_line(self, line):
        """Store the command line."""
        self.cmd_line = line
예제 #5
0
    def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
        """Build logos for this object's k-mers and their reverse complement.

        Stores the basenames of both output paths on the instance, builds a
        DNA motif from ``self.kmer_lst``, hands the motif and its reverse
        complement to ``construct_weblogo_helper`` (one output file each),
        and raises ``self.logowidth`` to the largest of its current value
        and the two values the helper returned.
        """
        self.weblogo_basename = os.path.basename(weblogo_filename)
        self.weblogo_revcompl_basename = os.path.basename(weblogo_revcompl_filename)

        forward = Motif(alphabet=IUPAC.unambiguous_dna)
        for word in self.kmer_lst:
            instance = Seq(word, forward.alphabet)
            forward.add_instance(instance)

        # Forward strand first, then the reverse complement.
        widths = [self.construct_weblogo_helper(weblogo_filename, forward)]
        reverse = forward.reverse_complement()
        widths.append(self.construct_weblogo_helper(weblogo_revcompl_filename, reverse))

        self.logowidth = max(self.logowidth, *widths)
예제 #6
0
    def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
        """Create the forward and reverse-complement logos for the k-mers.

        Side effects on ``self``: ``weblogo_basename`` and
        ``weblogo_revcompl_basename`` receive the basenames of the two
        paths, and ``logowidth`` becomes the maximum of its previous value
        and the widths returned by ``construct_weblogo_helper`` for the
        motif and for its reverse complement.
        """
        self.weblogo_basename = os.path.basename(weblogo_filename)
        self.weblogo_revcompl_basename = os.path.basename(
            weblogo_revcompl_filename)

        # Collect every k-mer into a single DNA motif.
        fwd_motif = Motif(alphabet=IUPAC.unambiguous_dna)
        for kmer_text in self.kmer_lst:
            fwd_motif.add_instance(Seq(kmer_text, fwd_motif.alphabet))

        width_fwd = self.construct_weblogo_helper(weblogo_filename, fwd_motif)

        rev_motif = fwd_motif.reverse_complement()
        width_rev = self.construct_weblogo_helper(
            weblogo_revcompl_filename, rev_motif)

        candidates = (self.logowidth, width_fwd, width_rev)
        self.logowidth = max(candidates)
예제 #7
0
def read(handle):
    """Parse AlignACE output from an open handle and return a Record.

    The first line is stored as ``record.version`` and the second as
    ``record.command``. Every following line is stripped and classified by
    its content: section headers ("Para...", "Input..."), sequence names
    ("#..."), ``name = value`` parameters, and the motif lines ("Motif...",
    "MAP...", four tab-separated fields for a site, or a line containing
    "*" for the mask).

    Arguments:
        handle -- an iterator over text lines (e.g. an open file).

    Raises:
        StopIteration -- if the handle has fewer than two lines.
        ValueError -- for a line that matches no known pattern, or a motif
            detail line that appears before any "Motif" header.
    """
    record = Record()
    # next() works on both Python 2 and Python 3 iterators, unlike .next().
    record.version = next(handle).strip()
    record.command = next(handle).strip()
    current_motif = None
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line.startswith("Para"):
            record.parameters = {}
        elif line.startswith("#"):
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only so values containing "=" parse.
            par_name, par_value = line.split("=", 1)
            record.parameters[par_name.strip()] = par_value.strip()
        elif line.startswith("Input"):
            record.sequences = []
        elif line.startswith("Motif"):
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif current_motif is None:
            # Everything below needs a motif; without one the line cannot
            # be interpreted, so report it like any other bad line.
            raise ValueError(line)
        elif line.startswith("MAP"):
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # "\n\\c" is the runtime value of the original "\n\c" literal
            # (newline, backslash, "c"), minus the invalid escape sequence.
            current_motif.set_mask(line.strip("\n\\c"))
        else:
            raise ValueError(line)
    return record
예제 #8
0
파일: AlignAce.py 프로젝트: BingW/biopython
def read(handle):
    """Read an AlignACE result file into a Record.

    Line 1 becomes ``record.version`` and line 2 ``record.command`` (both
    stripped). The remaining lines are stripped and dispatched on their
    content:

        "Para..."               start a fresh ``record.parameters`` dict
        "#..."                  append the second tab field to sequences
        contains "="            store a ``name = value`` parameter
        "Input..."              start a fresh ``record.sequences`` list
        "Motif..."              start a new DNA motif
        "MAP..."                score of the current motif (last token)
        four tab fields         site of the current motif (first field)
        contains "*"            mask of the current motif

    Arguments:
        handle -- an iterator yielding text lines (e.g. an open file).

    Raises:
        StopIteration -- when fewer than two lines are available.
        ValueError -- when a line fits none of the patterns, or when motif
            details appear before the first "Motif" line.
    """
    record = Record()
    # Builtin next() is portable; the .next() method exists on Python 2 only.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    current_motif = None
    for line in handle:
        line = line.strip()
        if not line:
            continue
        if line.startswith("Para"):
            record.parameters = {}
        elif line.startswith("#"):
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # maxsplit=1 keeps any further "=" as part of the value instead
            # of failing the two-name unpacking.
            par_name, par_value = line.split("=", 1)
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line.startswith("Input"):
            record.sequences = []
        elif line.startswith("Motif"):
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif current_motif is None:
            # No motif has been opened yet, so score/site/mask lines (and
            # anything unrecognised) are reported as invalid input.
            raise ValueError(line)
        elif line.startswith("MAP"):
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # Same characters as the original "\n\c" literal (newline,
            # backslash, "c") without relying on an invalid escape.
            current_motif.set_mask(line.strip("\n\\c"))
        else:
            raise ValueError(line)
    return record
예제 #9
0
def graph_logo(
    alignment,
    columns,
    filename=None,
    dpi=None, edgecolor='k', figsize=None, format='pdf', labels=None, linewidth=0., transparent=True,
    refidx=-1
):
    """Draw a sequence logo for selected alignment columns and save it.

    Arguments:
        alignment -- the aligned sequences. It is sliced, extended, iterated
            for records with a ``.seq`` attribute and indexed as
            ``alignment[:, i]`` (presumably a Biopython
            MultipleSeqAlignment -- confirm against callers).
        columns -- sequence of column indices to include in the logo.
        filename -- output path; when None a temporary file is created.
        dpi, figsize -- passed to ``plt.figure``; figsize defaults to (3, 3).
        edgecolor, linewidth -- applied to every letter glyph.
        format -- passed to ``fig.savefig``.
        labels -- x-axis labels, one per column; defaults to the column
            indices plus one.
        transparent -- when true the figure and axes backgrounds get zero
            alpha; also passed to ``fig.savefig``.
        refidx -- when >= 0, the row at this index is left out of the logo.

    Returns:
        The path the figure was written to.

    Raises:
        RuntimeError -- if none of the candidate alphabets validates every
            sequence.

    Side effect: the ``seq.alphabet`` attribute of each record in the
    (possibly reduced) alignment is overwritten during alphabet detection.
    """
    if filename is None:
        # Only the path is needed; the descriptor is closed right away.
        # (presumably tempfile.mkstemp / os.close -- imports not shown here)
        fd, filename = mkstemp(); close(fd)

    if figsize is None:
        figsize = (3, 3)

    if labels is None:
        # default labels are the 1-based column positions
        labels = ['%d' % (idx + 1) for idx in columns]

    if refidx >= 0:
        # drop the row at refidx by joining the rows before and after it
        msa = alignment
        alignment = msa[:refidx]
        alignment.extend(msa[refidx + 1:])

    # M: number of sequences used; N: number of logo positions
    M = len(alignment)
    N = len(columns)

    # Try each candidate alphabet in turn: assign it to every record and
    # keep the first one for which all upper-cased sequences verify.
    alph = None
    for _alph in (_DNA_ALPHABET, _RNA_ALPHABET, _AMINO_ALPHABET):
        for r in alignment:
            r.seq.alphabet = _alph
        if all([_verify_alphabet(r.seq.upper()) for r in alignment]):
            alph = _alph
            break
    if alph is None:
        raise RuntimeError("sequences with indeterminable alphabet provided")

    motif = Motif(alphabet=alph)

    # Transpose the selected columns into one upper-cased string per row
    # (assumes alignment[:, i] yields column i as a string -- TODO confirm).
    instances = (''.join(z).upper() for z in zip(*[alignment[:, i] for i in columns]))
    for instance in instances:
        motif.add_instance(Seq(instance, alph))

    # set laplace = True to include the backgrounds
    pwm = _fix_ambigs(motif.pwm(laplace=False), alph)

    # heuristic to determine whether nucleotide or protein alphabet
    # need to use either base 4 or 20 depending
    alphlen, _alphkeys = max(((len(pwm[i]), pwm[i].keys()) for i in range(N)), key=itemgetter(0))
    s, colors = (4, _DNA_COLORS) if alphlen < 20 else (20, _AMINO_COLORS)
    # Index 0 is reserved for "no letter" so that a zero entry in
    # `identities` below means there is nothing to draw.
    alphkeys = ['']
    alphkeys.extend(_alphkeys)
    alphmap = dict(zip(alphkeys, range(len(alphkeys))))

    # compute the information content at each position
    maxbits = np.log2(s)
    # correction term subtracted from every position; depends on the
    # alphabet size s and the number of sequences M
    e_n = (s - 1) / (2. * np.log(2) * M)
    R = maxbits * np.ones((N,), dtype=float)
    # subtract the entropy of each position's letter distribution
    R -= [-sum(v * np.log2(v) for _, v in pwm[i].items() if v > 0.) for i in range(N)]
    R -= e_n

    # heights[i, j]: height of the i-th stacked letter at position j
    # identities[i, j]: index into alphkeys of that letter (0 = none)
    heights = np.zeros((alphlen, N), dtype=float)
    identities = np.zeros((alphlen, N), dtype=int)

    for j in range(N):
        i = 0
        # ascending by frequency, so the rarest letter is stacked first
        for k, v in sorted(pwm[j].items(), key=itemgetter(1)):
            heights[i, j] = R[j] * v
            identities[i, j] = alphmap[k]
            i += 1

    font = Basefont(join(_HY454_FONT_PATHS[0], 'Roboto-Black.ttf'))

    fig = plt.figure(figsize=figsize, dpi=dpi)

    # make each column a vertical golden rect
    rect = 0.2, 0.2, 0.382 * N, 0.618
    ax = fig.add_axes(rect)

    _adjust_spines_outward(ax, ('left',), 9)

    ax.set_ylabel('bits', fontproperties=_ROBOTO_REGULAR)

    # NOTE(review): this branch cannot run -- figsize was already replaced
    # by (3, 3) near the top of the function whenever it was None.
    if figsize is None:
        fig.set_figwidth(N)

    if transparent:
        fig.patch.set_alpha(0.)
        ax.patch.set_alpha(0.)

    # remove the top and right ticks
    # NOTE(review): tick1On/tick2On may be deprecated in newer matplotlib
    # releases -- verify against the version this project pins.
    for tick in ax.xaxis.get_major_ticks() + ax.yaxis.get_major_ticks():
        tick.tick2On = False

    # remove the bottom ticks
    for tick in ax.xaxis.get_major_ticks():
        tick.tick1On = False

    # rotate the x-axis labels by 45 degrees to enhance packing
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(45)

    # set font properties
    for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels():
        label.set_fontproperties(_ROBOTO_REGULAR)

    # disable bottom, top and right spines, we don't need them
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    # Map a tick position to its label; the index is clipped so positions
    # outside 1..N reuse the first or last label instead of failing.
    def format_xlabel(x, pos=None):
        idx = np.clip(int(x)-1, 0, N-1)
        return labels[idx]

    ax.xaxis.set_major_formatter(FuncFormatter(format_xlabel))
    # avoid too much precision
    ax.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))

    # set the ticks
    ysep = 0.5 if alphlen < 20 else 1.0
    yticks = np.arange(0, maxbits, ysep, dtype=float)
    # make maxbits the top tick: replace the last tick when it is closer
    # than ysep to maxbits, otherwise append maxbits as an extra tick
    if maxbits - yticks[-1] < ysep:
        yticks[-1] = maxbits
    else:
        yticks = np.append(yticks, maxbits)
    ax.set_yticks(yticks)
    ax.set_xticks(np.arange(1, N+1, dtype=float) + 0.5)

    # set the axes limits here AFTER the ticks, otherwise borkage
    ax.set_xlim((1, N+1))
    ax.set_ylim((0, maxbits))

    idxs = np.arange(1, N+1)
    bottoms = np.zeros((N,), dtype=float)
    # Draw one layer of bars per stacked letter, then swap each bar for the
    # letter glyph placed in the bar's transform; the bar itself is hidden.
    for i in range(alphlen):
        bars = ax.bar(idxs, heights[i, :], width=1., bottom=bottoms)
        bottoms += heights[i, :]
        for j, bar in enumerate(bars):
            if identities[i, j]:
                l = alphkeys[identities[i, j]]
                glyph = font[l]
                ax.add_patch(glyph)
                glyph.set_transform(bar.get_transform())
                bar.set_visible(False)
                glyph.set_edgecolor(edgecolor)
                glyph.set_facecolor(colors[l])
                glyph.set_linewidth(linewidth)
                glyph.set_zorder(-1)

    # set the remaining spine to show the maximum value
    ax.spines['left'].set_bounds(0, max(bottoms))

    fig.savefig(filename, format=format, transparent=transparent, bbox_inches='tight', pad_inches=0.25)

    return filename