Exemple #1
0
def read(handle, format):
    """Read a single motif from a Jaspar file.

    Arguments:
     - handle - iterator over the lines of the file (e.g. an open file)
     - format - "pfm" for a Jaspar .pfm count matrix, or "sites" for a
       Jaspar .sites file

    Returns a Motif over the unambiguous DNA alphabet whose mask is a
    string of "*" as long as the motif.

    Raises ValueError if format is neither "pfm" nor "sites".
    """
    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file: one row of counts per
        # nucleotide, in the fixed order A, C, G, T
        counts = {}
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # Store a real list: on Python 3 map() returns a one-shot
            # iterator, which cannot be indexed or re-read.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    elif format == "sites":
        # reads the motif from Jaspar .sites file: records are pairs of
        # a ">..." header line followed by a sequence line
        instances = []
        for line in handle:
            if not line.startswith(">"):
                break
            # line contains the header ">...."
            # now read the actual sequence; the next() builtin works on
            # both Python 2.6+ and Python 3, unlike handle.next()
            line = next(handle)
            # keep only characters that are unchanged by upper(), i.e.
            # drop the lower-case letters
            instance = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(instance, alphabet))
        motif = Motif(alphabet, instances=instances)
    else:
        raise ValueError("Unknown format %s" % format)
    motif.mask = "*" * motif.length
    return motif
Exemple #2
0
def read(handle, format):
    """Read a single motif from a Jaspar file.

    Arguments:
     - handle - iterator over the lines of the file (e.g. an open file)
     - format - "pfm" for a Jaspar .pfm count matrix, or "sites" for a
       Jaspar .sites file

    Returns a Motif over the unambiguous DNA alphabet; its mask is set
    (via set_mask) to a string of "*" as long as the motif.

    Raises ValueError if format is neither "pfm" nor "sites".
    """
    alphabet = IUPAC.unambiguous_dna
    if format == "pfm":
        # reads the motif from Jaspar .pfm file: one row of counts per
        # nucleotide, in the fixed order A, C, G, T
        counts = {}
        for letter, line in zip("ACGT", handle):
            words = line.split()
            # if there is a letter in the beginning, ignore it
            if words[0] == letter:
                words = words[1:]
            # Store a real list: on Python 3 map() returns a one-shot
            # iterator, which cannot be indexed or re-read.
            counts[letter] = [float(word) for word in words]
        motif = Motif(alphabet, counts=counts)
    elif format == "sites":
        # reads the motif from Jaspar .sites file: records are pairs of
        # a ">..." header line followed by a sequence line
        instances = []
        for line in handle:
            if not line.startswith(">"):
                break
            # line contains the header ">...."
            # now read the actual sequence; the next() builtin works on
            # both Python 2.6+ and Python 3, unlike handle.next()
            line = next(handle)
            # keep only characters that are unchanged by upper(), i.e.
            # drop the lower-case letters
            instance = "".join(c for c in line.strip() if c == c.upper())
            instances.append(Seq(instance, alphabet))
        motif = Motif(alphabet, instances=instances)
    else:
        raise ValueError("Unknown format %s" % format)
    motif.set_mask("*" * motif.length)
    return motif
Exemple #3
0
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner (DEPRECATED).

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Each method is an event callback that receives the raw text line which
    triggered the event.

    This class is DEPRECATED; please use the read() function in this module
    instead.
    """
    def __init__(self):
        import warnings
        warnings.warn(
            "Bio.Motif.Parsers.AlignAce.AlignAceConsumer is deprecated; please use the read() function in this module instead.",
            Bio.BiopythonDeprecationWarning)
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start of the parameter section: reset the parameter dictionary."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict.

        Raises IndexError if the line contains no "=".
        """
        # Split once, on the first "=" only, so a value that itself
        # contains "=" is kept whole instead of being truncated.
        fields = line.split("=", 1)
        self.param_dict[fields[0].strip()] = fields[1].strip()

    def sequences(self, line):
        """Start of the input sequence section: reset the sequence list."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the sequence name (second tab-separated field)."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Start a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field as an instance of the motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of the line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from the line, minus its line ending."""
        # "\r\n" strips both Unix and Windows line endings.  The previous
        # "\n\c" was an invalid escape that stripped backslashes and the
        # letter "c" and left any "\r" in place.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Store the program version line."""
        self.ver = line

    def command_line(self, line):
        """Store the command line that produced the output."""
        self.cmd_line = line
Exemple #4
0
class AlignAceConsumer(object):
    """
    The general purpose consumer for the AlignAceScanner (DEPRECATED).

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Each method is an event callback that receives the raw text line which
    triggered the event.

    This class is DEPRECATED; please use the read() function in this module
    instead.
    """
    def __init__(self):
        import warnings
        warnings.warn("Bio.Motif.Parsers.AlignAce.AlignAceConsumer is deprecated; please use the read() function in this module instead.", Bio.BiopythonDeprecationWarning)
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start of the parameter section: reset the parameter dictionary."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict.

        Raises IndexError if the line contains no "=".
        """
        # Split once, on the first "=" only, so a value that itself
        # contains "=" is kept whole instead of being truncated.
        fields = line.split("=", 1)
        self.param_dict[fields[0].strip()] = fields[1].strip()

    def sequences(self, line):
        """Start of the input sequence section: reset the sequence list."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the sequence name (second tab-separated field)."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Start a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field as an instance of the motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of the line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from the line, minus its line ending."""
        # "\r\n" strips both Unix and Windows line endings.  The previous
        # "\n\c" was an invalid escape that stripped backslashes and the
        # letter "c" and left any "\r" in place.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Store the program version line."""
        self.ver = line

    def command_line(self, line):
        """Store the command line that produced the output."""
        self.cmd_line = line
Exemple #5
0
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner.

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Each method is an event callback that receives the raw text line which
    triggered the event.
    """

    def __init__(self):
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start of the parameter section: reset the parameter dictionary."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict.

        Raises IndexError if the line contains no "=".
        """
        # Split once, on the first "=" only, so a value that itself
        # contains "=" is kept whole instead of being truncated.
        fields = line.split("=", 1)
        self.param_dict[fields[0].strip()] = fields[1].strip()

    def sequences(self, line):
        """Start of the input sequence section: reset the sequence list."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the sequence name (second tab-separated field)."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Start a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field as an instance of the motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of the line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from the line, minus its line ending."""
        # "\r\n" strips both Unix and Windows line endings.  The previous
        # "\n\c" was an invalid escape that stripped backslashes and the
        # letter "c" and left any "\r" in place.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Store the program version line."""
        self.ver = line

    def command_line(self, line):
        """Store the command line that produced the output."""
        self.cmd_line = line
Exemple #6
0
class AlignAceConsumer:
    """
    The general purpose consumer for the AlignAceScanner.

    Should be passed as the consumer to the feed method of the
    AlignAceScanner. After 'consuming' the file, it has the list of motifs
    in the motifs property.

    Each method is an event callback that receives the raw text line which
    triggered the event.
    """
    def __init__(self):
        self.motifs = []
        self.current_motif = None
        self.param_dict = None

    def parameters(self, line):
        """Start of the parameter section: reset the parameter dictionary."""
        self.param_dict = {}

    def parameter(self, line):
        """Store one "name = value" line in param_dict.

        Raises IndexError if the line contains no "=".
        """
        # Split once, on the first "=" only, so a value that itself
        # contains "=" is kept whole instead of being truncated.
        fields = line.split("=", 1)
        self.param_dict[fields[0].strip()] = fields[1].strip()

    def sequences(self, line):
        """Start of the input sequence section: reset the sequence list."""
        self.seq_dict = []

    def sequence(self, line):
        """Record the sequence name (second tab-separated field)."""
        seq_name = line.split("\t")[1]
        self.seq_dict.append(seq_name)

    def motif(self, line):
        """Start a new DNA motif and make it the current one."""
        self.current_motif = Motif()
        self.motifs.append(self.current_motif)
        self.current_motif.alphabet = IUPAC.unambiguous_dna

    def motif_hit(self, line):
        """Add the first tab-separated field as an instance of the motif."""
        seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
        self.current_motif.add_instance(seq)

    def motif_score(self, line):
        """Set the current motif's score from the last word of the line."""
        self.current_motif.score = float(line.split()[-1])

    def motif_mask(self, line):
        """Set the current motif's mask from the line, minus its line ending."""
        # "\r\n" strips both Unix and Windows line endings.  The previous
        # "\n\c" was an invalid escape that stripped backslashes and the
        # letter "c" and left any "\r" in place.
        self.current_motif.set_mask(line.strip("\r\n"))

    def noevent(self, line):
        """Ignore a line that carries no information."""
        pass

    def version(self, line):
        """Store the program version line."""
        self.ver = line

    def command_line(self, line):
        """Store the command line that produced the output."""
        self.cmd_line = line
Exemple #7
0
    def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
        """Build the forward and reverse-complement logos for the k-mers.

        Stores the basename of each target file on the instance, builds a
        DNA motif from every entry of self.kmer_lst, and hands the motif
        and its reverse complement to construct_weblogo_helper together
        with the matching filename.  self.logowidth is then raised to the
        largest of its current value and the two values the helper
        returned.
        """
        basename = os.path.basename
        self.weblogo_basename = basename(weblogo_filename)
        self.weblogo_revcompl_basename = basename(weblogo_revcompl_filename)

        forward = Motif(alphabet=IUPAC.unambiguous_dna)
        for kmer in self.kmer_lst:
            instance = Seq(kmer, forward.alphabet)
            forward.add_instance(instance)

        forward_width = self.construct_weblogo_helper(
            weblogo_filename, forward)

        # same motif read from the opposite strand
        reverse = forward.reverse_complement()
        reverse_width = self.construct_weblogo_helper(
            weblogo_revcompl_filename, reverse)

        # read self.logowidth only now, after both helper calls
        widest = max(self.logowidth, forward_width, reverse_width)
        self.logowidth = widest
Exemple #8
0
def read(handle):
    """Parse the lines of handle into a Record and return it.

    The first two lines are stored verbatim as record.ver and
    record.cmd_line.  Every following line is classified by its content:

     - blank lines are skipped
     - "Para..." resets record.param_dict
     - "#..." appends the second tab-separated field to record.seq_dict
     - a line containing "=" stores a "name = value" parameter
     - "Input..." resets record.seq_dict
     - "Motif..." starts a new DNA motif and appends it to record.motifs
     - "MAP..." sets the current motif's score from the last word
     - a line with four tab-separated fields adds a motif instance
     - a line containing "*" sets the current motif's mask

    Raises ValueError (carrying the offending line) for any other line.
    """
    record = Record()
    record.ver = next(handle)
    record.cmd_line = next(handle)
    for line in handle:
        if line.strip() == "":
            pass
        elif line[:4] == "Para":
            record.param_dict = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.seq_dict.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only so a value containing "=" is
            # stored whole rather than truncated.
            par_name, par_value = line.split("=", 1)
            record.param_dict[par_name.strip()] = par_value.strip()
        elif line[:5] == "Input":
            record.seq_dict = []
        elif line[:5] == "Motif":
            record.current_motif = Motif()
            record.motifs.append(record.current_motif)
            record.current_motif.alphabet = IUPAC.unambiguous_dna
        elif line[:3] == "MAP":
            record.current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            record.current_motif.add_instance(seq)
        elif "*" in line:
            # "\r\n" strips both Unix and Windows line endings.  The old
            # "\n\c" was an invalid escape that stripped backslashes and
            # the letter "c" and left any "\r" in place.
            record.current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
Exemple #9
0
    def construct_weblogo(self, weblogo_filename, weblogo_revcompl_filename):
        """Build the forward and reverse-complement logos for the k-mers.

        Records the basename of both target files, assembles a DNA motif
        from self.kmer_lst, passes the motif and its reverse complement to
        construct_weblogo_helper with the corresponding filename, and
        finally widens self.logowidth to cover both returned values.
        """
        self.weblogo_basename = os.path.basename(weblogo_filename)
        revcompl_basename = os.path.basename(weblogo_revcompl_filename)
        self.weblogo_revcompl_basename = revcompl_basename

        plus_motif = Motif(alphabet=IUPAC.unambiguous_dna)
        for kmer in self.kmer_lst:
            plus_motif.add_instance(Seq(kmer, plus_motif.alphabet))

        plus_width = self.construct_weblogo_helper(weblogo_filename, plus_motif)

        # reverse complement
        minus_motif = plus_motif.reverse_complement()
        minus_width = self.construct_weblogo_helper(weblogo_revcompl_filename, minus_motif)

        # self.logowidth is read after both helper calls, as a candidate
        self.logowidth = max(self.logowidth, plus_width, minus_width)
Exemple #10
0
def read(handle):
    """Parse the lines of handle into a Record and return it.

    The first two lines become record.version and record.command
    (whitespace-stripped).  Every following line is stripped and then
    classified by its content:

     - blank lines are skipped
     - "Para..." resets record.parameters
     - "#..." appends the second tab-separated field to record.sequences
     - a line containing "=" stores a "name = value" parameter
     - "Input..." resets record.sequences
     - "Motif N ..." starts collecting instances for motif number N
     - a line with four tab-separated fields is a motif instance
     - a line containing "*" is remembered as the mask
     - "MAP..." finishes the motif: it is built from the collected
       instances, given the score (last word of the line), the number and
       the mask, and appended to record.motifs.  The mask line must
       therefore precede the MAP line.

    Raises ValueError (carrying the offending line) for a line that matches
    none of the above, or for a "Motif..." line whose first word is not
    exactly "Motif".
    """
    record = Record()
    # The next() builtin works on Python 2.6+ and Python 3;
    # handle.next() exists on Python 2 only.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only, so a value that itself
            # contains "=" no longer breaks the unpacking.
            par_name, par_value = line.split("=", 1)
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            words = line.split()
            # Validate explicitly: an assert would vanish under "python -O".
            if words[0] != "Motif":
                raise ValueError(line)
            number = int(words[1])
            instances = []
        elif line[:3] == "MAP":
            motif = Motif(IUPAC.unambiguous_dna, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.set_mask(mask)
            record.motifs.append(motif)
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            instances.append(seq)
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record
Exemple #11
0
def read(handle):
    """Parse the lines of handle into a Record and return it.

    The first two lines become record.version and record.command
    (whitespace-stripped).  Every following line is stripped and then
    classified by its content:

     - blank lines are skipped
     - "Para..." resets record.parameters
     - "#..." appends the second tab-separated field to record.sequences
     - a line containing "=" stores a "name = value" parameter
     - "Input..." resets record.sequences
     - "Motif N ..." starts collecting instances for motif number N
     - a line with four tab-separated fields is a motif instance
     - a line containing "*" is remembered as the mask
     - "MAP..." finishes the motif: it is built from the collected
       instances, given the score (last word of the line), the number and
       the mask (assigned to motif.mask), and appended to record.motifs.
       The mask line must therefore precede the MAP line.

    Raises ValueError (carrying the offending line) for a line that matches
    none of the above, or for a "Motif..." line whose first word is not
    exactly "Motif".
    """
    record = Record()
    # The next() builtin works on Python 2.6+ and Python 3;
    # handle.next() exists on Python 2 only.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only, so a value that itself
            # contains "=" no longer breaks the unpacking.
            par_name, par_value = line.split("=", 1)
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            words = line.split()
            # Validate explicitly: an assert would vanish under "python -O".
            if words[0] != "Motif":
                raise ValueError(line)
            number = int(words[1])
            instances = []
        elif line[:3] == "MAP":
            motif = Motif(IUPAC.unambiguous_dna, instances)
            motif.score = float(line.split()[-1])
            motif.number = number
            motif.mask = mask
            record.motifs.append(motif)
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            instances.append(seq)
        elif "*" in line:
            mask = line.strip("\r\n")
        else:
            raise ValueError(line)
    return record
Exemple #12
0
def read(handle):
    """Parse the lines of handle into a Record and return it.

    The first two lines become record.version and record.command
    (whitespace-stripped).  Every following line is stripped and then
    classified by its content:

     - blank lines are skipped
     - "Para..." resets record.parameters
     - "#..." appends the second tab-separated field to record.sequences
     - a line containing "=" stores a "name = value" parameter
     - "Input..." resets record.sequences
     - "Motif..." starts a new DNA motif and appends it to record.motifs
     - "MAP..." sets the current motif's score from the last word
     - a line with four tab-separated fields adds a motif instance
     - a line containing "*" sets the current motif's mask

    Raises ValueError (carrying the offending line) for any other line.
    """
    record = Record()
    # The next() builtin works on Python 2.6+ and Python 3;
    # handle.next() exists on Python 2 only.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only, so a value that itself
            # contains "=" no longer breaks the unpacking.
            par_name, par_value = line.split("=", 1)
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif line[:3] == "MAP":
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # The old "\n\c" was an invalid escape that stripped
            # backslashes and the letter "c"; only line endings should go.
            current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
Exemple #13
0
def read(handle):
    """Parse the lines of handle into a Record and return it.

    The first two lines become record.version and record.command
    (whitespace-stripped).  Every following line is stripped and then
    classified by its content:

     - blank lines are skipped
     - "Para..." resets record.parameters
     - "#..." appends the second tab-separated field to record.sequences
     - a line containing "=" stores a "name = value" parameter
     - "Input..." resets record.sequences
     - "Motif..." starts a new DNA motif and appends it to record.motifs
     - "MAP..." sets the current motif's score from the last word
     - a line with four tab-separated fields adds a motif instance
     - a line containing "*" sets the current motif's mask

    Raises ValueError (carrying the offending line) for any other line.
    """
    record = Record()
    # The next() builtin works on Python 2.6+ and Python 3;
    # handle.next() exists on Python 2 only.
    line = next(handle)
    record.version = line.strip()
    line = next(handle)
    record.command = line.strip()
    for line in handle:
        line = line.strip()
        if line == "":
            pass
        elif line[:4] == "Para":
            record.parameters = {}
        elif line[0] == "#":
            seq_name = line.split("\t")[1]
            record.sequences.append(seq_name)
        elif "=" in line:
            # Split on the first "=" only, so a value that itself
            # contains "=" no longer breaks the unpacking.
            par_name, par_value = line.split("=", 1)
            par_name = par_name.strip()
            par_value = par_value.strip()
            record.parameters[par_name] = par_value
        elif line[:5] == "Input":
            record.sequences = []
        elif line[:5] == "Motif":
            current_motif = Motif()
            current_motif.alphabet = IUPAC.unambiguous_dna
            record.motifs.append(current_motif)
        elif line[:3] == "MAP":
            current_motif.score = float(line.split()[-1])
        elif len(line.split("\t")) == 4:
            seq = Seq(line.split("\t")[0], IUPAC.unambiguous_dna)
            current_motif.add_instance(seq)
        elif "*" in line:
            # The old "\n\c" was an invalid escape that stripped
            # backslashes and the letter "c"; only line endings should go.
            current_motif.set_mask(line.strip("\r\n"))
        else:
            raise ValueError(line)
    return record
 def __init__(self):
     """Initialise the BaseMotif state and start with no references."""
     # Run the base initialiser first, then add this class's own attribute.
     BaseMotif.__init__(self)
     self.references = []
Exemple #15
0
 def __init__(self):
     """Initialise the Motif state and set evalue to 0.0."""
     # Run the base initialiser first, then add this class's own attribute.
     Motif.__init__(self)
     self.evalue = 0.0
 def motif(self,line):
     """Start a new DNA motif and make it the current one.

     The line argument is not used.
     """
     self.current_motif = Motif()
     self.motifs.append(self.current_motif)
     # the alphabet is assigned after construction, on the stored motif
     self.current_motif.alphabet=IUPAC.unambiguous_dna
Exemple #17
0
 def __init__ (self):
     """Initialise the Motif state and set evalue to 0.0."""
     # Run the base initialiser first, then add this class's own attribute.
     Motif.__init__(self)
     self.evalue = 0.0
Exemple #18
0
 def motif(self, line):
     """Start a new DNA motif and make it the current one.

     The line argument is not used.
     """
     self.current_motif = Motif()
     self.motifs.append(self.current_motif)
     # the alphabet is assigned after construction, on the stored motif
     self.current_motif.alphabet = IUPAC.unambiguous_dna
Exemple #19
0
 def __init__(self, alphabet=None, instances=None):
     """Initialise the Motif state and set evalue to 0.0.

     alphabet and instances are passed through unchanged to Motif.__init__.
     """
     Motif.__init__(self, alphabet, instances)
     self.evalue = 0.0
Exemple #20
0
 def __init__ (self, alphabet=None, instances=None):
     """Initialise the Motif state and set evalue to 0.0.

     alphabet and instances are passed through unchanged to Motif.__init__.
     """
     Motif.__init__(self, alphabet, instances)
     self.evalue = 0.0
Exemple #21
0
def graph_logo(
    alignment,
    columns,
    filename=None,
    dpi=None, edgecolor='k', figsize=None, format='pdf', labels=None, linewidth=0., transparent=True,
    refidx=-1
):
    """Draw a sequence logo for selected alignment columns and save it.

    Arguments:
     - alignment - the aligned records; it is sliced by row, indexed as
       alignment[:, i] by column, and each record exposes a .seq
       (presumably a Biopython MultipleSeqAlignment -- confirm with caller)
     - columns - sequence of column indices into the alignment
     - filename - output path; a temporary file is created when None
     - dpi, figsize - passed to plt.figure; figsize defaults to (3, 3)
     - edgecolor, linewidth - outline colour and width of the letter glyphs
     - format - output format handed to fig.savefig
     - labels - one x-axis label per column; defaults to the 1-based
       column index
     - transparent - make the figure and axes backgrounds transparent
     - refidx - when >= 0, the row at this index is left out of the logo

    Returns the filename the figure was written to.

    Raises RuntimeError when the sequences validate against none of the
    DNA, RNA or amino acid alphabets.

    Note that the alphabet detection assigns to r.seq.alphabet on every
    record of the alignment that is used.
    """
    if filename is None:
        # only the path is needed; close the descriptor mkstemp opened
        fd, filename = mkstemp(); close(fd)

    if figsize is None:
        figsize = (3, 3)

    if labels is None:
        labels = ['%d' % (idx + 1) for idx in columns]

    if refidx >= 0:
        # rebuild the alignment from the rows before and after refidx
        msa = alignment
        alignment = msa[:refidx]
        alignment.extend(msa[refidx + 1:])

    M = len(alignment)  # number of sequences
    N = len(columns)  # number of logo positions

    # try the alphabets in order and keep the first one every sequence
    # validates against
    alph = None
    for _alph in (_DNA_ALPHABET, _RNA_ALPHABET, _AMINO_ALPHABET):
        for r in alignment:
            r.seq.alphabet = _alph
        if all([_verify_alphabet(r.seq.upper()) for r in alignment]):
            alph = _alph
            break
    if alph is None:
        raise RuntimeError("sequences with indeterminable alphabet provided")

    motif = Motif(alphabet=alph)

    # one instance per sequence: its characters at the selected columns
    instances = (''.join(z).upper() for z in zip(*[alignment[:, i] for i in columns]))
    for instance in instances:
        motif.add_instance(Seq(instance, alph))

    # set laplace = True to include the backgrounds
    pwm = _fix_ambigs(motif.pwm(laplace=False), alph)

    # heuristic to determine whether nucleotide or protein alphabet
    # need to use either base 4 or 20 depending
    alphlen, _alphkeys = max(((len(pwm[i]), pwm[i].keys()) for i in range(N)), key=itemgetter(0))
    s, colors = (4, _DNA_COLORS) if alphlen < 20 else (20, _AMINO_COLORS)
    # index 0 is reserved for "no letter": identities starts out as all
    # zeros and the drawing loop skips zero entries
    alphkeys = ['']
    alphkeys.extend(_alphkeys)
    alphmap = dict(zip(alphkeys, range(len(alphkeys))))

    # compute the information content at each position:
    # maximum bits, minus the entropy of the column, minus e_n
    # NOTE(review): e_n looks like the small-sample correction commonly
    # used for sequence logos -- confirm
    maxbits = np.log2(s)
    e_n = (s - 1) / (2. * np.log(2) * M)
    R = maxbits * np.ones((N,), dtype=float)
    R -= [-sum(v * np.log2(v) for _, v in pwm[i].items() if v > 0.) for i in range(N)]
    R -= e_n

    heights = np.zeros((alphlen, N), dtype=float)
    identities = np.zeros((alphlen, N), dtype=int)

    # per column, order letters by ascending frequency so row 0 (drawn
    # first, at the bottom of the stack) holds the rarest letter
    for j in range(N):
        i = 0
        for k, v in sorted(pwm[j].items(), key=itemgetter(1)):
            heights[i, j] = R[j] * v
            identities[i, j] = alphmap[k]
            i += 1

    font = Basefont(join(_HY454_FONT_PATHS[0], 'Roboto-Black.ttf'))

    fig = plt.figure(figsize=figsize, dpi=dpi)

    # make each column a vertical golden rect
    rect = 0.2, 0.2, 0.382 * N, 0.618
    ax = fig.add_axes(rect)

    _adjust_spines_outward(ax, ('left',), 9)

    ax.set_ylabel('bits', fontproperties=_ROBOTO_REGULAR)

    # NOTE(review): figsize was already defaulted to (3, 3) above, so this
    # branch can never run -- confirm whether it should be removed or the
    # default dropped
    if figsize is None:
        fig.set_figwidth(N)

    if transparent:
        fig.patch.set_alpha(0.)
        ax.patch.set_alpha(0.)

    # remove the top and right ticks
    for tick in ax.xaxis.get_major_ticks() + ax.yaxis.get_major_ticks():
        tick.tick2On = False

    # remove the bottom ticks
    for tick in ax.xaxis.get_major_ticks():
        tick.tick1On = False

    # rotate the x-axis labels by 45 degrees to enhance packing
    for label in ax.xaxis.get_ticklabels():
        label.set_rotation(45)

    # set font properties
    for label in ax.xaxis.get_ticklabels() + ax.yaxis.get_ticklabels():
        label.set_fontproperties(_ROBOTO_REGULAR)

    # disable bottom, top and right spines, we don't need them
    ax.spines['bottom'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)

    def format_xlabel(x, pos=None):
        # map a tick position to its label, clamped to the valid range
        idx = np.clip(int(x)-1, 0, N-1)
        return labels[idx]

    ax.xaxis.set_major_formatter(FuncFormatter(format_xlabel))
    # avoid too much precision
    ax.yaxis.set_major_formatter(FormatStrFormatter('%1.1f'))

    # set the ticks
    ysep = 0.5 if alphlen < 20 else 1.0
    yticks = np.arange(0, maxbits, ysep, dtype=float)
    # make the last tick land exactly on maxbits: replace the last regular
    # tick if it is closer than one step, otherwise append a new one
    if maxbits - yticks[-1] < ysep:
        yticks[-1] = maxbits
    else:
        yticks = np.append(yticks, maxbits)
    ax.set_yticks(yticks)
    ax.set_xticks(np.arange(1, N+1, dtype=float) + 0.5)

    # set the axes limits here AFTER the ticks, otherwise borkage
    ax.set_xlim((1, N+1))
    ax.set_ylim((0, maxbits))

    # draw one layer of stacked bars per letter rank, then swap each bar
    # for the glyph of its letter, placed with the bar's transform
    idxs = np.arange(1, N+1)
    bottoms = np.zeros((N,), dtype=float)
    for i in range(alphlen):
        bars = ax.bar(idxs, heights[i, :], width=1., bottom=bottoms)
        bottoms += heights[i, :]
        for j, bar in enumerate(bars):
            # identity 0 means no letter was assigned to this slot
            if identities[i, j]:
                l = alphkeys[identities[i, j]]
                glyph = font[l]
                ax.add_patch(glyph)
                glyph.set_transform(bar.get_transform())
                bar.set_visible(False)
                glyph.set_edgecolor(edgecolor)
                glyph.set_facecolor(colors[l])
                glyph.set_linewidth(linewidth)
                glyph.set_zorder(-1)

    # set the remaining spine to show the maximum value
    ax.spines['left'].set_bounds(0, max(bottoms))

    fig.savefig(filename, format=format, transparent=transparent, bbox_inches='tight', pad_inches=0.25)

    return filename
Exemple #22
0
 def __init__(self):
     """Initialise the BaseMotif state and start with no references."""
     # Run the base initialiser first, then add this class's own attribute.
     BaseMotif.__init__(self)
     self.references = []