Ejemplo n.º 1
0
def logo_from_frequency(data, output_file):
    """Render a sequence logo over the alphabet "ACGU" and save it as a PNG.

    Args:
        data: Count data passed to ``wl.array`` and then to
            ``LogoData.from_counts``. Must contain integer data only.
            (Presumably one row per position and one column per symbol of
            "ACGU" -- confirm against the caller.)
        output_file: Path of the PNG file to write.
    """
    # must have integer data only
    counts = wl.array(data)
    logo_data = wl.LogoData.from_counts("ACGU", counts)

    options = wl.LogoOptions()
    options.color_scheme = wl.colorscheme.nucleotide
    options.unit_name = "probability"
    options.fineprint = ""
    options.creator_text = ""

    logo_format = wl.LogoFormat(logo_data, options)
    # A PNG is binary data: open the target in binary mode so the bytes are
    # written unmodified (text mode corrupts them on some platforms).
    with open(output_file, "wb") as f:
        wl.png_formatter(logo_data, logo_format, f)
Ejemplo n.º 2
0
def logo(dist, tag, dir):
    """Generate a logo with the given tag in the given directory.

    Args:
        dist: Count matrix handed to ``LogoData.from_counts`` together with
            the unambiguous DNA alphabet.
        tag: Used as the logo title and in the file name ``logo-<tag>.png``.
        dir: Directory in which the PNG file is created.
    """
    import weblogolib as W
    import corebio.seq as S

    data = W.LogoData.from_counts(S.unambiguous_dna_alphabet, dist)
    options = W.LogoOptions(
        logo_title=tag,
        color_scheme=W.colorscheme.nucleotide,
        show_xaxis=False,
        show_yaxis=True,
        show_fineprint=False,
    )
    logo_format = W.LogoFormat(data, options)
    filename = "logo-%s" % tag
    # Binary mode because PNG is binary; the context manager guarantees the
    # handle is flushed and closed even if the formatter raises.
    with open(os.path.join(dir, "%s.png" % filename), "wb") as out:
        W.png_formatter(data, logo_format, out)
Ejemplo n.º 3
0
def build_logo(sequences, outfilename):
    """Build a DNA sequence logo from raw sequences and write it as a PNG.

    Args:
        sequences: Iterable of sequence strings. Every whitespace character
            inside a sequence is replaced by "-" before it is added.
        outfilename: Path of the PNG file to write.
    """
    seqs = corebio.seq.SeqList(alphabet=corebio.seq.dna_alphabet)
    for sequence in sequences:
        # Raw string: "\s" in a plain literal is an invalid escape sequence.
        seqs.append(corebio.seq.dna(re.sub(r"\s", "-", sequence)))

    data = weblogolib.LogoData.from_seqs(seqs)
    options = weblogolib.LogoOptions()
    options.color_scheme = weblogolib.classic

    # NOTE(review): other callers set the title via ``logo_title``; confirm
    # that ``title`` is actually honoured by the installed weblogolib.
    options.title = 'A Logo Title'
    logo_format = weblogolib.LogoFormat(data, options)
    # PNG output is binary; close the handle deterministically.
    with open(outfilename, 'wb') as fout:
        weblogolib.png_formatter(data, logo_format, fout)
Ejemplo n.º 4
0
def build_logo(sequences, outfilename):
    """Create a PNG sequence logo from a collection of DNA sequences.

    Args:
        sequences: Iterable of sequence strings; whitespace characters are
            converted to "-" before the sequence is appended.
        outfilename: Destination path of the PNG image.
    """
    seqs = corebio.seq.SeqList(alphabet=corebio.seq.dna_alphabet)
    for sequence in sequences:
        # Use a raw string so the regex escape is not read as a string escape.
        cleaned = re.sub(r"\s", "-", sequence)
        seqs.append(corebio.seq.dna(cleaned))

    data = weblogolib.LogoData.from_seqs(seqs)
    options = weblogolib.LogoOptions()
    options.color_scheme = weblogolib.classic

    # NOTE(review): verify that ``title`` (rather than ``logo_title``) is the
    # attribute the installed weblogolib reads.
    options.title = 'A Logo Title'
    logo_format = weblogolib.LogoFormat(data, options)
    # Open in binary mode (PNG) and make sure the file is always closed.
    with open(outfilename, 'wb') as fout:
        weblogolib.png_formatter(data, logo_format, fout)
Ejemplo n.º 5
0
    def draw_logo(self, filename, title=None):
        '''
        Draws a sequence logo from the PWM

        Requires weblogolib is available.

        Does not respect typical glbase config. Particularly config.draw_mode (Output is always a png)

        **Arguments**
            filename
                The filename to save the image to.

            title (Optional, default=the pwm name)
                A title for the logo.

        **Raises**
            AssertionError if no filename is given or weblogolib cannot be
            imported.
        '''
        assert filename, "pwm.draw_logo: You must specify a filename"

        # weblogolib is an optional dependency, so it is imported lazily and
        # any import-time failure is reported as an AssertionError.
        try:
            import weblogolib
        except Exception:
            raise AssertionError(
                'pwm.draw_logo: Asking to draw logo, but weblogolib not found/available'
            )

        if not title:
            title = self.name

        data = weblogolib.LogoData.from_counts("ACGT", self.__original_PFM)

        options = weblogolib.LogoOptions()
        options.logo_title = title
        options.title_fontsize = 4
        options.resolution = 200
        options.show_xaxis = True
        options.show_yaxis = True
        options.scale_width = False
        #options.logo_label = "motif: %s" % name
        # NOTE(review): other callers disable the fine print through
        # ``show_fineprint=False``; confirm that assigning False here has the
        # intended effect.
        options.fineprint = False
        options.color_scheme = weblogolib.std_color_schemes["base pairing"]
        format = weblogolib.LogoFormat(data, options)

        # The context manager closes the file even if the formatter raises.
        with open(filename, "wb") as out:
            weblogolib.png_formatter(data, format, out)
        config.log.info("pwm.draw_logo: Saved '%s' logo" % filename)
Ejemplo n.º 6
0
def seqlogo(source_name, list_weights):
    """
    Write one PNG sequence logo per filter of every layer.

    For each entry of ``list_weights`` axis 2 is moved to the front, and every
    slice along that new first axis is rendered as its own logo. The images
    are stored as ``./<source_name>_<length>/<n>.png`` with ``n`` starting
    at 1, where ``length`` is the size of the second axis after the move.

    :param source_name: string source name for folder name
    :param list_weights: list of weights from different layers
    :return: None; sequence logos are written to folders for all filters
    """

    for weights in list_weights:
        layer_w = np.moveaxis(weights, 2, 0)
        length = layer_w.shape[1]
        path = './' + source_name + '_' + str(length) + '/'
        os.makedirs(path[:-1], exist_ok=True)

        for number, filter_w in enumerate(layer_w, start=1):
            data = weblogolib.LogoData.from_counts(
                'ACGT', weights_modification(filter_w))
            options = weblogolib.LogoOptions(
                fineprint=False,
                logo_title='',
                color_scheme=weblogolib.classic,
                stack_width=weblogolib.std_sizes["large"],
                logo_start=1,
                logo_end=length,
                resolution=600)
            logo_format = weblogolib.LogoFormat(data, options)
            # Render first, then write inside a context manager so the file
            # handle is released even when writing fails.
            image = weblogolib.png_formatter(data, logo_format)
            with open(path + str(number) + '.png', 'wb') as f:
                f.write(image)
    def create_logo(self, seqs=()):
        """Create sequence logo for input sequences.

        ``seqs`` is an iterable of two-item entries (header, sequence); only
        the sequences are used. The alphabet is chosen from
        ``self.options.sequence_type`` ('rna', 'protein', anything else is
        treated as DNA) and the formatter from ``self.output_format``
        ('png', 'png_print', 'jpeg', anything else gives EPS).

        Returns the value produced by the selected weblogo formatter.
        Raises ValueError when ``seqs`` is empty, because it cannot be split
        into headers and sequences.
        """
        # separate headers
        _headers, instances = [list(x) for x in zip(*seqs)]

        # Strings must be compared by value; ``is`` only matched when the
        # interpreter happened to intern both operands.
        sequence_type = self.options.sequence_type
        if sequence_type == 'rna':
            alphabet = Alphabet('ACGU')
        elif sequence_type == 'protein':
            alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
        else:
            alphabet = Alphabet('AGCT')
        motif_corebio = SeqList(alist=instances, alphabet=alphabet)
        data = wbl.LogoData().from_seqs(motif_corebio)

        format = wbl.LogoFormat(data, self.options)

        if self.output_format == 'png':
            return wbl.png_formatter(data, format)
        elif self.output_format == 'png_print':
            return wbl.png_print_formatter(data, format)
        elif self.output_format == 'jpeg':
            return wbl.jpeg_formatter(data, format)
        else:
            return wbl.eps_formatter(data, format)
Ejemplo n.º 8
0
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Write a DNA sequence logo for ``pwm`` and pickle the reordered matrix.

    Args:
        pwm: 2-D array indexed as ``pwm[:, columns]``; its columns are
            re-selected by mapping every letter of ``alphabet`` through
            A->0, C->1, G->2, T->3.
        filename: Output path without extension. ``<filename>.pkl`` receives
            the reordered matrix and ``<filename>.<mode>`` the logo.
        alphabet: Letters (from "ACGT") that determine the column selection.
        mode: Output format; ``wl.<mode>_formatter`` must exist
            (e.g. "pdf", "png").

    Raises:
        AttributeError: if weblogolib has no formatter for ``mode``.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle

    column_of = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([column_of[letter] for letter in alphabet])
    pwm = pwm[:, translate]

    with open(filename + ".pkl", 'wb') as pickle_file:
        pickle.dump(pwm, pickle_file, -1)

    import weblogolib as wl  # @UnresolvedImport
    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], np.array(pwm))
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    logo_format = wl.LogoFormat(data, options)

    # Look the formatter up by name instead of building code for exec(); the
    # old exec() string also passed the builtin ``format`` rather than the
    # LogoFormat object, so every mode other than pdf/png failed.
    formatter = getattr(wl, "{0}_formatter".format(mode))
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(formatter(data, logo_format))
0
    def create_logo(self, seqs=()):
        """Create sequence logo for input sequences.

        Each entry of ``seqs`` must hold exactly two items, a header and a
        sequence; the headers are discarded. ``self.options.sequence_type``
        selects the alphabet ('rna', 'protein', otherwise DNA) and
        ``self.output_format`` selects the formatter ('png', 'png_print',
        'jpeg', otherwise EPS).

        Returns the output of the chosen weblogo formatter. An empty ``seqs``
        raises ValueError because there is nothing to unpack.
        """
        # separate headers
        _headers, instances = [list(column) for column in zip(*seqs)]

        # ``==`` instead of ``is``: identity comparison with a string literal
        # depends on interning and is not a reliable equality test.
        kind = self.options.sequence_type
        if kind == 'rna':
            alphabet = Alphabet('ACGU')
        elif kind == 'protein':
            alphabet = Alphabet('ACDEFGHIKLMNPQRSTVWY')
        else:
            alphabet = Alphabet('AGCT')
        motif_corebio = SeqList(alist=instances, alphabet=alphabet)
        data = wbl.LogoData().from_seqs(motif_corebio)

        format = wbl.LogoFormat(data, self.options)

        if self.output_format == 'png':
            return wbl.png_formatter(data, format)
        elif self.output_format == 'png_print':
            return wbl.png_print_formatter(data, format)
        elif self.output_format == 'jpeg':
            return wbl.jpeg_formatter(data, format)
        else:
            return wbl.eps_formatter(data, format)
Ejemplo n.º 10
0
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Save a DNA sequence logo of ``pwm`` plus a pickle of the matrix.

    Args:
        pwm: 2-D array; columns are picked with ``pwm[:, idx]`` where ``idx``
            maps each letter of ``alphabet`` via A->0, C->1, G->2, T->3.
        filename: Path stem. The reordered matrix goes to ``<filename>.pkl``
            and the logo to ``<filename>.<mode>``.
        alphabet: Letters (subset/permutation of "ACGT") driving the column
            selection.
        mode: Name of the output format; resolved to
            ``wl.<mode>_formatter`` (for example "pdf" or "png").

    Raises:
        AttributeError: if weblogolib provides no ``<mode>_formatter``.
    """
    try:
        import cPickle as pickle  # Python 2
    except ImportError:
        import pickle

    letter_index = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([letter_index[i] for i in alphabet])
    pwm = pwm[:, translate]

    with open(filename + ".pkl", 'wb') as dump_file:
        pickle.dump(pwm, dump_file, -1)

    import weblogolib as wl  # @UnresolvedImport
    PWMdata = np.array(pwm)
    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], PWMdata)
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    formatt = wl.LogoFormat(data, options)

    # Resolve the formatter with getattr rather than exec(): the previous
    # exec() string referenced the builtin ``format`` instead of ``formatt``
    # and therefore never worked for modes other than pdf/png.
    render = getattr(wl, "{0}_formatter".format(mode))
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(render(data, formatt))
Ejemplo n.º 11
0
def logo(dist, tag, dir):
    """Generate a logo with the given tag in the given directory.

    Args:
        dist: Count matrix passed to ``LogoData.from_counts`` with the
            unambiguous DNA alphabet.
        tag: Logo title; also used to build the file name ``logo-<tag>.png``.
        dir: Target directory for the PNG file.
    """
    import weblogolib as W
    import corebio.seq as S
    data = W.LogoData.from_counts(S.unambiguous_dna_alphabet, dist)
    options = W.LogoOptions(
        logo_title=tag,
        color_scheme=W.colorscheme.nucleotide,
        show_xaxis=False,
        show_yaxis=True,
        show_fineprint=False,
    )
    logo_format = W.LogoFormat(data, options)
    png_path = os.path.join(dir, '%s.png' % ('logo-%s' % tag))
    # PNG data is binary, and the handle must be closed so the image is
    # completely flushed to disk.
    with open(png_path, 'wb') as png_file:
        W.png_formatter(data, logo_format, png_file)
Ejemplo n.º 12
0
def outputMotif(theta_motif, theta_background_matrix, lambda_motif, logev, k, outstr):
    """Report a motif on stdout and save its logo as PNG and EPS.

    Args:
        theta_motif: Count matrix of the motif, passed to
            ``LogoData.from_counts`` with the unambiguous DNA alphabet.
        theta_background_matrix: Unused; kept for interface compatibility.
        lambda_motif: Motif fraction; its logarithm is printed.
        logev: Logarithm of the motif E-value, printed as is.
        k: Motif identifier used in the messages and file names.
        outstr: Path prefix; files are ``<outstr>Motif_<k>.png`` / ``.eps``.
    """
    from weblogolib import LogoData, LogoOptions, LogoFormat, png_formatter, eps_formatter, unambiguous_dna_alphabet
    _pv_format = "%3.1fe%+04.0f"
    f_string = sprint_logx(log(lambda_motif), 1, _pv_format)
    g_string = sprint_logx(logev, 1, _pv_format)
    print(("Motif {0:s} had a fraction of {1:s}").format(str(k), f_string))
    print(("Motif {0:s} had an E-value of {1:s}").format(str(k), g_string))
    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function call (Python 3).
    print('Saving motif as a png...')
    data = LogoData.from_counts(counts=theta_motif, alphabet=unambiguous_dna_alphabet)  # ,prior=theta_background_matrix[0])#Does prior mess things up?
    options = LogoOptions()
    options.title = 'Motif'
    forma = LogoFormat(data, options)
    # PNG is binary data, so write it through a binary-mode handle.
    with open(outstr + "Motif_" + str(k) + '.png', 'wb') as fout:
        png_formatter(data, forma, fout)
    print('Saving motif as an eps...')
    with open(outstr + "Motif_" + str(k) + '.eps', 'w') as fout:
        eps_formatter(data, forma, fout)
Ejemplo n.º 13
0
def outputMotif(theta_motif, theta_background_matrix, lambda_motif, logev, k, outstr):
    """Print motif statistics and write the motif logo as PNG and EPS files.

    Args:
        theta_motif: Motif count matrix given to ``LogoData.from_counts``.
        theta_background_matrix: Not used by this function.
        lambda_motif: Fraction of the motif; ``log(lambda_motif)`` is printed.
        logev: Log E-value of the motif.
        k: Motif number, used for messages and output file names.
        outstr: Prefix of the output paths ``<outstr>Motif_<k>.<ext>``.
    """
    from weblogolib import LogoData, LogoOptions, LogoFormat, png_formatter, eps_formatter, unambiguous_dna_alphabet
    _pv_format = "%3.1fe%+04.0f"
    f_string = sprint_logx(log(lambda_motif), 1, _pv_format)
    g_string = sprint_logx(logev, 1, _pv_format)
    print(("Motif {0:s} had a fraction of {1:s}").format(str(k), f_string))
    print(("Motif {0:s} had an E-value of {1:s}").format(str(k), g_string))

    data = LogoData.from_counts(counts=theta_motif, alphabet=unambiguous_dna_alphabet)  # ,prior=theta_background_matrix[0])#Does prior mess things up?
    options = LogoOptions()
    options.title = 'Motif'
    forma = LogoFormat(data, options)
    base_name = outstr + "Motif_" + str(k)

    # Parenthesised single-argument print works under Python 2 and 3 alike.
    print('Saving motif as a png...')
    # Binary mode for the PNG; ``with`` closes the file even on errors.
    with open(base_name + '.png', 'wb') as png_out:
        png_formatter(data, forma, png_out)

    print('Saving motif as an eps...')
    with open(base_name + '.eps', 'w') as eps_out:
        eps_formatter(data, forma, eps_out)
Ejemplo n.º 14
0
def make_logo(fasta_path, png_path, title, start_pos, counts=None):
    """Create a PNG sequence logo from a counts mapping or a sequence file.

    Args:
        fasta_path: Sequence file read with ``weblogolib.read_seq_data``;
            only used when ``counts`` is empty or None.
        png_path: Path of the PNG file to write.
        title: Title shown on the logo.
        start_pos: Index assigned to the first logo position.
        counts: Optional mapping with the keys 'A', 'C', 'G' and 'T', each
            holding the per-position counts for that base.
    """
    if counts:
        # Stack the per-base rows and transpose so bases become columns.
        mat_counts = np.array([counts[base] for base in 'ACGT']).transpose()
        data = weblogolib.LogoData.from_counts(unambiguous_dna_alphabet, mat_counts)
    else:
        with open(fasta_path) as handle:
            seqs = weblogolib.read_seq_data(handle)
            data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.logo_title = title
    options.resolution = 500
    options.first_index = start_pos
    options.number_interval = 1
    options.rotate_numbers = True
    options.color_scheme = nucleotide
    fmt = weblogolib.LogoFormat(data, options)
    # PNG is a binary format, so the output file is opened in binary mode.
    with open(png_path, 'wb') as handle:
        weblogolib.png_formatter(data, fmt, handle)
Ejemplo n.º 15
0
def logo(dist, tag, make_png=False, make_eps=True, write_title=True):
    """Generate a logo named with the given tag in the current directory.

    Args:
        dist: Count matrix passed to ``LogoData.from_counts`` with the
            unambiguous DNA alphabet.
        tag: Used for the file names ``logo-<tag>.eps`` / ``logo-<tag>.png``
            and, when ``write_title`` is true, as the logo title.
        make_png: Write the PNG version of the logo.
        make_eps: Write the EPS version of the logo.
        write_title: Show ``tag`` as the title; otherwise no title is set.
    """
    import weblogolib as W
    import corebio.seq
    data = W.LogoData.from_counts(corebio.seq.unambiguous_dna_alphabet, dist)
    scale = 5.4 * 4
    options = W.LogoOptions(
        # Same result as ``write_title and tag or None``: an empty tag also
        # yields None.
        logo_title=(tag or None) if write_title else None,
        stack_width=scale,
        stack_aspect_ratio=5,
        color_scheme=W.colorscheme.nucleotide,
        show_xaxis=False,
        show_yaxis=False,
        show_fineprint=False,
    )
    format_ = W.LogoFormat(data, options)
    filename = 'logo-%s' % tag
    # Context managers ensure the output is flushed and the handles closed.
    if make_eps:
        with open('%s.eps' % filename, 'w') as eps_file:
            W.eps_formatter(data, format_, eps_file)
    if make_png:
        # PNG is binary data, hence binary mode.
        with open('%s.png' % filename, 'wb') as png_file:
            W.png_formatter(data, format_, png_file)
Ejemplo n.º 16
0
def plot_motif_from_sites( sites, img_format='png', smallText=None ):
    """Build a sequence logo from a list of DNA sites.

    Args:
        sites: Sequences wrapped in a ``wl.SeqList`` with the unambiguous
            DNA alphabet.
        img_format: 'png', 'svg' or 'pdf'.
        smallText: Optional fine print for the logo; the weblogo default is
            kept when None.

    Returns:
        For 'png' the ``plt`` module after the image has been drawn with
        ``plt.imshow``; for 'svg' and 'pdf' the formatter output; None for
        any other ``img_format``.
    """
    ldata = wl.LogoData.from_seqs(wl.SeqList(sites, wl.unambiguous_dna_alphabet))
    options = wl.LogoOptions()
    # Only the fine print depends on smallText. The rendering below used to
    # be nested under this condition, so the default call returned None.
    if smallText is not None:
        options.fineprint = smallText ##os.path.dirname(self.dbfile) + ' ' + self.organism
    format = wl.LogoFormat(ldata, options)
    format.color_scheme = wl.classic
    format.resolution = 150
    if img_format == 'png':
        tmp = wl.png_formatter( ldata, format )
        output = cStringIO.StringIO(tmp)
        img = mpimg.imread(output)
        plt.axis('off')
        plt.imshow( img )
        return plt
    elif img_format == 'svg':
        return wl.svg_formatter( ldata, format )
    elif img_format == 'pdf':
        return wl.pdf_formatter( ldata, format )
Ejemplo n.º 17
0
def plot_motif_from_sites( sites, img_format='png', smallText=None ):
    """Render the sequence logo of the given DNA sites.

    Args:
        sites: Site sequences, placed in a ``wl.SeqList`` using the
            unambiguous DNA alphabet.
        img_format: Output kind: 'png', 'svg' or 'pdf'.
        smallText: Fine print to show on the logo; left at the weblogo
            default when None.

    Returns:
        ``plt`` (with the logo drawn via ``plt.imshow``) for 'png', the raw
        formatter output for 'svg' and 'pdf', and None for other formats.
    """
    ldata = wl.LogoData.from_seqs(wl.SeqList(sites, wl.unambiguous_dna_alphabet))
    options = wl.LogoOptions()
    if smallText is not None:
        options.fineprint = smallText ##os.path.dirname(self.dbfile) + ' ' + self.organism

    # Rendering is independent of smallText; it was previously indented under
    # the check above, so nothing was produced for the default smallText=None.
    format = wl.LogoFormat(ldata, options)
    format.color_scheme = wl.classic
    format.resolution = 150

    if img_format == 'png':
        png_data = wl.png_formatter( ldata, format )
        img = mpimg.imread(cStringIO.StringIO(png_data))
        plt.axis('off')
        plt.imshow( img )
        return plt
    elif img_format == 'svg':
        return wl.svg_formatter( ldata, format )
    elif img_format == 'pdf':
        return wl.pdf_formatter( ldata, format )
Ejemplo n.º 18
0
    def plot_motif(self, cluster_num, motif_num, img_format='png'):
        """Render the sequence logo of one motif of a cluster.

        The motif id is looked up from ``cluster_num`` and ``motif_num``, the
        matching rows of the 'meme_motif_sites' table are selected, and their
        ``seq`` values are turned into a logo. The fine print shows the
        directory of ``self.dbfile`` followed by both numbers, zero-padded to
        three digits.

        Returns ``plt`` with the image drawn for 'png', the SVG formatter
        output for 'svg', and None for any other ``img_format``.
        """
        motif_id = self.__get_motif_id(cluster_num, motif_num)
        site_table = self.tables['meme_motif_sites']
        motif_sites = site_table[site_table.motif_info_id == motif_id]

        site_seqs = wl.SeqList(motif_sites.seq.values.tolist(),
                               wl.unambiguous_dna_alphabet)
        logo_data = wl.LogoData.from_seqs(site_seqs)

        options = wl.LogoOptions()
        label = ' %03d %03d' % (cluster_num, motif_num)
        options.fineprint = os.path.dirname(self.dbfile) + label

        logo_format = wl.LogoFormat(logo_data, options)
        logo_format.color_scheme = wl.classic
        logo_format.resolution = 150

        if img_format == 'svg':
            return wl.svg_formatter(logo_data, logo_format)
        if img_format != 'png':
            # Unsupported formats yield None, as before.
            return None

        # Decode the rendered PNG through an in-memory buffer and draw it on
        # the current matplotlib axes without axis decorations.
        png_data = wl.png_formatter(logo_data, logo_format)
        image = mpimg.imread(cStringIO.StringIO(png_data))
        plt.axis('off')
        plt.imshow(image)
        return plt
Ejemplo n.º 19
0
def logo(dist, tag, make_png=True, make_eps=False, write_title=True):
    """Generate a logo named with the given tag.

    Args:
        dist: Count matrix passed to ``LogoData.from_counts`` with the
            unambiguous DNA alphabet.
        tag: Used for the file names ``logo-<tag>.png`` / ``logo-<tag>.eps``
            and, when ``write_title`` is true, as the logo title.
        make_png: Write the PNG version of the logo.
        make_eps: Write the EPS version of the logo.
        write_title: Show ``tag`` as the title; otherwise no title is set.

    Raises:
        ValueError: if the installed weblogolib is older than version 3.4.
    """
    import corebio.seq
    import weblogolib as W
    if tuple(map(int, W.__version__.split('.'))) < (3, 4):
        raise ValueError('weblogolib version 3.4 or higher required')
    data = W.LogoData.from_counts(corebio.seq.unambiguous_dna_alphabet, dist)
    scale = 5.4 * 4
    options = W.LogoOptions(
        # Equivalent to ``write_title and tag or None`` (empty tag -> None).
        logo_title=(tag or None) if write_title else None,
        stack_width=scale,
        stack_aspect_ratio=5,
        color_scheme=W.colorscheme.nucleotide,
        show_xaxis=False,
        show_yaxis=False,
        show_fineprint=False,
    )
    format_ = W.LogoFormat(data, options)
    filename = 'logo-%s' % tag
    # The formatters return the rendered image; write it through binary-mode
    # handles that are closed deterministically.
    if make_eps:
        with open('%s.eps' % filename, 'wb') as eps_file:
            eps_file.write(W.eps_formatter(data, format_))
    if make_png:
        with open('%s.png' % filename, 'wb') as png_file:
            png_file.write(W.png_formatter(data, format_))