def generate_base_weblogo(options, allsequences, output):
    """Write a PDF sequence logo of ``allsequences`` to ``output``.

    The sequences are serialised as FASTA records (each named by its
    position in ``allsequences``), parsed with weblogo's RNA alphabet and
    drawn in probability units. A, C, G and U/T each get a colour taken
    from ``iwork_colors``.

    Args:
        options: Object providing ``flanking_window``; the first logo
            position is numbered ``-options.flanking_window``.
        allsequences: Iterable of sequence strings.
        output: Path of the PDF file to write.
    """
    import weblogolib as wl
    from weblogolib.colorscheme import ColorScheme, ColorGroup
    from StringIO import StringIO

    ecliptic_color_scheme = ColorScheme([
        ColorGroup('A', iwork_colors.blue),
        ColorGroup('C', iwork_colors.green),
        ColorGroup('G', iwork_colors.yellow),
        ColorGroup('UT', iwork_colors.red),
    ])

    # enumerate() yields (index, sequence) tuples, which feed the two
    # placeholders of the FASTA record template directly.
    fastainput = StringIO(
        ''.join('>%d\n%s\n' % record for record in enumerate(allsequences)))

    rna = wl.std_alphabets['rna']
    seqs = wl.read_seq_data(fastainput, alphabet=rna)
    logo = wl.LogoData.from_seqs(seqs)

    logooptions = wl.LogoOptions()
    logooptions.unit_name = 'probability'
    logooptions.color_scheme = ecliptic_color_scheme
    logooptions.show_fineprint = False
    logooptions.first_index = -options.flanking_window
    logooptions.title = "Ecliptic"

    logo_format = wl.LogoFormat(logo, logooptions)
    # PDF is binary data; the context manager guarantees the file is flushed
    # and closed even if rendering fails.
    with open(output, 'wb') as pdf_file:
        wl.pdf_formatter(logo, logo_format, pdf_file)
Example #2
0
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Write a nucleotide sequence logo for ``pwm`` and pickle the matrix.

    Two files are produced: ``filename + ".pkl"`` holding the (column
    reordered) matrix, and ``filename + "." + mode`` holding the logo.

    Args:
        pwm: 2-D array indexable as ``pwm[:, columns]``; it is passed to
            ``LogoData.from_counts`` together with weblogo's DNA alphabet.
        filename: Output path without extension; also used as the logo title.
        alphabet: Letters drawn from ``"ACGT"``. Columns are selected as
            ``pwm[:, [index of each letter in "ACGT"]]``.
        mode: Output format. The weblogo function ``<mode>_formatter`` is
            looked up by name, so any format weblogo provides can be used.

    Raises:
        KeyError: If ``alphabet`` contains a letter other than A, C, G, T.
        AttributeError: If weblogo has no ``<mode>_formatter``.
    """
    import cPickle
    import weblogolib as wl  # @UnresolvedImport

    column_of = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([column_of[letter] for letter in alphabet])
    pwm = pwm[:, translate]

    with open(filename + ".pkl", 'wb') as pickle_file:
        cPickle.dump(pwm, pickle_file, -1)

    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], np.array(pwm))
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    logo_format = wl.LogoFormat(data, options)

    # Resolve the formatter before opening the output so an unsupported mode
    # fails without leaving an empty file behind. This replaces an exec()
    # call that mistakenly passed the builtin `format` to the formatter.
    formatter = getattr(wl, "{0}_formatter".format(mode))
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(formatter(data, logo_format))
Example #3
0
def generate_logos(motifs, foldername, filetype='png'):
    """Write logo images and/or sequence listings for every non-empty motif.

    Each output file is named ``<index>_<number of sequences>.<ext>`` inside
    ``foldername``, where ``index`` is the motif's position in ``motifs``.

    Args:
        motifs: Iterable of motif objects exposing ``data`` (handed to the
            weblogo formatters) and ``seqs``. Falsy entries are skipped but
            still consume an index.
        foldername: Directory that receives the output files.
        filetype: Value tested with ``in`` for ``'png'``, ``'pdf'`` and
            ``'txt'``; several formats may be requested at once
            (e.g. ``'png,pdf'``).

    Raises:
        ValueError: If ``filetype`` requests none of png, pdf or txt.
    """
    # Validate once, up front. Previously the error was raised from the
    # `else` of the 'txt' test only, so a plain 'png' or 'pdf' request wrote
    # its file and then failed.
    if not any(kind in filetype for kind in ('png', 'pdf', 'txt')):
        raise ValueError(
            'Invalid filetype. Available options: png, pdf or txt. ')

    options = wl.LogoOptions()
    options.color_scheme = wl.std_color_schemes["chemistry"]

    for i, motif in enumerate(motifs):
        if not motif:
            continue

        my_format = wl.LogoFormat(motif.data, options)
        stem = os.path.join(foldername, str(i) + '_' + str(len(motif.seqs)))

        if 'png' in filetype:
            to_write = wl.png_print_formatter(motif.data, my_format)
            with open(stem + ".png", "wb") as out:
                out.write(to_write)
        if 'pdf' in filetype:
            to_write = wl.pdf_formatter(motif.data, my_format)
            with open(stem + ".pdf", "wb") as out:
                out.write(to_write)
        if 'txt' in filetype:
            to_write = ''.join(["%s\n" % str(seq) for seq in motif.seqs])
            with open(stem + ".txt", "w") as out:
                out.write(to_write)
Example #4
0
def printlogo(pwm, filename, alphabet="ACGT", mode="pdf"):
    """Write a nucleotide sequence logo for ``pwm`` and pickle the matrix.

    Two files are produced: ``filename + ".pkl"`` holding the (column
    reordered) matrix, and ``filename + "." + mode`` holding the logo.

    Args:
        pwm: 2-D array indexable as ``pwm[:, columns]``; it is passed to
            ``LogoData.from_counts`` together with weblogo's DNA alphabet.
        filename: Output path without extension; also used as the logo title.
        alphabet: Letters drawn from ``"ACGT"``. Columns are selected as
            ``pwm[:, [index of each letter in "ACGT"]]``.
        mode: Output format. The weblogo function ``<mode>_formatter`` is
            looked up by name, so any format weblogo provides can be used.

    Raises:
        KeyError: If ``alphabet`` contains a letter other than A, C, G, T.
        AttributeError: If weblogo has no ``<mode>_formatter``.
    """
    import cPickle
    import weblogolib as wl  # @UnresolvedImport

    column_of = {"A": 0, "C": 1, "G": 2, "T": 3}
    translate = np.array([column_of[letter] for letter in alphabet])
    pwm = pwm[:, translate]

    with open(filename + ".pkl", 'wb') as pickle_file:
        cPickle.dump(pwm, pickle_file, -1)

    data = wl.LogoData.from_counts(wl.std_alphabets["dna"], np.array(pwm))
    options = wl.LogoOptions(resolution=300)
    options.title = filename
    options.color_scheme = wl.colorscheme.nucleotide
    logo_format = wl.LogoFormat(data, options)

    # Resolve the formatter before opening the output so an unsupported mode
    # fails without leaving an empty file behind. This replaces an exec()
    # call that mistakenly passed the builtin `format` to the formatter.
    formatter = getattr(wl, "{0}_formatter".format(mode))
    with open(filename + ".{0}".format(mode), 'wb') as fout:
        fout.write(formatter(data, logo_format))
def main():
    """Render a sequence logo of a workspace's validated designs as a PDF.

    Command-line arguments are parsed by docopt from the module docstring:
    ``<workspace>`` and ``<round>`` select the validated-designs workspace,
    and ``<pdf_output>`` is the path the logo is written to.
    """
    cli = docopt.docopt(__doc__)
    workspace_root, round_id, pdf_path = (
        cli['<workspace>'], cli['<round>'], cli['<pdf_output>'])

    # Only validated designs are considered for now; fixbb designs and
    # restrained models could be offered as alternatives later.
    workspace = pipeline.ValidatedDesigns(workspace_root, round_id)
    workspace.check_paths()

    designs = []
    for subdir in workspace.output_subdirs:
        designs.append(structures.Design(subdir))

    residue_seqs = []
    for design in designs:
        residue_seqs.append(corebio.seq.Seq(design.resfile_sequence))
    sequences = corebio.seq.SeqList(
        residue_seqs,
        alphabet=corebio.seq.unambiguous_protein_alphabet,
    )

    data = weblogo.LogoData.from_seqs(sequences)
    settings = weblogo.LogoOptions()
    settings.title = workspace.focus_dir
    layout = weblogo.LogoFormat(data, settings)

    with open(pdf_path, 'wb') as pdf_file:
        pdf_file.write(weblogo.pdf_formatter(data, layout))
Example #6
0
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Draw a sequence logo of the context around C>T calls.

    ``inFileN`` is read as a tab-separated table: the first line is skipped,
    the second line is the header, and every following line is one call.
    Rows whose ``judgement`` is ``REJECT`` are ignored. For the rest, the
    first three and last three characters of ``context`` are joined around
    ``ref_allele``; when the reference is neither C nor T the context and
    both alleles are passed through ``mybasic.rc``. Contexts of calls that
    end up as C>T are written one per line to ``outFileN``, which is then
    fed to weblogo to produce ``pdfFileN``.

    Args:
        sampN: Sample name, stored as the logo title.
        inFileN: Path of the input table; its header must name the columns
            ``context``, ``ref_allele``, ``alt_allele`` and ``judgement``.
        outFileN: Path of the intermediate file of selected contexts.
        pdfFileN: Path of the PDF logo to write.

    Raises:
        KeyError: If a required column is missing from the header.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        idxH = {name: i for i, name in enumerate(headerL)}
        context_i = idxH['context']
        ref_i = idxH['ref_allele']
        alt_i = idxH['alt_allele']
        status_i = idxH['judgement']

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')
                if colL[status_i] == 'REJECT':
                    continue

                context = colL[context_i]
                ref = colL[ref_i]
                alt = colL[alt_i]

                # Keep three flanking characters on each side of the
                # reference allele.
                context = context[:3] + ref + context[-3:]
                if ref not in ['C', 'T']:
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)

                if ref == 'C' and alt == 'T':  ## TMZ context only
                    outFile.write('%s\n' % context)

    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)
    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN
    logo_format = weblogolib.LogoFormat(data, options)
    # PDF is binary; the context manager makes sure it is flushed and closed.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
Example #7
0
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
	"""Draw a sequence logo of the context around C>T calls.

	``inFileN`` is read as a tab-separated table: the first line is skipped,
	the second line is the header, and every following line is one call.
	Rows whose ``judgement`` is ``REJECT`` are ignored. For the rest, the
	first three and last three characters of ``context`` are joined around
	``ref_allele``; when the reference is neither C nor T the context and
	both alleles are passed through ``mybasic.rc``. Contexts of calls that
	end up as C>T are written one per line to ``outFileN``, which is then
	fed to weblogo to produce ``pdfFileN``.

	Args:
		sampN: Sample name, stored as the logo title.
		inFileN: Path of the input table; its header must name the columns
			``context``, ``ref_allele``, ``alt_allele`` and ``judgement``.
		outFileN: Path of the intermediate file of selected contexts.
		pdfFileN: Path of the PDF logo to write.

	Raises:
		KeyError: If a required column is missing from the header.
	"""
	with open(inFileN, 'r') as inFile:
		inFile.readline() #comment line
		headerL = inFile.readline().rstrip().split('\t')
		idxH = {name: i for i, name in enumerate(headerL)}
		context_i = idxH['context']
		ref_i = idxH['ref_allele']
		alt_i = idxH['alt_allele']
		status_i = idxH['judgement']

		with open(outFileN, 'w') as outFile:
			for line in inFile:
				colL = line.rstrip().split('\t')
				if colL[status_i] == 'REJECT':
					continue

				context = colL[context_i]
				ref = colL[ref_i]
				alt = colL[alt_i]

				# Keep three flanking characters on each side of the
				# reference allele.
				context = context[:3] + ref + context[-3:]
				if ref not in ['C','T']:
					context = mybasic.rc(context)
					ref = mybasic.rc(ref)
					alt = mybasic.rc(alt)

				if ref == 'C' and alt == 'T':## TMZ context only
					outFile.write('%s\n' % context)

	with open(outFileN, 'r') as fin:
		seqs = weblogolib.read_seq_data(fin)
	data = weblogolib.LogoData.from_seqs(seqs)
	options = weblogolib.LogoOptions()
	options.show_fineprint = False
	options.first_index = -3
	options.logo_title = sampN
	logo_format = weblogolib.LogoFormat(data, options)
	# PDF is binary; the context manager makes sure it is flushed and closed.
	with open(pdfFileN, 'wb') as fout:
		weblogolib.pdf_formatter(data, logo_format, fout)
def plot_motif_from_sites( sites, img_format='png', smallText=None ):
    """Render a DNA sequence logo for ``sites``.

    Args:
        sites: Sequences handed to ``wl.SeqList`` together with weblogo's
            unambiguous DNA alphabet.
        img_format: ``'png'``, ``'svg'`` or ``'pdf'``.
        smallText: Optional fine-print text for the logo; weblogo's default
            is kept when this is None.

    Returns:
        For ``'png'``: ``plt``, after the decoded image has been drawn with
        ``plt.imshow`` and the axes switched off. For ``'svg'`` and
        ``'pdf'``: the value returned by the matching weblogo formatter.
        For any other ``img_format``: None.
    """
    ldata = wl.LogoData.from_seqs(wl.SeqList(sites, wl.unambiguous_dna_alphabet))
    options = wl.LogoOptions()
    if smallText is not None:
        options.fineprint = smallText ##os.path.dirname(self.dbfile) + ' ' + self.organism

    # Rendering must not depend on smallText: it used to be nested under the
    # check above, so the default call returned None without drawing.
    logo_format = wl.LogoFormat(ldata, options)
    logo_format.color_scheme = wl.classic
    logo_format.resolution = 150

    if img_format == 'png':
        png_data = wl.png_formatter( ldata, logo_format )
        img = mpimg.imread(cStringIO.StringIO(png_data))
        plt.axis('off')
        plt.imshow( img )
        return plt
    if img_format == 'svg':
        return wl.svg_formatter( ldata, logo_format )
    if img_format == 'pdf':
        return wl.pdf_formatter( ldata, logo_format )
    return None
Example #9
0
def plot_motif_from_sites( sites, img_format='png', smallText=None ):
    """Render a DNA sequence logo for ``sites``.

    Args:
        sites: Sequences handed to ``wl.SeqList`` together with weblogo's
            unambiguous DNA alphabet.
        img_format: ``'png'``, ``'svg'`` or ``'pdf'``.
        smallText: Optional fine-print text for the logo; weblogo's default
            is kept when this is None.

    Returns:
        For ``'png'``: ``plt``, after the decoded image has been drawn with
        ``plt.imshow`` and the axes switched off. For ``'svg'`` and
        ``'pdf'``: the value returned by the matching weblogo formatter.
        For any other ``img_format``: None.
    """
    ldata = wl.LogoData.from_seqs(wl.SeqList(sites, wl.unambiguous_dna_alphabet))
    options = wl.LogoOptions()
    if smallText is not None:
        options.fineprint = smallText ##os.path.dirname(self.dbfile) + ' ' + self.organism

    # Rendering must not depend on smallText: it used to be nested under the
    # check above, so the default call returned None without drawing.
    logo_format = wl.LogoFormat(ldata, options)
    logo_format.color_scheme = wl.classic
    logo_format.resolution = 150

    if img_format == 'png':
        png_data = wl.png_formatter( ldata, logo_format )
        img = mpimg.imread(cStringIO.StringIO(png_data))
        plt.axis('off')
        plt.imshow( img )
        return plt
    if img_format == 'svg':
        return wl.svg_formatter( ldata, logo_format )
    if img_format == 'pdf':
        return wl.pdf_formatter( ldata, logo_format )
    return None
Example #10
0
    def do_seqlogo(self, input_fasta, output_seqlogo, col_list):
        """
        Create a PDF sequence logo of the given fasta file and label its
        columns with col_list.

        Args:
            input_fasta: Path of the sequence file read by weblogo.
            output_seqlogo: Path of the PDF file to write.
            col_list: Zero-based column numbers; each is shifted by one
                before being used as the logo's column annotation.
        """

        import weblogolib
        import corebio
        from weblogolib.colorscheme import ColorScheme
        from weblogolib.colorscheme import ColorGroup
        #assigning colors
        hydrophobicity = ColorScheme([
            ColorGroup("RKDENQ", "blue"),
            ColorGroup("SGHTAP", "green"),
            ColorGroup("YVMCLFIW",  "black"),
            ColorGroup("O",  "dark orange")
        ])

        #Here i changed this so that the column begins with 1 not 0
        col_list2 = [i+1 for i in col_list]
        with open(input_fasta) as fin:
            seqs = weblogolib.read_seq_data(fin)
        # Amino-acid letters plus 'O', which has its own colour group above.
        seqs.alphabet = corebio.seq.Alphabet('ACDEFGHIKLMNPQRSTVWYO')
        data = weblogolib.LogoData.from_seqs(seqs)
        data.composition = 'equiprobable'
        options = weblogolib.LogoOptions()
        options.yaxis_scale = 4.5
        options.yaxis_tic_interval = 2.25
        options.annotate = col_list2
        options.stack_width = 30
        options.stacks_per_line = 35
        # NOTE(review): 'No' is a non-empty string and therefore truthy; if
        # weblogo treats scale_width as a boolean this enables scaling.
        # Confirm the intent before changing it.
        options.scale_width = 'No'
        options.color_scheme = hydrophobicity
        format_logo = weblogolib.LogoFormat(data, options)
        pdf_logo = weblogolib.pdf_formatter(data, format_logo)
        # PDF is binary; render first so a failure leaves no empty file.
        with open(output_seqlogo, 'wb') as fout:
            fout.write(pdf_logo)
Example #11
0
def draw_logo(df,seq_dict,inv_dict,dicttype):
    '''Draw logo of sequences.

    Args:
        df: Table indexable by a list of column names; it must hold one
            ``'freq_<letter>'`` column per letter. The selected columns are
            converted with ``np.array`` and used as the counts matrix.
        seq_dict: Only its length is used: the number of letters.
        inv_dict: Maps each index in ``range(len(seq_dict))`` to its letter.
        dicttype: ``'dna'``, ``'protein'`` or ``'rna'``; selects the weblogo
            alphabet. Only ``'dna'`` installs a custom colour scheme.

    Returns:
        The value returned by ``weblogolib.pdf_formatter``.

    Raises:
        ValueError: If ``dicttype`` is not one of the supported values.
    '''
    alphabet_names = {
        'dna': 'unambiguous_dna_alphabet',
        'protein': 'unambiguous_protein_alphabet',
        'rna': 'unambiguous_rna_alphabet',
    }
    # Fail early with a clear message; previously an unknown dicttype left
    # `data` unbound and crashed further down.
    if dicttype not in alphabet_names:
        raise ValueError(
            "Unknown dicttype %r; expected 'dna', 'protein' or 'rna'."
            % (dicttype,))

    #Set Logo options
    # stack width in points, not default size of 10.8
    stackwidth = 9.5
    # ratio of stack height:width, doesn't count part going over
    # maximum value of 1
    stackaspectratio = 4.4
    logo_options = weblogolib.LogoOptions()
    logo_options.fineprint = None
    #logo_options.stacks_per_line = nperline
    logo_options.stack_aspect_ratio = stackaspectratio
    logo_options.show_errorbars = True

    logo_options.errorbar_fraction = .75
    logo_options.errorbar_gray = .9
    logo_options.errorbar_width_fraction = .9
    logo_options.stack_width = stackwidth
    #Command to uncomment if you want each column to have height = 1
    #logo_options.unit_name = 'probability'
    logo_options.show_yaxis = False

    # The counts matrix is built the same way for every alphabet.
    letters = [inv_dict[i] for i in range(len(seq_dict))]
    al = getattr(weblogolib, alphabet_names[dicttype])
    column_headers = ['freq_' + letter for letter in letters]
    counts_arr = np.array(df[column_headers])
    data = weblogolib.LogoData.from_counts(al,counts_arr)

    if dicttype == 'dna':
        colormapping = {
            'A': '#008000',
            'T': '#FF0000',
            'C': '#0000FF',
            'G': '#FFA500',
        }

        color_scheme = weblogolib.colorscheme.ColorScheme()
        for x in letters:
            if hasattr(color_scheme, 'rules'):
                color_scheme.rules.append(
                    weblogolib.colorscheme.SymbolColor(
                        x, colormapping[x], "'%s'" % x))
            else:
                # this part is needed for weblogo 3.4
                color_scheme.groups.append(
                    weblogolib.colorscheme.ColorGroup(
                        x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme

    #set logo format and output
    myformat = weblogolib.LogoFormat(data,logo_options)
    myimage = weblogolib.pdf_formatter(data,myformat)
    return myimage
Example #12
0
import weblogolib as wll
import pickle

# Example binding-site sequences (mixed case) used to build the demo logo.
seqs = ['attcgtgatagctgtcgtaaag','ttttgttacctgcctctaactt','aagtgtgacgccgtgcaaataa','tgccgtgattatagacactttt','atttgcgatgcgtcgcgcattt','taatgagattcagatcacatat','taatgtgacgtcctttgcatac','gaaggcgacctgggtcatgctg','aggtgttaaattgatcacgttt','cgatgcgaggcggatcgaaaaa','aaattcaatattcatcacactt','agatgtgagccagctcaccata','agatgtgattagattattattc','aattgtgatgtgtatcgaagtg','ttatttgaaccagatcgcatta','aaatgtaagctgtgccacgttt','aagtgtgacatggaataaatta','ttgtttgatttcgcgcatattc','aaacgtgatttcatgcgtcatt','atgtgtgcggcaattcacattt','taatgttatacatatcactcta','ttttatgacgaggcacacacat','aagttcgatatttctcgttttt','ttttgcgatcaaaataacactt','aaacgtgatcaacccctcaatt','taatgtgagttagctcactcat','aattgtgagcggataacaattt','ttgtgtgatctctgttacagaa','TAAtgtggagatgcgcacaTAA','TTTtgcaagcaacatcacgAAA','GACctcggtttagttcacaGAA','aattgtgacacagtgcaaattc','aaccgtgctcccactcgcagtc','TCTTGTGATTCAGATCACAAAG','ttttgtgagttttgtcaccaaa','aaatgttatccacatcacaatt','ttatttgccacaggtaacaaaa','atgcctgacggagttcacactt','taacgtgatcatatcaacagaa','Ttttgtggcctgcttcaaactt','ttttatgatttggttcaattct','aattgtgaacatcatcacgttc','ttttgtgatctgtttaaatgtt','agaggtgattttgatcacggaa','atttgtgagtggtcgcacatat','gattgtgattcgattcacattt','gtgtgtaaacgtgaacgcaatc','aactgtgaaacgaaacatattt','TCTTGTGATGTGGTTAACCAAT']

#fIn = open('/Users/biggus/SourceCodeEtc/weblogo-3.0/tests/data/cap.fas')
#seqs = wll.read_seq_data(fIn)


# Wrap every raw string in a weblogo Seq, then let weblogo pick the alphabet.
seqs = wll.SeqList([wll.Seq(raw_seq) for raw_seq in seqs])
seqs.alphabet = wll.which_alphabet(seqs)

data = wll.LogoData.from_seqs(seqs)
#pickle.dump(data,open('/Users/biggus/SourceCodeEtc/weblogo-3.0/tests/out/fromList.data.pkl','w'))
#exit('Just Dumped List Pkl')
options = wll.LogoOptions()
options.title = 'A Logo Title'
format = wll.LogoFormat(data, options)
# PDF is binary; the context manager makes sure the file is flushed and closed.
with open('/Users/biggus/SourceCodeEtc/weblogo-3.0/tests/out/cap2.xmpl.pdf', 'wb') as fout:
    wll.pdf_formatter( data, format, fout)
print('done')