Exemple #1
0
def generate_logo(seqfile, title):
    '''
    Generate the sequence logo (EPS) from the specified sequences.

    The logo is written next to the input file, using the input path with
    its extension replaced by ``.eps``.

    Args:
        seqfile (str): The path to a sequence file.
        title (str): The title assigned to the logo options.

    '''
    import os

    with open(seqfile, 'r') as fh:
        # The length of the first line drives the position numbering below;
        # rewind afterwards so the parser reads the file from the start.
        seqlen = len(fh.readline().rstrip('\n'))
        fh.seek(0)
        seqs = wl.read_seq_data(fh)

    data = wl.LogoData.from_seqs(seqs)

    options = wl.LogoOptions()
    # NOTE(review): other WebLogo callers set ``logo_title``; confirm that
    # ``title`` is honoured by the installed WebLogo version.
    options.title = title
    options.fineprint = ''

    # Number the positions so that the middle of the first line is index 0.
    options.first_index = -(seqlen // 2)

    form = wl.LogoFormat(data, options)

    eps = wl.eps_formatter(data, form)
    # splitext copes with extensions of any length; slicing off the last
    # four characters only worked for three-letter extensions.
    eps_file = os.path.splitext(seqfile)[0] + '.eps'

    with open(eps_file, 'wb') as fh:
        fh.write(eps)
def generate_base_weblogo(options, allsequences, output):
    """Render a probability sequence logo of *allsequences* as a PDF file.

    Args:
        options: Object providing ``flanking_window``; the first logo
            position is numbered ``-options.flanking_window``.
        allsequences: Iterable of sequence strings. Each one is wrapped in
            a numbered FASTA record and parsed with the RNA alphabet.
        output: Path of the PDF file to write.
    """
    import weblogolib as wl
    from weblogolib.colorscheme import ColorScheme, ColorGroup
    from corebio import seq_io
    from StringIO import StringIO

    ecliptic_color_scheme = ColorScheme([
        ColorGroup('A', iwork_colors.blue),
        ColorGroup('C', iwork_colors.green),
        ColorGroup('G', iwork_colors.yellow),
        ColorGroup('UT', iwork_colors.red),
    ])

    # Build an in-memory FASTA file: record names are the running index.
    fastainput = StringIO(''.join('>%d\n%s\n' % s for s in enumerate(allsequences)))

    rna = wl.std_alphabets['rna']
    seqs = wl.read_seq_data(fastainput, alphabet=rna)
    logo = wl.LogoData.from_seqs(seqs)
    logooptions = wl.LogoOptions()
    logooptions.unit_name = 'probability'
    logooptions.color_scheme = ecliptic_color_scheme
    logooptions.show_fineprint = False
    logooptions.first_index = -options.flanking_window
    logooptions.title = "Ecliptic"

    logoformat = wl.LogoFormat(logo, logooptions)
    # PDF is binary data, and the context manager guarantees the handle is
    # flushed and closed even if the formatter raises.
    with open(output, 'wb') as fout:
        wl.pdf_formatter(logo, logoformat, fout)
Exemple #3
0
def readSequence(f, ip):
    """Read protein sequences from *f* using the input parser *ip*.

    Args:
        f: Either ``sys.stdin`` or the path of a sequence file.
        ip: Input parser handed through to ``read_seq_data``.

    Returns:
        The sequences returned by ``read_seq_data``.

    Raises:
        Exception: If the alphabet of the parsed sequences is not
            ``unambiguous_protein_alphabet``.
    """
    if f == sys.stdin:
        # Read all of stdin into an in-memory buffer.
        fin = StringIO(sys.stdin.read())
    else:
        fin = open(f, "r")
    try:
        seqs = read_seq_data(fin, ip)
    finally:
        # Release the handle whether or not parsing succeeded.
        fin.close()
    if seqs.alphabet != unambiguous_protein_alphabet:
        raise Exception("input sequences should be protein seqeunces")
    return seqs
    def runCalculation(self):
        """
        Load the fasta file and run the sequence entropy calculation.

        Reads ``self.fasta_file`` and stores the resulting ``wl.LogoData``
        in ``self.data``.
        """

        # Calculate the sequence entropy of each column in a fasta file.
        # The context manager closes the file even if parsing fails.
        with open(self.fasta_file, 'r') as f:
            self.data = wl.LogoData.from_seqs(wl.read_seq_data(f))
    def runCalculation(self):
        """
        Load the fasta file and run the sequence entropy calculation.

        Reads ``self.fasta_file`` and stores the resulting ``wl.LogoData``
        in ``self.data``.
        """

        # Calculate the sequence entropy of each column in a fasta file.
        # The context manager closes the file even if parsing fails.
        with open(self.fasta_file, 'r') as f:
            self.data = wl.LogoData.from_seqs(wl.read_seq_data(f))
def generate_weblogo(seq_id, seq_strs):
    """Write an EPS probability logo for *seq_strs* into ``WEBLOGO_PATH``.

    The strings are joined one per line, parsed with ``array_io.read`` and
    rendered with *seq_id* assigned as the title. The result is stored as
    ``<seq_id>.eps``.
    """
    joined = '\n'.join(seq_strs)
    sequences = weblogolib.read_seq_data(io.StringIO(joined),
                                         input_parser=array_io.read)
    logo_data = weblogolib.LogoData.from_seqs(sequences)

    logo_options = weblogolib.LogoOptions()
    logo_options.title = seq_id
    logo_options.unit_name = "probability"

    logo_format = weblogolib.LogoFormat(logo_data, logo_options)
    eps_bytes = weblogolib.eps_formatter(logo_data, logo_format)

    eps_path = os.path.join(WEBLOGO_PATH, seq_id + '.eps')
    with open(eps_path, 'wb') as eps_file:
        eps_file.write(eps_bytes)
Exemple #7
0
def generate_weblogo(file_path, seq_strs, unit_name=None):
    """Write an EPS logo for *seq_strs* to *file_path*.

    The strings are joined one per line and parsed with ``array_io.read``
    against the reduced protein alphabet. The colour scheme comes from
    ``get_color_scheme()``. *unit_name* overrides the default unit of the
    logo options only when it is not ``None``.
    """
    source = io.StringIO('\n'.join(seq_strs))
    sequences = weblogolib.read_seq_data(
        source,
        input_parser=array_io.read,
        alphabet=corebio_seq.reduced_protein_alphabet,
    )
    logo_data = weblogolib.LogoData.from_seqs(sequences)

    logo_options = weblogolib.LogoOptions(color_scheme=get_color_scheme())
    if unit_name is not None:
        logo_options.unit_name = unit_name

    logo_format = weblogolib.LogoFormat(logo_data, logo_options)
    eps_bytes = weblogolib.eps_formatter(logo_data, logo_format)

    with open(file_path, 'wb') as eps_file:
        eps_file.write(eps_bytes)
Exemple #8
0
def create_logo(input_seqs_fname, logo_fname, options):
    """
    Create a logo plot png using weblogo from a fasta created with write_logo_input()

    The input fasta is deleted once its sequences have been read.

    Args:
        input_seqs_fname: Path of the fasta file to read and then remove.
        logo_fname: Path of the png file to write.
        options: Logo options handed to ``w.LogoFormat``.
    """
    # NOTE(review): the "U" mode flag was removed in Python 3.11; switch to
    # plain "r" if this has to run there.
    with open(input_seqs_fname, "rU") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)

    # Pass the path as a single argv element so that file names containing
    # whitespace are removed correctly (a split command string broke them).
    subprocess.check_call(["rm", input_seqs_fname])

    logo_format = w.LogoFormat(data, options)
    # PNG data is binary, so the output file must not be opened in text mode.
    with open(logo_fname, "wb") as f:
        f.write(w.png_print_formatter(data, logo_format))
Exemple #9
0
def read_logodata(handle):
    """Get weblogo data for a sequence alignment.

    The alignment is read from *handle* with the unambiguous protein
    alphabet.

    Returns a list of tuples: (posn, letter_counts, entropy, weight), where
    posn starts at 1 and letter_counts maps each alphabet letter to its
    integer count in that column.
    """
    alignment = weblogolib.read_seq_data(
        handle, alphabet=unambiguous_protein_alphabet)
    ldata = weblogolib.LogoData.from_seqs(alignment)
    letters = ldata.alphabet.letters()
    columns = zip(ldata.counts.array, ldata.entropy, ldata.weight)
    return [
        (
            posn,
            {letter: int(count) for letter, count in zip(letters, column)},
            entropy,
            weight,
        )
        for posn, (column, entropy, weight) in enumerate(columns, 1)
    ]
Exemple #10
0
def _build_logodata_core(fin, fin_compos, fin_weight, options, second_data=None):
    """Read logo input from *fin*, as a transfac matrix or as sequences.

    The input is first parsed as a transfac motif; if that raises
    ``ValueError`` it is read as multiple sequence data instead.

    NOTE(review): the visible excerpt ends once the input has been read.
    ``fin_compos``, ``fin_weight`` and ``second_data`` are not used, and
    nothing is returned, within these lines.
    """
    motif_flag = False
    isCodon = False

    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif

        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError as motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        if options.alphabet is True:
            # An alphabet of ``True`` switches on codon handling; the
            # alphabet itself is then cleared before the sequences are read.
            isCodon = True
            options.alphabet = None
        seqs = read_seq_data(
            fin, options.input_parser.read, alphabet=options.alphabet, ignore_lower_case=options.ignore_lower_case
        )
Exemple #11
0
def _build_logodata(options):
    """Read the logo input named by *options*, as a motif or as sequences.

    When ``options.fin`` is ``None`` the whole of standard input is read
    into an in-memory buffer. The input is first parsed as a transfac
    motif; if that raises ``ValueError`` it is read as multiple sequence
    data instead.

    NOTE(review): the visible excerpt ends once the input has been read;
    nothing is returned within these lines.
    """
    motif_flag = False

    fin = options.fin
    if fin is None:
        # ``StringIO`` moved into ``io`` in Python 3; support both layouts.
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        fin = StringIO(sys.stdin.read())

    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError as motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        seqs = read_seq_data(fin,
                             options.input_parser.read,
                             alphabet=options.alphabet,
                             ignore_lower_case=options.ignore_lower_case)
Exemple #12
0
def _build_logodata(options):
    """Build the ``LogoData`` described by *options*.

    Sequence input (any ``input_parser`` other than ``"transfac"``) is read
    with ``read_seq_data`` and may be reversed and/or complemented. Matrix
    input is read with ``Motif.read_transfac`` and rejects the
    ``ignore_lower_case``, ``reverse`` and ``complement`` options.

    Returns:
        The ``LogoData`` built from the sequences or from the motif counts.

    Raises:
        ValueError: If an option incompatible with matrix input is set.
    """
    if options.input_parser != "transfac":
        sequences = read_seq_data(
            options.fin,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )

        if options.reverse:
            flipped = [s.reverse() for s in sequences]
            sequences = SeqList(flipped, sequences.alphabet)

        if options.complement:
            complemented = [
                Seq(s, sequences.alphabet).complement() for s in sequences
            ]
            sequences = SeqList(complemented, sequences.alphabet)

        seq_prior = parse_prior(options.composition, sequences.alphabet,
                                options.weight)
        return LogoData.from_seqs(sequences, seq_prior)

    from corebio.matrix import Motif

    # Options that matrix input cannot honour, checked in this order.
    # FIXME : implement --reverse and --complement for matrix input
    incompatible = (
        ("ignore_lower_case",
         "error: option --ignore-lower-case incompatible with matrix input"),
        ("reverse",
         "error: option --reverse incompatible with matrix input"),
        ("complement",
         "error: option --complement incompatible with matrix input"),
    )
    for attr_name, message in incompatible:
        if getattr(options, attr_name):
            raise ValueError(message)

    motif = Motif.read_transfac(options.fin, alphabet=options.alphabet)
    motif_prior = parse_prior(options.composition, motif.alphabet,
                              options.weight)
    return LogoData.from_counts(motif.alphabet, motif, motif_prior)
Exemple #13
0
def _build_logodata(options):
    """Read the logo input named by *options*, as a motif or as sequences.

    When ``options.fin`` is ``None`` the whole of standard input is read
    into an in-memory buffer. The input is first parsed as a transfac
    motif; if that raises ``ValueError`` it is read as multiple sequence
    data instead.

    NOTE(review): the visible excerpt ends once the input has been read;
    nothing is returned within these lines.
    """
    motif_flag = False

    fin = options.fin
    if fin is None:
        # ``StringIO`` moved into ``io`` in Python 3; support both layouts.
        try:
            from StringIO import StringIO
        except ImportError:
            from io import StringIO
        fin = StringIO(sys.stdin.read())

    try:
        # Try reading data in transfac format first.
        from corebio.matrix import Motif
        motif = Motif.read_transfac(fin, alphabet=options.alphabet)
        motif_flag = True
    except ValueError as motif_err:
        # Failed reading Motif, try reading as multiple sequence data.
        seqs = read_seq_data(fin,
                             options.input_parser.read,
                             alphabet=options.alphabet,
                             ignore_lower_case=options.ignore_lower_case)
def make_logo(fasta_path, png_path, title, start_pos, counts = None):
    """Write a PNG sequence logo to *png_path*.

    Args:
        fasta_path: Path of the sequence file; only read when *counts* is
            empty or ``None``.
        png_path: Path of the PNG file to write.
        title: Logo title.
        start_pos: Index assigned to the first logo position.
        counts: Optional mapping with the keys ``'A'``, ``'C'``, ``'G'``
            and ``'T'``. The four values are stacked and transposed into
            the count matrix, so each is presumably a per-position
            sequence of counts - TODO confirm against the caller.
    """
    if counts:
        # One row per position, one column per base in ACGT order.
        mat_counts = np.array([counts[l] for l in 'ACGT']).transpose()
        data = weblogolib.LogoData.from_counts(unambiguous_dna_alphabet, mat_counts)
    else:
        with open(fasta_path) as handle:
            seqs = weblogolib.read_seq_data(handle)
            data = weblogolib.LogoData.from_seqs(seqs)

    options = weblogolib.LogoOptions()
    options.logo_title = title
    options.resolution = 500
    options.first_index = start_pos
    options.number_interval = 1
    options.rotate_numbers = True
    options.color_scheme = nucleotide
    fmt = weblogolib.LogoFormat(data, options)
    # PNG data is binary, so the output file must not be opened in text mode.
    with open(png_path, 'wb') as handle:
        weblogolib.png_formatter(data, fmt, handle)
Exemple #15
0
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
    """Draw a PDF sequence logo of the C>T mutation contexts of one sample.

    The tab-separated input has one comment line, then a header line
    naming at least the columns ``context``, ``ref_allele``,
    ``alt_allele`` and ``judgement``. Rows judged ``REJECT`` are skipped.
    The contexts of the remaining C>T rows are written one per line to
    *outFileN*, which is then rendered as a logo into *pdfFileN*.

    Args:
        sampN: Sample name, used as the logo title.
        inFileN: Path of the tab-separated input file.
        outFileN: Path of the intermediate context file to write.
        pdfFileN: Path of the PDF logo to write.
    """
    with open(inFileN, 'r') as inFile:
        inFile.readline()  # comment line
        headerL = inFile.readline().rstrip().split('\t')
        # Map each column name to its position in the row.
        idxH = {name: i for i, name in enumerate(headerL)}

        with open(outFileN, 'w') as outFile:
            for line in inFile:
                colL = line.rstrip().split('\t')
                context = colL[idxH['context']]
                ref = colL[idxH['ref_allele']]
                alt = colL[idxH['alt_allele']]
                status = colL[idxH['judgement']]
                if status == 'REJECT':
                    continue

                # Keep the first and last three characters of the context
                # column and place the reference allele between them.
                context = context[:3] + ref + context[-3:]
                if ref not in ('C', 'T'):
                    # presumably mybasic.rc is a reverse complement that
                    # puts the row on the C/T strand - TODO confirm
                    context = mybasic.rc(context)
                    ref = mybasic.rc(ref)
                    alt = mybasic.rc(alt)

                if ref == 'C' and alt == 'T':  ## TMZ context only
                    outFile.write('%s\n' % context)

    with open(outFileN, 'r') as fin:
        seqs = weblogolib.read_seq_data(fin)
    data = weblogolib.LogoData.from_seqs(seqs)
    options = weblogolib.LogoOptions()
    options.show_fineprint = False
    options.first_index = -3
    options.logo_title = sampN
    logo_format = weblogolib.LogoFormat(data, options)
    # PDF is binary data; the context manager also closes the handle.
    with open(pdfFileN, 'wb') as fout:
        weblogolib.pdf_formatter(data, logo_format, fout)
Exemple #16
0
def mutect_weblogo_sub(sampN, inFileN, outFileN, pdfFileN):
	"""Draw a PDF sequence logo of the C>T mutation contexts of one sample.

	The tab-separated input has one comment line, then a header line
	naming at least the columns ``context``, ``ref_allele``,
	``alt_allele`` and ``judgement``. Rows judged ``REJECT`` are skipped.
	The contexts of the remaining C>T rows are written one per line to
	*outFileN*, which is then rendered as a logo into *pdfFileN*.

	Args:
		sampN: Sample name, used as the logo title.
		inFileN: Path of the tab-separated input file.
		outFileN: Path of the intermediate context file to write.
		pdfFileN: Path of the PDF logo to write.
	"""
	with open(inFileN, 'r') as inFile:
		inFile.readline() #comment line
		headerL = inFile.readline().rstrip().split('\t')
		# Map each column name to its position in the row.
		idxH = {name: i for i, name in enumerate(headerL)}

		with open(outFileN, 'w') as outFile:
			for line in inFile:
				colL = line.rstrip().split('\t')
				context = colL[idxH['context']]
				ref = colL[idxH['ref_allele']]
				alt = colL[idxH['alt_allele']]
				status = colL[idxH['judgement']]
				if status == 'REJECT':
					continue

				# Keep the first and last three characters of the context
				# column and place the reference allele between them.
				context = context[:3] + ref + context[-3:]
				if ref not in ('C', 'T'):
					# presumably mybasic.rc is a reverse complement that
					# puts the row on the C/T strand - TODO confirm
					context = mybasic.rc(context)
					ref = mybasic.rc(ref)
					alt = mybasic.rc(alt)

				if ref == 'C' and alt == 'T':## TMZ context only
					outFile.write('%s\n' % context)

	with open(outFileN, 'r') as fin:
		seqs = weblogolib.read_seq_data(fin)
	data = weblogolib.LogoData.from_seqs(seqs)
	options = weblogolib.LogoOptions()
	options.show_fineprint = False
	options.first_index = -3
	options.logo_title = sampN
	logo_format = weblogolib.LogoFormat(data, options)
	# PDF is binary data; the context manager also closes the handle.
	with open(pdfFileN, 'wb') as fout:
		weblogolib.pdf_formatter(data, logo_format, fout)
Exemple #17
0
    def do_seqlogo(self, input_fasta, output_seqlogo, col_list):
        """
        create sequence logo of given fasta file and then label it with col_list

        Args:
            input_fasta: Path of the fasta file to read.
            output_seqlogo: Path of the PDF logo to write.
            col_list: Zero-based column numbers; each is shifted by one
                before being used as the logo annotation.
        """

        import weblogolib
        import corebio
        from weblogolib.colorscheme import ColorScheme
        from weblogolib.colorscheme import ColorGroup
        #assigning colors
        hydrophobicity = ColorScheme([
            ColorGroup("RKDENQ", "blue"),
            ColorGroup("SGHTAP", "green"),
            ColorGroup("YVMCLFIW",  "black"),
            ColorGroup("O",  "dark orange")
        ])

        #Here i changed this so that the column begins with 1 not 0
        col_list2 = [i+1 for i in col_list]
        # The context manager closes the input once the sequences are read.
        with open(input_fasta) as fin:
            seqs = weblogolib.read_seq_data(fin)
        # Override the detected alphabet with the 20 amino acids plus 'O'.
        seqs.alphabet = corebio.seq.Alphabet('ACDEFGHIKLMNPQRSTVWYO')
        data = weblogolib.LogoData.from_seqs(seqs)
        data.composition = 'equiprobable'
        options = weblogolib.LogoOptions()
        options.yaxis_scale = 4.5
        options.yaxis_tic_interval = 2.25
        options.annotate = col_list2
        options.stack_width = 30
        options.stacks_per_line = 35
        options.scale_width = 'No'
        options.color_scheme = hydrophobicity
        format_logo = weblogolib.LogoFormat(data, options)
        # Render first so that a formatting failure does not leave an empty
        # output file behind, then write the PDF data in binary mode.
        pdf_logo = weblogolib.pdf_formatter(data, format_logo)
        with open(output_seqlogo, 'wb') as fout:
            fout.write(pdf_logo)
Exemple #18
0
def main(htdocs_directory = None) :
    """CGI entry point: read the submitted form, build a logo and print it.

    Depending on the request this either re-sends the HTML form through
    ``send_form`` (empty submission, ``cmd_reset``, ``cmd_edit``,
    ``cmd_validate`` or any collected error) or prints the HTTP headers
    followed by the rendered logo on standard output.

    Args:
        htdocs_directory: Passed through to ``send_form``.
    """
 
    logooptions = weblogolib.LogoOptions() 
      
    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field( 'sequences', ''),
        Field( 'format', 'png', weblogolib.formatters.get ,
            options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'] , #TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field( 'stacks_per_line', logooptions.stacks_per_line , int, 
            errmsg='Invalid number of stacks per line.'),
        Field( 'stack_width','medium', weblogolib.std_sizes.get,
            options=['small', 'medium', 'large'], errmsg='Invalid logo size.'),
        Field( 'alphabet','alphabet_auto', alphabets.get,
            options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna', 
                        'alphabet_rna'],
            errmsg="Unknown sequence type."),
        Field( 'unit_name', 'bits', 
            options=[ 'probability', 'bits', 'nats', 'kT', 'kJ/mol', 
                        'kcal/mol']),
        Field( 'first_index', 1, int_or_none),
        Field( 'logo_start', '', int_or_none),
        Field( 'logo_end', '', int_or_none),
        Field( 'composition', 'comp_auto', composition.get,
            options=['comp_none','comp_auto','comp_equiprobable','comp_CG',
            'comp_Celegans','comp_Dmelanogaster','comp_Ecoli',
            'comp_Hsapiens','comp_Mmusculus','comp_Scerevisiae'], 
            errmsg= "Illegal sequence composition."),
        Field( 'percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field( 'show_errorbars', False , truth),
        Field( 'logo_title', logooptions.logo_title ),
        Field( 'logo_label', logooptions.logo_label ),
        Field( 'show_xaxis', False, truth),
        Field( 'xaxis_label', logooptions.xaxis_label ),
        Field( 'show_yaxis', False, truth),  
        Field( 'yaxis_label', logooptions.yaxis_label, string_or_none ),
        Field( 'yaxis_scale', logooptions.yaxis_scale , float_or_none,
            errmsg="The yaxis scale must be a positive number." ),
        Field( 'yaxis_tic_interval', logooptions.yaxis_tic_interval , 
                float_or_none),
        Field( 'show_ends', False, truth), 
        Field( 'show_fineprint', False , truth), 
        Field( 'color_scheme', 'color_auto', color_schemes.get,
            options=color_schemes.keys() ,
            errmsg = 'Unknown color scheme'),
        Field( 'color0', ''),
        Field( 'symbols0', ''),
        Field( 'desc0', ''),
        Field( 'color1', ''),
        Field( 'symbols1', ''),
        Field( 'desc1', ''),
        Field( 'color2', ''),
        Field( 'symbols2', ''),
        Field( 'desc2', ''),
        Field( 'color3', ''),
        Field( 'symbols3', ''),
        Field( 'desc3', ''),
        Field( 'color4', ''),
        Field( 'symbols4', ''),
        Field( 'desc4', ''),
        Field( 'ignore_lower_case', False, truth), 
        Field( 'scale_width', False, truth), 
        ]
    
    # Index the controls by field name for direct lookup.
    form = {}
    for c in controls :
        form[c.name] = c


    form_values = cgilib.FieldStorage()
    
    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width
        
        send_form(controls, htdocs_directory = htdocs_directory) 
        return
    
    # Get form content
    for c in controls :
        c.value = form_values.getfirst( c.name, c.default) 
       
       
    # Names of the fields that are copied onto logooptions as attributes
    # of the same name.
    options_from_form = ['format', 'stacks_per_line', 'stack_width', 
        'alphabet', 'unit_name', 'first_index', 'logo_start','logo_end',
         'composition', 
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis', 
        'xaxis_label',
        'show_yaxis', 'yaxis_label', 'yaxis_scale', 'yaxis_tic_interval',
        'show_ends', 'show_fineprint', 'scale_width']
    
    
    # Conversion errors are collected rather than raised so that they can
    # all be reported together through send_form.
    errors = []
    for optname in options_from_form :
        try :
            value =  form[optname].get_value()
            if value!=None : setattr(logooptions, optname, value)
        except ValueError as err :
            errors.append(err.args)            

    
    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0,5) :
        color = form["color%d"%i].get_value()
        symbols = form["symbols%d"%i].get_value()
        desc = form["desc%d"%i].get_value() 

        if color :
            try :
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError as e:
                errors.append( ('color%d'%i, "Invalid color: %s" % color) )
    
    if form["color_scheme"].value == 'color_custom' :
        logooptions.color_scheme =  custom
    else :
        try :
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)            

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file") 
        #assert type(sequences) == str

    # Fall back to the pasted text when no file content was supplied.
    if not sequences or len(sequences)  ==0:
        sequences = form["sequences"].get_value()
    
    if not sequences or len(sequences)  ==0:
        errors.append( ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload."))
  


    # If we have uncovered errors or we want the chance to edit the logo 
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return    
 
        
    try :
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        # The flag is set by the mere presence of the key in the submission.
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            comp = str(percentCG / 100)

        from corebio.matrix import Motif
         
        try:
            # Try reading data in transfac format first. 
            # TODO Refactor this code 
            motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior( comp,motif.alphabet)  
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)          
        except ValueError as motif_err:
            # Not a transfac matrix: read the input as sequence data instead.
            seqs = weblogolib.read_seq_data(StringIO( sequences), 
                                        alphabet=logooptions.alphabet,
                                        ignore_lower_case=ignore_lower_case
                                        )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior) 
            
        logoformat =  weblogolib.LogoFormat(data, logooptions)
        format = form["format"].value
        logo = weblogolib.formatters[format](data, logoformat)            
    except ValueError as err:
        errors.append(err.args)
    except IOError as err:
        errors.append(err.args)
    except RuntimeError as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; ' \
              'filename="logo.%s"' % extension[format])
    else:
        print('Content-Disposition: inline; ' \
              'filename="logo.%s"' % extension[format])
    # Separate header from data
    print()
    # Finally, and at last, send the logo.

    # On Python 3 the logo is written to the underlying binary buffer of
    # stdout; on Python 2 it is written to stdout directly.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else: 
        sys.stdout.write(logo)
Exemple #19
0
def main(htdocs_directory=None):
    """CGI entry point: read the submitted form, build a logo and print it.

    Depending on the request this either re-sends the HTML form through
    ``send_form`` (empty submission, ``cmd_reset``, ``cmd_edit``,
    ``cmd_validate`` or any collected error) or prints the HTTP headers
    followed by the rendered logo on standard output.

    Args:
        htdocs_directory: Passed through to ``send_form``.
    """

    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field("sequences", ""),
        Field(
            "format",
            "png",
            weblogolib.formatters.get,
            options=[
                "png_print",
                "png",
                "jpeg",
                "eps",
                "pdf",
                "svg",
                "logodata",
            ],  # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option.",
        ),
        Field("stacks_per_line", logooptions.stacks_per_line, int, errmsg="Invalid number of stacks per line."),
        Field(
            "stack_width",
            "medium",
            weblogolib.std_sizes.get,
            options=["small", "medium", "large"],
            errmsg="Invalid logo size.",
        ),
        Field(
            "alphabet",
            "alphabet_auto",
            alphabets.get,
            options=["alphabet_auto", "alphabet_protein", "alphabet_dna", "alphabet_rna"],
            errmsg="Unknown sequence type.",
        ),
        Field("unit_name", "bits", options=["probability", "bits", "nats", "kT", "kJ/mol", "kcal/mol"]),
        Field("first_index", 1, int_or_none),
        Field("logo_start", "", int_or_none),
        Field("logo_end", "", int_or_none),
        Field(
            "composition",
            "comp_auto",
            composition.get,
            options=[
                "comp_none",
                "comp_auto",
                "comp_equiprobable",
                "comp_CG",
                "comp_Celegans",
                "comp_Dmelanogaster",
                "comp_Ecoli",
                "comp_Hsapiens",
                "comp_Mmusculus",
                "comp_Scerevisiae",
            ],
            errmsg="Illegal sequence composition.",
        ),
        Field("percentCG", "", float_or_none, errmsg="Invalid CG percentage."),
        Field("show_errorbars", False, truth),
        Field("logo_title", logooptions.logo_title),
        Field("logo_label", logooptions.logo_label),
        Field("show_xaxis", False, truth),
        Field("xaxis_label", logooptions.xaxis_label),
        Field("show_yaxis", False, truth),
        Field("yaxis_label", logooptions.yaxis_label, string_or_none),
        Field(
            "yaxis_scale", logooptions.yaxis_scale, float_or_none, errmsg="The yaxis scale must be a positive number."
        ),
        Field("yaxis_tic_interval", logooptions.yaxis_tic_interval, float_or_none),
        Field("show_ends", False, truth),
        Field("show_fineprint", False, truth),
        Field(
            "color_scheme", "color_auto", color_schemes.get, options=color_schemes.keys(), errmsg="Unknown color scheme"
        ),
        Field("color0", ""),
        Field("symbols0", ""),
        Field("desc0", ""),
        Field("color1", ""),
        Field("symbols1", ""),
        Field("desc1", ""),
        Field("color2", ""),
        Field("symbols2", ""),
        Field("desc2", ""),
        Field("color3", ""),
        Field("symbols3", ""),
        Field("desc3", ""),
        Field("color4", ""),
        Field("symbols4", ""),
        Field("desc4", ""),
        Field("ignore_lower_case", False, truth),
        Field("scale_width", False, truth),
    ]

    # Index the controls by field name for direct lookup.
    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form["show_errorbars"].value = logooptions.show_errorbars
        form["show_xaxis"].value = logooptions.show_xaxis
        form["show_yaxis"].value = logooptions.show_yaxis
        form["show_ends"].value = logooptions.show_ends
        form["show_fineprint"].value = logooptions.show_fineprint
        form["scale_width"].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    # Names of the fields that are copied onto logooptions as attributes
    # of the same name.
    options_from_form = [
        "format",
        "stacks_per_line",
        "stack_width",
        "alphabet",
        "unit_name",
        "first_index",
        "logo_start",
        "logo_end",
        "composition",
        "show_errorbars",
        "logo_title",
        "logo_label",
        "show_xaxis",
        "xaxis_label",
        "show_yaxis",
        "yaxis_label",
        "yaxis_scale",
        "yaxis_tic_interval",
        "show_ends",
        "show_fineprint",
        "scale_width",
    ]

    # Conversion errors are collected rather than raised so that they can
    # all be reported together through send_form.
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value != None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError as e:
                errors.append(("color%d" % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == "color_custom":
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # assert type(sequences) == str

    # Fall back to the pasted text when no file content was supplied.
    if not sequences or len(sequences) == 0:
        sequences = form["sequences"].get_value()
        # If a user tries to paste a very large file into sequence textarea,
        # then WebLogo runs very slow for no apparently good reason. (Might be client side bug?)
        # So we limit the maximum sequence size.
        # Form field also limits size, but not necessarily respected. Also can truncate data
        # without warning, so we'll set textarea maximum to be larger than MAX_SEQUENCE_SIZE
        SEQUENCES_MAXLENGTH = 100000
        if len(sequences) > SEQUENCES_MAXLENGTH:
            errors.append(("sequences", "Sequence data too large for text input. Use file upload instead."))
            # Replace the control with an empty field so the oversized text
            # is not part of the form that is sent back.
            controls[0] = Field("sequences", "")

    if not sequences or len(sequences) == 0:
        errors.append(
            ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload.")
        )

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        # The flag is set by the mere presence of the key in the submission.
        ignore_lower_case = "ignore_lower_case" in form_values
        if comp == "percentCG":
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError as motif_err:
            # Not a transfac matrix: read the input as sequence data instead.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case
            )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format = form["format"].value
        logo = weblogolib.formatters[format](data, logoformat)
    except ValueError as err:
        errors.append(err.args)
    except IOError as err:
        errors.append(err.args)
    except RuntimeError as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print("Content-Disposition: attachment; " 'filename="logo.%s"' % extension[format])
    else:
        print("Content-Disposition: inline; " 'filename="logo.%s"' % extension[format])
    # Separate header from data
    print()
    # Finally, and at last, send the logo.

    # On Python 3 the logo is written to the underlying binary buffer of
    # stdout; on Python 2 it is written to stdout directly.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
Exemple #20
0
     comp = form["composition"].get_value()
     percentCG = form["percentCG"].get_value()
     ignore_lower_case = form_values.has_key("ignore_lower_case") 
     if comp=='percentCG': comp = str(percentCG/100)     
     
     from corebio.matrix import Motif
      
     try:
         # Try reading data in transfac format first. 
         # TODO Refactor this code 
         motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet)
         prior = weblogolib.parse_prior( comp,motif.alphabet)  
         data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)          
     except ValueError, motif_err :
         seqs = weblogolib.read_seq_data(StringIO( sequences), 
                                     alphabet=logooptions.alphabet,
                                     ignore_lower_case=ignore_lower_case
                                     )
         prior = weblogolib.parse_prior(comp, seqs.alphabet)                        
         data = weblogolib.LogoData.from_seqs(seqs, prior) 
         
     logoformat =  weblogolib.LogoFormat(data, logooptions)
     format = form["format"].value
     weblogolib.formatters[format](data, logoformat, logo)            
 except ValueError, err :
     errors.append( err.args )
 except IOError, err :
     errors.append( err.args)
 except RuntimeError, err :
     errors.append( err.args )
           
 if form_values.has_key("cmd_validate") or errors :
Exemple #21
0
        if comp == 'percentCG':
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(seq_file,
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError as motif_err:
            seqs = weblogolib.read_seq_data(
                seq_file,
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format = form["format"].value
        logo = weblogolib.formatters[format](data, logoformat)
    except ValueError as err:
        errors.append(err.args)
    except IOError as err:
        errors.append(err.args)
    except RuntimeError as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
Exemple #22
0
        ignore_lower_case = form_values.has_key("ignore_lower_case")
        if comp == 'percentCG': comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError, motif_err:
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format = form["format"].value
        weblogolib.formatters[format](data, logoformat, logo)
    except ValueError, err:
        errors.append(err.args)
    except IOError, err:
        errors.append(err.args)
    except RuntimeError, err:
        errors.append(err.args)

    if form_values.has_key("cmd_validate") or errors:
Exemple #23
0
def main(htdocs_directory=None):
    """Handle a single WebLogo CGI request.

    The submitted form is read with ``cgilib.FieldStorage()``, validated
    field by field, and copied onto a fresh ``weblogolib.LogoOptions``.
    Depending on the request, the function either re-sends the HTML form
    (via ``send_form``) or writes the rendered logo to standard output
    preceded by ``Content-Type`` / ``Content-Disposition`` headers.

    The form is returned instead of a logo when:
      * no form values were submitted, or ``cmd_reset`` is present;
      * validation produced errors, or ``cmd_edit`` is present;
      * logo creation failed, or ``cmd_validate`` is present.

    Args:
        htdocs_directory: Passed through unchanged to ``send_form``.

    Returns:
        None. All output goes to ``send_form`` or ``sys.stdout``.
    """
    logooptions = weblogolib.LogoOptions()

    # Number of user-definable colour groups (colorN / symbolsN / descN).
    n_custom_colors = 5

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field(
            'format',
            'png',
            weblogolib.formatters.get,
            options=[
                'png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'
            ],  # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field('stacks_per_line',
              logooptions.stacks_per_line,
              int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width',
              'medium',
              weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet',
              'alphabet_auto',
              alphabets.get,
              options=[
                  'alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                  'alphabet_rna'
              ],
              errmsg="Unknown sequence type."),
        Field('unit_name',
              'bits',
              options=[
                  'probability', 'bits', 'nats', 'kT', 'kJ/mol', 'kcal/mol'
              ]),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition',
              'comp_auto',
              composition.get,
              options=[
                  'comp_none', 'comp_auto', 'comp_equiprobable', 'comp_CG',
                  'comp_Celegans', 'comp_Dmelanogaster', 'comp_Ecoli',
                  'comp_Hsapiens', 'comp_Mmusculus', 'comp_Scerevisiae'
              ],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale',
              logooptions.yaxis_scale,
              float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme',
              'color_auto',
              color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
    ]
    # Custom colour groups, in the order color0, symbols0, desc0, color1, ...
    for i in range(n_custom_colors):
        controls.append(Field('color%d' % i, ''))
        controls.append(Field('symbols%d' % i, ''))
        controls.append(Field('desc%d' % i, ''))
    controls.append(Field('ignore_lower_case', False, truth))
    controls.append(Field('scale_width', False, truth))

    # Name -> Field lookup for the controls above.
    form = {c.name: c for c in controls}

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        for name in ('show_errorbars', 'show_xaxis', 'show_yaxis',
                     'show_ends', 'show_fineprint', 'scale_width'):
            form[name].value = getattr(logooptions, name)

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width'
    ]

    # Each entry appended to `errors` is a tuple; the ones built in this
    # function have the shape (field name, message).
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(n_custom_colors):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # NOTE(review): assuming cgilib is the stdlib cgi module, an uploaded
        # file arrives as bytes on Python 3, which StringIO cannot wrap.
        if sys.version_info[0] >= 3 and isinstance(sequences, bytes):
            sequences = sequences.decode('utf-8', 'replace')

    # Fall back to the text box when no file content was uploaded.
    if not sequences:
        sequences = form["sequences"].get_value()

    if not sequences:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."
        ))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # Without a percentage the division below would raise an
            # uncaught TypeError; report it as a form error instead.
            if percentCG is None:
                raise ValueError(
                    "percentCG",
                    "Please specify a CG percentage for this composition.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError:
            # Not transfac: fall back to reading the input as sequence data.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        format_name = form["format"].value
        logo = weblogolib.formatters[format_name](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format_name])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    disposition = 'attachment' if "download" in form_values else 'inline'
    print('Content-Disposition: %s; filename="logo.%s"' %
          (disposition, extension[format_name]))
    # Separate header from data
    print()
    # The headers went through the text layer; flush them before writing
    # the body so the body cannot overtake them when stdout is a pipe.
    sys.stdout.flush()

    # Finally, and at last, send the logo.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
    args = parser.parse_args()

    tree_yielder = dendropy.Tree.yield_from_files(
        files=[args.trees_path],
        schema="newick",
        preserve_underscores=True
    )

    naive_seqs = [tree.find_node_with_taxon_label("naive").annotations.get_value("ancestral") for tree in tree_yielder]
    aa_naive_seqs = [translate(seq) for seq in naive_seqs]
    aa_naive_seqs_d = {("naive" + str(i)): seq for i, seq in enumerate(aa_naive_seqs)}
    write_to_fasta(aa_naive_seqs_d, args.output_base + ".fasta")

    with open(args.output_base + ".fasta", "rU") as f:
        seqs = w.read_seq_data(f)
    data = w.LogoData.from_seqs(seqs)
    subprocess.check_call(("rm " + args.output_base + ".fasta").split())

    options = w.LogoOptions()
    options.unit_name = "probability"
    options.yaxis_label = "Probability"
    options.xaxis_label = "Site Position"
    options.show_fineprint = False
    options.stacks_per_line = 500
    options.tic_length = 10

    format = w.LogoFormat(data, options)
    with open(args.output_base + ".png", 'w') as f:
        f.write(w.png_print_formatter(data, format))