def test_parse_prior_equiprobable(self):
    """'equiprobable' must parse to a weight-scaled uniform distribution."""
    # Protein alphabet, weight passed by keyword.
    protein_expected = 20. * equiprobable_distribution(20)
    protein_prior = parse_prior('equiprobable', unambiguous_protein_alphabet, weight=20.)
    self.assertTrue(all(protein_expected == protein_prior))

    # Custom three-letter alphabet; the keyword is padded and mixed-case.
    custom_expected = 1.2 * equiprobable_distribution(3)
    custom_prior = parse_prior(' equiprobablE ', Alphabet('123'), 1.2)
    self.assertTrue(all(custom_expected == custom_prior))
def test_parse_prior_none(self):
    """A missing composition, or the word 'none' in any case, gives None."""
    from_missing = parse_prior(None, unambiguous_protein_alphabet)
    self.assertEqual(None, from_missing)

    from_keyword = parse_prior('none', unambiguous_protein_alphabet)
    self.assertEqual(None, from_keyword)

    # Mixed-case keyword, and no alphabet supplied either.
    from_mixed_case = parse_prior('noNe', None)
    self.assertEqual(None, from_mixed_case)
def test_parse_prior_none(self):
    """parse_prior returns None for a missing or 'none' composition.

    Uses ``assertEqual``: the ``assertEquals`` alias is deprecated and was
    removed from ``unittest`` in Python 3.12.
    """
    self.assertEqual(None, parse_prior(None, unambiguous_protein_alphabet))
    self.assertEqual(None, parse_prior('none', unambiguous_protein_alphabet))
    # The keyword is matched case-insensitively and needs no alphabet.
    self.assertEqual(None, parse_prior('noNe', None))
def test_weight(self):
    """An explicit weight overrides the default used for 'auto' priors."""
    # No weight supplied: the test expects a total weight of 2.
    default_expected = 2. * equiprobable_distribution(4)
    default_prior = parse_prior('automatic', unambiguous_dna_alphabet)
    self.assertTrue(all(default_expected == default_prior))

    # Weight supplied positionally.
    weighted_expected = 123.123 * equiprobable_distribution(4)
    weighted_prior = parse_prior('auto', unambiguous_dna_alphabet, 123.123)
    self.assertTrue(all(weighted_expected == weighted_prior))
def test_auto(self):
    """'auto' and 'automatic' both give a uniform DNA prior of weight 4."""
    short_expected = 4. * equiprobable_distribution(4)
    short_prior = parse_prior('auto', unambiguous_dna_alphabet)
    self.assertTrue(all(short_expected == short_prior))

    long_expected = 4. * equiprobable_distribution(4)
    long_prior = parse_prior('automatic', unambiguous_dna_alphabet)
    self.assertTrue(all(long_expected == long_prior))
def test_parse_prior_equiprobable(self):
    """Equiprobable priors are the uniform distribution times the weight."""
    uniform_protein = 20. * equiprobable_distribution(20)
    parsed_protein = parse_prior('equiprobable', unambiguous_protein_alphabet, weight=20.)
    protein_matches = all(uniform_protein == parsed_protein)
    self.assertTrue(protein_matches)

    # Surrounding whitespace and odd capitalisation are part of the input.
    uniform_custom = 1.2 * equiprobable_distribution(3)
    parsed_custom = parse_prior(' equiprobablE ', Alphabet('123'), 1.2)
    custom_matches = all(uniform_custom == parsed_custom)
    self.assertTrue(custom_matches)
def test_parse_prior_float(self):
    """A bare float composition is checked against per-base DNA priors."""
    # "0.5" must give the uniform distribution.
    uniform = equiprobable_distribution(4)
    plain = parse_prior("0.5", unambiguous_dna_alphabet, 1.0)
    self.assertTrue(all(uniform == plain))

    # Same value with padding and trailing zeros.
    uniform = equiprobable_distribution(4)
    padded = parse_prior(" 0.500 ", unambiguous_dna_alphabet, 1.0)
    self.assertTrue(all(uniform == padded))

    # " 0.40 " must give the skewed distribution below.
    skewed_expected = array((0.3, 0.2, 0.2, 0.3), float64)
    skewed = parse_prior(" 0.40 ", unambiguous_dna_alphabet, 1.0)
    self.assertTrue(all(skewed_expected == skewed))
def test_parse_prior_float(self):
    """Float compositions: '0.5' is uniform, ' 0.40 ' is skewed."""
    expected = equiprobable_distribution(4)
    observed = parse_prior('0.5', unambiguous_dna_alphabet, 1.)
    self.assertTrue(all(expected == observed))

    expected = equiprobable_distribution(4)
    observed = parse_prior(' 0.500 ', unambiguous_dna_alphabet, 1.)
    self.assertTrue(all(expected == observed))

    expected = array((0.3, 0.2, 0.2, 0.3), float64)
    observed = parse_prior(' 0.40 ', unambiguous_dna_alphabet, 1.)
    self.assertTrue(all(expected == observed))
def test_parse_prior_equiprobable(self):
    """The 'equiprobable' keyword yields weight * uniform distribution."""
    want = 20.0 * equiprobable_distribution(20)
    got = parse_prior("equiprobable", unambiguous_protein_alphabet, weight=20.0)
    self.assertTrue(all(want == got))

    # Keyword given with whitespace padding and a stray capital letter.
    want = 1.2 * equiprobable_distribution(3)
    got = parse_prior(" equiprobablE ", Alphabet("123"), 1.2)
    self.assertTrue(all(want == got))
def test_parse_prior_percentage(self):
    """A percentage composition is checked against per-base DNA priors."""
    # "50%" must give the uniform distribution.
    uniform = equiprobable_distribution(4)
    compact = parse_prior("50%", unambiguous_dna_alphabet, 1.0)
    self.assertTrue(all(uniform == compact))

    # Whitespace around the number and before the percent sign.
    uniform = equiprobable_distribution(4)
    spaced = parse_prior(" 50.0 % ", unambiguous_dna_alphabet, 1.0)
    self.assertTrue(all(uniform == spaced))

    # " 40.0 % " must give the skewed distribution below.
    skewed_expected = array((0.3, 0.2, 0.2, 0.3), float64)
    skewed = parse_prior(" 40.0 % ", unambiguous_dna_alphabet, 1.0)
    self.assertTrue(all(skewed_expected == skewed))
def test_parse_prior_percentage(self):
    """Percentage compositions: '50%' is uniform, ' 40.0 % ' is skewed."""
    expected = equiprobable_distribution(4)
    observed = parse_prior('50%', unambiguous_dna_alphabet, 1.)
    self.assertTrue(all(expected == observed))

    expected = equiprobable_distribution(4)
    observed = parse_prior(' 50.0 % ', unambiguous_dna_alphabet, 1.)
    self.assertTrue(all(expected == observed))

    expected = array((0.3, 0.2, 0.2, 0.3), float64)
    observed = parse_prior(' 40.0 % ', unambiguous_dna_alphabet, 1.)
    self.assertTrue(all(expected == observed))
def _build_logodata(options):
    """Build and return the LogoData described by *options*.

    When ``options.input_parser`` is not the string "transfac", the input is
    read with ``read_seq_data``, optionally reversed and/or complemented, and
    turned into LogoData with ``LogoData.from_seqs``.

    Otherwise the input is read as a matrix with ``Motif.read_transfac`` and
    turned into LogoData with ``LogoData.from_counts``. In that mode the
    ``ignore_lower_case``, ``reverse`` and ``complement`` options raise
    ``ValueError``.

    In both modes the prior comes from ``parse_prior`` using
    ``options.composition`` and ``options.weight``.
    """
    if options.input_parser != "transfac":
        seqs = read_seq_data(
            options.fin,
            options.input_parser.read,
            alphabet=options.alphabet,
            ignore_lower_case=options.ignore_lower_case,
        )

        if options.reverse:
            flipped = [s.reverse() for s in seqs]
            seqs = SeqList(flipped, seqs.alphabet)

        if options.complement:
            complemented = [Seq(s, seqs.alphabet).complement() for s in seqs]
            seqs = SeqList(complemented, seqs.alphabet)

        seq_prior = parse_prior(options.composition, seqs.alphabet, options.weight)
        return LogoData.from_seqs(seqs, seq_prior)

    from corebio.matrix import Motif

    # Options rejected for matrix input, checked in this order.
    # FIXME (carried over from the original code): "implement" -- presumably
    # these options are meant to be supported for matrices eventually.
    rejected_options = (
        ("ignore_lower_case",
         "error: option --ignore-lower-case incompatible with matrix input"),
        ("reverse",
         "error: option --reverse incompatible with matrix input"),
        ("complement",
         "error: option --complement incompatible with matrix input"),
    )
    for attr_name, message in rejected_options:
        if getattr(options, attr_name):
            raise ValueError(message)

    motif = Motif.read_transfac(options.fin, alphabet=options.alphabet)
    matrix_prior = parse_prior(options.composition, motif.alphabet, options.weight)
    return LogoData.from_counts(motif.alphabet, motif, matrix_prior)
motif = Motif.read_transfac(fin, alphabet=options.alphabet) motif_flag = True except ValueError, motif_err : # Failed reading Motif, try reading as multiple sequence data. seqs = read_seq_data(fin, options.input_parser.read, alphabet=options.alphabet, ignore_lower_case = options.ignore_lower_case) if motif_flag : if options.ignore_lower_case: raise ValueError("error: option --ignore-lower-case incompatible with matrix input") if options.reverse: motif.reverse() if options.complement: motif.complement() prior = parse_prior( options.composition,motif.alphabet, options.weight) data = LogoData.from_counts(motif.alphabet, motif, prior) else : if options.reverse: seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet) if options.complement : seqs= SeqList( [Seq(s,seqs.alphabet).complement() for s in seqs], seqs.alphabet) prior = parse_prior( options.composition,seqs.alphabet, options.weight) data = LogoData.from_seqs(seqs, prior) return data
# we can't send any useful feedback logo = StringIO() try: comp = form["composition"].get_value() percentCG = form["percentCG"].get_value() ignore_lower_case = form_values.has_key("ignore_lower_case") if comp == 'percentCG': comp = str(percentCG / 100) from corebio.matrix import Motif try: # Try reading data in transfac format first. # TODO Refactor this code motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet) prior = weblogolib.parse_prior(comp, motif.alphabet) data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior) except ValueError, motif_err: seqs = weblogolib.read_seq_data( StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case) prior = weblogolib.parse_prior(comp, seqs.alphabet) data = weblogolib.LogoData.from_seqs(seqs, prior) logoformat = weblogolib.LogoFormat(data, logooptions) format = form["format"].value weblogolib.formatters[format](data, logoformat, logo) except ValueError, err: errors.append(err.args)
isCodon = True options.alphabet = None seqs = read_seq_data( fin, options.input_parser.read, alphabet=options.alphabet, ignore_lower_case=options.ignore_lower_case ) if motif_flag: if options.ignore_lower_case: raise ValueError("option --ignore-lower-case incompatible with matrix input") if options.reverse: motif.reverse() if options.complement: motif.complement() if not isCodon: prior, compos = parse_prior(fin_compos, motif.alphabet, fin_weight) data = LogoData.from_counts(motif.alphabet, motif, options.stats_func, prior, compos, second_data) else: raise ValueError("option --sequence-type 'codon' incompatible with matrix input") else: if options.codon_frame < 0 and isCodon: options.reverse = True options.complement = True if options.reverse: seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet) if options.complement: seqs = SeqList([Seq(s, seqs.alphabet).complement() for s in seqs], seqs.alphabet) if isCodon:
def test_explicit(self):
    """An explicit per-base composition string is parsed and rescaled."""
    spec = "{'A':10, 'C':40, 'G':40, 'T':10}"
    percentages = array((10, 40, 40, 10), float64)
    # The test expects the percentages rescaled to a total weight of 2.
    expected = percentages * 2. / 100.
    parsed = parse_prior(spec, unambiguous_dna_alphabet)
    self.assertTrue(all(expected == parsed))
def LogoPlot(sites, datatype, data, plotfile, nperline, numberevery=10, allowunsorted=False, ydatamax=1.01, overlay=None):
    """Constructs a sequence logo showing amino-acid or nucleotide preferences.

    The height of each letter is equal to the preference of that site for
    that amino acid or nucleotide.

    Note that stop codons may or may not be included in the logo
    depending on whether they are present in *data*.

    CALLING VARIABLES:

    * *sites* is a list of all of the sites that are being included
      in the logo, as strings or numbers. They must be in natural
      sort order (as is done by *dms_tools.utils.NaturalSort*) or an error
      will be raised **unless** *allowunsorted* is *True*. The sites
      in the plot are ordered in the same arrangement
      listed in *sites*. These should be **strings**, not integers.

    * *datatype* should be one of the two following strings depending on
      whether we are making a plot of preferences or differential
      preferences: 'prefs' or 'diffprefs'

    * *data* is a dictionary that has a key for every entry in
      *sites*. For every site *r* in *sites*, *data[r][x]*
      is the preference or differential preference for character *x*.
      Preferences must sum to one; differential preferences to zero.
      All sites must have the same set of characters. The characters
      must be the set of nucleotides (*dms_tools.nts*) or the set of
      amino acids with or without stop codons
      (*dms_tools.aminoacids_nostop* or *dms_tools.aminoacids_withstop*).

    * *plotfile* is a string giving the name of the created PDF file
      of the logo plot. It must end in the extension ``.pdf``.

    * *nperline* is the number of sites per line. Often 40 to 80 are good
      values.

    * *numberevery* specifies how frequently we put labels for the sites on
      the x-axis.

    * *allowunsorted* : if *True* then we allow the entries in *sites* to
      **not** be sorted. This means that the logo plot will **not** have
      sites in sorted order.

    * *ydatamax* : meaningful only if *datatype* is 'diffprefs'. In this case,
      it gives the maximum that the logo stacks extend in the positive and
      negative directions. Cannot be smaller than the maximum extent of the
      differential preferences.

    * *overlay* : this argument allows you to make overlay bars that indicate
      other properties for the sites. By default, this option is *None*,
      meaning that no overlay is created. If you set it to something else,
      it must be a list giving either one or two properties. Each property
      is a tuple: *(prop_d, shortname, longname)* where:

        - *prop_d* is a dictionary keyed by site numbers that are in *sites*.
          For each *r* in *sites*, *prop_d[r]* gives the value of the
          property, or if there is no entry in *prop_d* for *r*, then the
          property is undefined and is colored white. Properties can either
          be:

            * continuous: in this case, all of the values should be numbers.

            * discrete : in this case, all of the values should be strings.
              While in practice, if you have more than a few discrete
              categories (different strings), the plot will be a mess.

        - *shortname* : short name for the property; will not format well
          if more than 4 or 5 characters.

        - *longname* : longer name for property used on axes label. Can be
          the same as *shortname* if you don't need a different long name.

    Raises *AssertionError* or *ValueError* if the arguments are
    inconsistent (see the individual checks below).
    """
    assert datatype in ['prefs', 'diffprefs']

    # check data, and get characters
    assert sites, "No sites specified"
    assert set(sites) == set(data.keys()), "There is not a complete match between sites and the keys of data"
    # Materialize as a list: the 'diffprefs' branch appends extra characters
    # with '+', which a dictionary view does not support.
    characters = list(data[sites[0]].keys())
    if set(characters) == set(dms_tools.nts):
        alphabet_type = 'nt'
    elif set(characters) == set(dms_tools.aminoacids_nostop) or set(characters) == set(dms_tools.aminoacids_withstop):
        alphabet_type = 'aa'
    else:
        raise ValueError("Invalid set of character keys in data. Do not specify either nucleotides or amino acids:\n%s" % str(characters))
    for r in sites:
        if set(data[r].keys()) != set(characters):
            raise ValueError("Not all sites in data have the same set of characters")
    firstblankchar = 'B'  # character for first blank space for diffprefs
    assert firstblankchar not in characters, "firstblankchar in characters"
    lastblankchar = 'b'  # character for last blank space for diffprefs
    assert lastblankchar not in characters, "lastblankchar in characters"
    separatorchar = '-'  # separates positive and negative for diffprefs
    # This previously re-checked lastblankchar, leaving separatorchar unchecked.
    assert separatorchar not in characters, "separatorchar in characters"
    separatorheight = 0.02  # height of separator as fraction of total for diffprefs

    if os.path.splitext(plotfile)[1].lower() != '.pdf':
        raise ValueError("plotfile must end in .pdf: %s" % plotfile)
    if os.path.isfile(plotfile):
        os.remove(plotfile)  # remove existing plot

    if not allowunsorted:
        sorted_sites = list(sites)
        dms_tools.utils.NaturalSort(sorted_sites)
        if sorted_sites != sites:
            raise ValueError("sites is not properly sorted")

    # Following are specifications of weblogo sizing taken from its documentation
    stackwidth = 9.5  # stack width in points, not default size of 10.8, but set to this in weblogo call below
    barheight = 5.5  # height of bars in points if using overlay
    barspacing = 2.0  # spacing between bars in points if using overlay
    stackaspectratio = 4.4  # ratio of stack height:width, doesn't count part going over maximum value of 1
    if overlay:
        if not (1 <= len(overlay) <= 2):
            raise ValueError("overlay must be a list of one or two entries; instead it had %d entries" % len(overlay))
        ymax = (stackaspectratio * stackwidth + len(overlay) * (barspacing + barheight)) / float(stackaspectratio * stackwidth)
        aspectratio = ymax * stackaspectratio  # effective aspect ratio for full range
    else:
        ymax = 1.0
        aspectratio = stackaspectratio
    rmargin = 11.5  # right margin in points, fixed by weblogo
    stackheightmargin = 16  # margin between stacks in points, fixed by weblogo

    transfacfile = None  # so the cleanup below is safe even if mkstemp fails
    try:
        # write data into transfacfile (a temporary file)
        (fd, transfacfile) = tempfile.mkstemp()
        ordered_alphabets = {}  # keyed by site index (0, 1, ...) with values ordered lists for characters from bottom to top
        with os.fdopen(fd, 'w') as f:
            if datatype == 'prefs':
                chars_for_string = characters
                f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
                for (isite, r) in enumerate(sites):
                    f.write('%s %s\n' % (r, ' '.join([str(data[r][x]) for x in characters])))
                    pi_r = sorted((data[r][x], x) for x in characters)
                    ordered_alphabets[isite] = [tup[1] for tup in pi_r]  # order from smallest to biggest
            elif datatype == 'diffprefs':
                chars_for_string = characters + [firstblankchar, lastblankchar, separatorchar]
                ydatamax *= 2.0  # maximum possible range of data, multiply by two for range
                f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
                for (isite, r) in enumerate(sites):
                    positivesum = sum([data[r][x] for x in characters if data[r][x] > 0]) + separatorheight / 2.0
                    negativesum = sum([data[r][x] for x in characters if data[r][x] < 0]) - separatorheight / 2.0
                    if abs(positivesum + negativesum) > 1.0e-3:
                        raise ValueError("Differential preferences sum of %s is not close to zero for site %s" % (positivesum + negativesum, r))
                    if 2.0 * positivesum > ydatamax:
                        raise ValueError("You need to increase ydatamax: the total differential preferences sum to more than the y-axis limits. Right now, ydatamax is %.3f while the total differential preferences are %.3f" % (ydatamax, 2.0 * positivesum))
                    f.write('%s' % r)
                    deltapi_r = []
                    for x in characters:
                        deltapi_r.append((data[r][x], x))
                        f.write(' %s' % (abs(data[r][x]) / float(ydatamax)))
                    deltapi_r.sort()
                    # Index of the first non-negative entry; bounded so that a
                    # site with only negative values cannot index past the end.
                    firstpositiveindex = 0
                    while firstpositiveindex < len(deltapi_r) and deltapi_r[firstpositiveindex][0] < 0:
                        firstpositiveindex += 1
                    # order from most negative to most positive with blank characters and separators
                    ordered_alphabets[isite] = (
                        [firstblankchar]
                        + [tup[1] for tup in deltapi_r[: firstpositiveindex]]
                        + [separatorchar]
                        + [tup[1] for tup in deltapi_r[firstpositiveindex:]]
                        + [lastblankchar]
                    )
                    # heights for blank characters and separators
                    blankheight = 0.5 * (ydatamax + 2.0 * negativesum) / ydatamax
                    f.write(' %g %g %g\n' % (blankheight, blankheight, separatorheight))
            else:
                raise ValueError("Invalid datatype of %s" % datatype)

        # create web logo
        charstring = ''.join(chars_for_string)
        assert len(charstring) == len(chars_for_string), "Length of charstring doesn't match length of chars_for_string. Do you have unallowable multi-letter characters?\n%s" % (str(chars_for_string))
        logoprior = weblogolib.parse_prior('equiprobable', charstring, 0)
        with open(transfacfile) as transfac_in:
            motif = _my_Motif.read_transfac(transfac_in, charstring)
        logodata = weblogolib.LogoData.from_counts(motif.alphabet, motif, logoprior)
        logo_options = weblogolib.LogoOptions()
        logo_options.fineprint = None
        logo_options.stacks_per_line = nperline
        logo_options.stack_aspect_ratio = aspectratio
        logo_options.stack_width = stackwidth
        logo_options.unit_name = 'probability'
        logo_options.show_yaxis = False
        logo_options.yaxis_scale = ymax
        if alphabet_type == 'aa':
            (cmap, colormapping, mapper) = KyteDoolittleColorMapping()
        elif alphabet_type == 'nt':
            colormapping = {}
            colormapping['A'] = '#008000'
            colormapping['T'] = '#FF0000'
            colormapping['C'] = '#0000FF'
            colormapping['G'] = '#FFA500'
        else:
            raise ValueError("Invalid alphabet_type %s" % alphabet_type)
        colormapping[firstblankchar] = colormapping[lastblankchar] = '#000000'  # black, but color doesn't matter as modified weblogo code replaces with empty space
        colormapping[separatorchar] = '#000000'  # black
        color_scheme = weblogolib.colorscheme.ColorScheme()
        for x in chars_for_string:
            color_scheme.groups.append(weblogolib.colorscheme.ColorGroup(x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme
        # label only every numberevery-th site
        logo_options.annotate = [r if isite % numberevery == 0 else '' for (isite, r) in enumerate(sites)]
        logoformat = weblogolib.LogoFormat(logodata, logo_options)
        # _my_pdf_formatter is modified from weblogo version 3.4 source code
        # to allow custom ordering of the symbols.
        pdf = _my_pdf_formatter(logodata, logoformat, ordered_alphabets)
        with open(plotfile, 'w') as plot_out:
            plot_out.write(pdf)
        assert os.path.isfile(plotfile), "Failed to find expected plotfile %s" % plotfile
    finally:
        # remove temporary file
        if transfacfile is not None and os.path.isfile(transfacfile):
            os.remove(transfacfile)

    # now build the overlay
    if overlay:
        overlayfile = mergedfile = None
        plotfile_f = overlayfile_f = fmerged = None
        try:
            (fdoverlay, overlayfile) = tempfile.mkstemp(suffix='.pdf')
            os.close(fdoverlay)  # only the path is needed; it is handed to LogoOverlay below
            (fdmerged, mergedfile) = tempfile.mkstemp(suffix='.pdf')
            fmerged = os.fdopen(fdmerged, 'wb')
            LogoOverlay(sites, overlayfile, overlay, nperline, sitewidth=stackwidth, rmargin=rmargin, logoheight=stackwidth * stackaspectratio + stackheightmargin, barheight=barheight, barspacing=barspacing)
            # The input files stay open until output.write() has run, since
            # the pages are still needed when the merged file is written.
            plotfile_f = open(plotfile, 'rb')
            plot = PyPDF2.PdfFileReader(plotfile_f).getPage(0)
            overlayfile_f = open(overlayfile, 'rb')
            overlay_page = PyPDF2.PdfFileReader(overlayfile_f).getPage(0)
            xshift = overlay_page.artBox[2] - plot.artBox[2]
            overlay_page.mergeTranslatedPage(plot, xshift, 0)
            overlay_page.compressContentStreams()
            output = PyPDF2.PdfFileWriter()
            output.addPage(overlay_page)
            output.write(fmerged)
            fmerged.close()
            # Close the inputs before replacing plotfile with the merged file.
            plotfile_f.close()
            overlayfile_f.close()
            shutil.move(mergedfile, plotfile)
        finally:
            # best-effort close of anything still open
            for handle in (plotfile_f, overlayfile_f, fmerged):
                if handle is not None and not handle.closed:
                    try:
                        handle.close()
                    except (IOError, OSError):
                        pass
            for fname in (overlayfile, mergedfile):
                if fname is not None and os.path.isfile(fname):
                    os.remove(fname)
def main(htdocs_directory=None):
    """Handle one CGI request for the logo creation form.

    Builds the list of form fields, reads the submitted values and then
    either:

    * sends the default form (no values submitted, or "cmd_reset");
    * sends the form back with any collected errors (validation errors,
      "cmd_edit", or "cmd_validate"); or
    * writes the HTTP headers followed by the generated logo to stdout.

    *htdocs_directory* is passed through unchanged to ``send_form``.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field("sequences", ""),
        Field(
            "format",
            "png",
            weblogolib.formatters.get,
            options=[
                "png_print",
                "png",
                "jpeg",
                "eps",
                "pdf",
                "svg",
                "logodata",
            ],  # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option.",
        ),
        Field("stacks_per_line", logooptions.stacks_per_line, int, errmsg="Invalid number of stacks per line."),
        Field(
            "stack_width",
            "medium",
            weblogolib.std_sizes.get,
            options=["small", "medium", "large"],
            errmsg="Invalid logo size.",
        ),
        Field(
            "alphabet",
            "alphabet_auto",
            alphabets.get,
            options=["alphabet_auto", "alphabet_protein", "alphabet_dna", "alphabet_rna"],
            errmsg="Unknown sequence type.",
        ),
        Field("unit_name", "bits", options=["probability", "bits", "nats", "kT", "kJ/mol", "kcal/mol"]),
        Field("first_index", 1, int_or_none),
        Field("logo_start", "", int_or_none),
        Field("logo_end", "", int_or_none),
        Field(
            "composition",
            "comp_auto",
            composition.get,
            options=[
                "comp_none",
                "comp_auto",
                "comp_equiprobable",
                "comp_CG",
                "comp_Celegans",
                "comp_Dmelanogaster",
                "comp_Ecoli",
                "comp_Hsapiens",
                "comp_Mmusculus",
                "comp_Scerevisiae",
            ],
            errmsg="Illegal sequence composition.",
        ),
        Field("percentCG", "", float_or_none, errmsg="Invalid CG percentage."),
        Field("show_errorbars", False, truth),
        Field("logo_title", logooptions.logo_title),
        Field("logo_label", logooptions.logo_label),
        Field("show_xaxis", False, truth),
        Field("xaxis_label", logooptions.xaxis_label),
        Field("show_yaxis", False, truth),
        Field("yaxis_label", logooptions.yaxis_label, string_or_none),
        Field(
            "yaxis_scale", logooptions.yaxis_scale, float_or_none, errmsg="The yaxis scale must be a positive number."
        ),
        Field("yaxis_tic_interval", logooptions.yaxis_tic_interval, float_or_none),
        Field("show_ends", False, truth),
        Field("show_fineprint", False, truth),
        Field(
            "color_scheme", "color_auto", color_schemes.get, options=color_schemes.keys(), errmsg="Unknown color scheme"
        ),
        Field("color0", ""),
        Field("symbols0", ""),
        Field("desc0", ""),
        Field("color1", ""),
        Field("symbols1", ""),
        Field("desc1", ""),
        Field("color2", ""),
        Field("symbols2", ""),
        Field("desc2", ""),
        Field("color3", ""),
        Field("symbols3", ""),
        Field("desc3", ""),
        Field("color4", ""),
        Field("symbols4", ""),
        Field("desc4", ""),
        Field("ignore_lower_case", False, truth),
        Field("scale_width", False, truth),
    ]

    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        for checkbox in ("show_errorbars", "show_xaxis", "show_yaxis",
                         "show_ends", "show_fineprint", "scale_width"):
            form[checkbox].value = getattr(logooptions, checkbox)
        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        "format",
        "stacks_per_line",
        "stack_width",
        "alphabet",
        "unit_name",
        "first_index",
        "logo_start",
        "logo_end",
        "composition",
        "show_errorbars",
        "logo_title",
        "logo_label",
        "show_xaxis",
        "xaxis_label",
        "show_yaxis",
        "yaxis_label",
        "yaxis_scale",
        "yaxis_tic_interval",
        "show_ends",
        "show_fineprint",
        "scale_width",
    ]

    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(("color%d" % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == "color_custom":
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # assert type(sequences) == str

    if not sequences:
        sequences = form["sequences"].get_value()
        # If a user tries to paste a very large file into sequence textarea,
        # then WebLogo runs very slow for no apparently good reason. (Might be client side bug?)
        # So we limit the maximum sequence size.
        # Form field also limits size, but not necessarily respected. Also can truncate data
        # without warning, so we'll set textarea maximum to be larger than MAX_SEQUENCE_SIZE
        SEQUENCES_MAXLENGTH = 100000
        if sequences and len(sequences) > SEQUENCES_MAXLENGTH:
            errors.append(("sequences", "Sequence data too large for text input. Use file upload instead."))
            # Replace the control so the oversized text is not echoed back.
            controls[0] = Field("sequences", "")

    if not sequences:
        errors.append(
            ("sequences", "Please enter a multiple-sequence alignment in the box above, or select a file to upload.")
        )

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = "ignore_lower_case" in form_values
        if comp == "percentCG":
            if percentCG is None:
                # Report a missing percentage as a form error; dividing None
                # would raise a TypeError that none of the handlers below catch.
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences), alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError:
            # Not a transfac matrix: fall back to multiple sequence data.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case
            )
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        logo_format = form["format"].value
        logo = weblogolib.formatters[logo_format](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[logo_format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    disposition = "attachment" if "download" in form_values else "inline"
    print('Content-Disposition: %s; filename="logo.%s"' % (disposition, extension[logo_format]))

    # Separate header from data
    print()
    # The headers went through the text layer; flush them before writing the
    # logo bytes so they cannot be emitted after the body.
    sys.stdout.flush()

    # Finally, and at last, send the logo.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
def DifferentialPreferencesLogo(sites, dpi_d, plotfile, nperline, overlay, sitenumbermapping=None, numberevery=10, ydatamax=1.0):
    """Creates a logo plot of differential amino-acid preferences.

    This plot shows the differential amino-acid preferences, which
    can potentially total up to 1.0 in each direction. For each stack,
    there is a center black line, and positive preferences are shown above
    that line while negative preferences are shown below it.

    All calling arguments have the same meaning as for the function
    *EquilibriumFreqsLogo* with the following two exceptions:

    *dpi_d* replaces the *pi_d* argument used for *EquilibriumFreqsLogo*.
    *dpi_d* is a dictionary keyed by every integer in *sites*.
    The value of *dpi_d[isite]* is itself a dictionary,
    which has keys 'dPI_A', 'dPI_C', 'dPI_D', etc for all 20
    one-letter upper-case amino acid codes. The values
    for these keys are the differential preference of that
    amino acid at that site. So *dpi_d[isite]['dPI_M']*
    is the differential preference for methionine at site *isite*.
    *dpi_d* is allowed to either contain or not contain stop
    codons. If it contains stop codons, then there should
    be a key 'dPI_*' giving the preference for a stop codon
    for each dictionary *dpi_d[isite]*. However, we only
    check that there are actually stop codons by looking
    to see if there is a key 'dPI_*' in *dpi_d[sites[0]]* --
    if there is not, then we don't look for stop codons at any other
    sites either. Note that even though stop codons are denoted
    by an asterisk in *dpi_d*, they are plotted using an *X* character
    in the sequence logo.

    *ydatamax* is the maximum that the logo stacks extend in the positive
    and negative directions. Is 1.0 by default.

    Raises *AssertionError* if *sites* is empty and *ValueError* for the
    other argument and data problems checked below.
    """
    # Checked first: sites[0] and sites[-1] are indexed during validation.
    assert sites, "No sites specified"

    stopchar = 'X'  # character for stop codon in logo plot
    firstblankchar = 'B'  # character for first blank space
    lastblankchar = 'b'  # character for last blank space
    separatorchar = '-'  # separates positive and negative
    separatorheight = 0.02  # height of separator as fraction of total
    if not WebLogoAvailable():
        raise ValueError("Cannot run weblogo")
    if overlay and not PyPdfAvailable():
        raise ValueError("Cannot use overlay as pyPdf is not available.")
    if overlay and not mapmuts.plot.PylabAvailable():
        raise ValueError("Cannot use overlay as pylab is not available.")
    if overlay:
        if not (len(overlay) == 2 and isinstance(overlay[0], dict) and isinstance(overlay[1], dict)):
            raise ValueError("overlay is not a list of two dictionaries.")
    if sites != list(range(sites[0], sites[-1] + 1)):
        raise ValueError("sites does not specify consecutive numbers")
    if os.path.splitext(plotfile)[1] != '.pdf':
        raise ValueError("plotfile must end in .pdf: %s" % plotfile)
    if os.path.isfile(plotfile):
        os.remove(plotfile)  # remove existing plot
    #
    # Following are specifications of weblogo sizing taken from its documentation
    # or specified when weblogo is called
    stackwidth = 9.5  # stack width in points, not default size of 10.8, but set to this in weblogo call below
    barheight = 5.5  # height of bars in points if using overlay
    barspacing = 2.0  # spacing between bars in points if using overlay
    stackaspectratio = 4.4  # ratio of stack height:width, doesn't count part going over maximum value of 1
    if overlay:
        ymax = (stackaspectratio * stackwidth + len(overlay) * (barspacing + barheight)) / float(stackaspectratio * stackwidth)
        aspectratio = ymax * stackaspectratio  # effective aspect ratio for full range
    else:
        ymax = 1.0
        aspectratio = stackaspectratio
    rmargin = 11.5  # right margin in points, fixed by weblogo
    stackheightmargin = 16  # margin between stacks in points, fixed by weblogo
    # End specifications of weblogo sizing taken from its documentation
    #
    includestop = 'dPI_*' in dpi_d[sites[0]]
    aas = mapmuts.sequtils.AminoAcids(includestop=includestop)
    if includestop:
        # the stop codon is taken to be the last entry and is drawn as stopchar
        aas_for_string = aas[: -1] + [stopchar]
    else:
        aas_for_string = aas
    aas_for_string = [aa for aa in aas_for_string] + [firstblankchar, lastblankchar, separatorchar]
    ydatamax *= 2.0  # maximum possible range of data, multiply by two for range

    transfacfile = None  # so the cleanup below is safe even if mkstemp fails
    try:
        # write data into transfacfile (a temporary file); wrapping the
        # descriptor from mkstemp makes sure it gets closed
        (fd, transfacfile) = tempfile.mkstemp()
        ordered_alphabets = {}  # keyed by site (consecutive 0-index) with values ordered lists of aas from bottom to top
        with os.fdopen(fd, 'w') as f:
            f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(aas_for_string))
            for (isite, site) in enumerate(sites):
                positivesum = sum([dpi_d[site]['dPI_%s' % aa] for aa in aas if dpi_d[site]['dPI_%s' % aa] > 0]) + separatorheight / 2.0
                negativesum = sum([dpi_d[site]['dPI_%s' % aa] for aa in aas if dpi_d[site]['dPI_%s' % aa] < 0]) - separatorheight / 2.0
                if abs(positivesum + negativesum) > 1.0e-6:
                    raise ValueError("Differential preference sums of %g and %g not close to zero for site %d" % (positivesum, negativesum, site))
                f.write('%d' % site)
                dpi_aa = []
                for aa in aas:
                    y = dpi_d[site]['dPI_%s' % aa]
                    dpi_aa.append((y, aa))
                    f.write(' %g' % (abs(y) / float(ydatamax)))
                dpi_aa.sort()
                # Index of the first non-negative entry; bounded so that a
                # site with only negative values cannot index past the end.
                firstpositiveindex = 0
                while firstpositiveindex < len(dpi_aa) and dpi_aa[firstpositiveindex][0] < 0:
                    firstpositiveindex += 1
                ordered_alphabets[isite] = (
                    [firstblankchar]
                    + [tup[1] for tup in dpi_aa[: firstpositiveindex]]
                    + [separatorchar]
                    + [tup[1] for tup in dpi_aa[firstpositiveindex:]]
                    + [lastblankchar]
                )
                if 2.0 * positivesum > ydatamax:
                    raise ValueError("You need to increase ydatamax: the total differential preferences sum to more than the y-axis limits")
                # heights for the two blank characters and the separator
                blankheight = 0.5 * (ydatamax + 2.0 * negativesum) / ydatamax
                f.write(' %g %g %g\n' % (blankheight, blankheight, separatorheight))

        # create web logo
        aastring = ''.join(aas_for_string)
        logoprior = weblogolib.parse_prior('equiprobable', aastring, 0)
        with open(transfacfile) as transfac_in:
            motif = _my_Motif.read_transfac(transfac_in, aastring)
        logodata = weblogolib.LogoData.from_counts(motif.alphabet, motif, logoprior)
        logo_options = weblogolib.LogoOptions()
        logo_options.fineprint = None
        logo_options.stacks_per_line = nperline
        logo_options.stack_aspect_ratio = aspectratio
        logo_options.stack_width = stackwidth
        logo_options.unit_name = 'probability'
        logo_options.show_yaxis = False
        logo_options.yaxis_scale = ymax
        logo_options.first_index = sites[0]
        (cmap, colormapping, mapper) = mapmuts.plot.KyteDoolittleColorMapping()
        colormapping[firstblankchar] = colormapping[lastblankchar] = '#FFFFFF'  # white
        colormapping[separatorchar] = '#000000'  # black
        color_scheme = weblogolib.colorscheme.ColorScheme()
        # colors are looked up by the data character but registered under the plotted one
        for (aa, aaforstring) in zip(aas + [firstblankchar, lastblankchar, separatorchar], aas_for_string):
            color_scheme.groups.append(weblogolib.colorscheme.ColorGroup(aaforstring, colormapping[aa], "'%s'" % aaforstring))
        logo_options.color_scheme = color_scheme
        # add site number mapping
        if sitenumbermapping:
            logo_options.annotate = [
                sitenumbermapping[site].strip() if isite % numberevery == 0 else ''
                for (isite, site) in enumerate(sites)
            ]
        logoformat = weblogolib.LogoFormat(logodata, logo_options)
        # _my_pdf_formatter is modified from weblogo version 3.4 source code
        # to allow custom ordering of the symbols.
        pdf = _my_pdf_formatter(logodata, logoformat, ordered_alphabets)
        with open(plotfile, 'w') as plot_out:
            plot_out.write(pdf)
    finally:
        # remove temporary file
        if transfacfile is not None and os.path.isfile(transfacfile):
            os.remove(transfacfile)

    # now build the overlay
    if overlay:
        # Uniquely named temporary files next to plotfile: this avoids
        # clashes between concurrent runs and keeps the final os.rename on
        # one filesystem.
        plotdir = os.path.dirname(os.path.abspath(plotfile))
        overlayfile = mergedfile = None
        try:
            (fdoverlay, overlayfile) = tempfile.mkstemp(suffix='.pdf', dir=plotdir)
            os.close(fdoverlay)
            (fdmerged, mergedfile) = tempfile.mkstemp(suffix='.pdf', dir=plotdir)
            os.close(fdmerged)
            # make the overlay plot
            mapmuts.plot.LogoOverlay(sites, overlayfile, overlay[0], overlay[1], nperline, sitewidth=stackwidth, rmargin=rmargin, logoheight=stackwidth * stackaspectratio + stackheightmargin, barheight=barheight, barspacing=barspacing)
            # overlay onto plotfile using pyPdf; the inputs stay open until
            # the merged output has been written
            with open(plotfile, 'rb') as plot_in:
                with open(overlayfile, 'rb') as overlay_in:
                    plot = pyPdf.PdfFileReader(plot_in).getPage(0)
                    overlay_page = pyPdf.PdfFileReader(overlay_in).getPage(0)
                    xshift = overlay_page.artBox[2] - plot.artBox[2]
                    overlay_page.mergeTranslatedPage(plot, xshift, 0)
                    output = pyPdf.PdfFileWriter()
                    output.addPage(overlay_page)
                    with open(mergedfile, 'wb') as outputstream:
                        output.write(outputstream)
            os.rename(mergedfile, plotfile)
        finally:
            # remove whatever temporary files are still present
            for fname in (overlayfile, mergedfile):
                if fname is not None and os.path.isfile(fname):
                    os.remove(fname)
def test_auto(self):
    # Both spellings of the automatic prior are expected to give twice the
    # equiprobable distribution over a four-symbol alphabet.
    for spelling in ('auto', 'automatic'):
        expected = 2. * equiprobable_distribution(4)
        observed = parse_prior(spelling, unambiguous_dna_alphabet)
        self.assertTrue(all(expected == observed))
def test_weight(self):
    # Without an explicit weight the prior is expected to be 2.0 times the
    # equiprobable distribution.
    expected_default = 2.0 * equiprobable_distribution(4)
    observed_default = parse_prior("automatic", unambiguous_dna_alphabet)
    self.assertTrue(all(expected_default == observed_default))

    # An explicit weight passed as the third argument replaces that factor.
    expected_custom = 123.123 * equiprobable_distribution(4)
    observed_custom = parse_prior("auto", unambiguous_dna_alphabet, 123.123)
    self.assertTrue(all(expected_custom == observed_custom))
def test_explicit(self):
    # A composition written out as a dict literal of percentages; the
    # expected prior is those percentages rescaled by 2.0 / 100.0.
    composition_spec = "{'A':10, 'C':40, 'G':40, 'T':10}"
    percentages = array((10, 40, 40, 10), float64)
    expected = percentages * 2.0 / 100.0
    observed = parse_prior(composition_spec, unambiguous_dna_alphabet)
    self.assertTrue(all(expected == observed))
def main(htdocs_directory=None):
    """Handle one CGI request: send the form, report errors, or send a logo.

    The request is read from ``cgilib.FieldStorage()``. Depending on its
    content this function either

    * sends the default form (empty request or ``cmd_reset``),
    * re-sends the form together with the collected errors (any validation
      error, ``cmd_edit`` or ``cmd_validate``), or
    * prints the HTTP headers and writes the rendered logo to stdout.

    Errors are collected as ``(field name, message)`` tuples and handed to
    ``send_form``.

    *htdocs_directory* is passed through to ``send_form`` unchanged.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field('format', 'png', weblogolib.formatters.get,
              options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg',
                       'logodata'],
              # TODO: Should copy list from __init__.formatters
              errmsg="Unknown format option."),
        Field('stacks_per_line', logooptions.stacks_per_line, int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width', 'medium', weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet', 'alphabet_auto', alphabets.get,
              options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                       'alphabet_rna'],
              errmsg="Unknown sequence type."),
        Field('unit_name', 'bits',
              options=['probability', 'bits', 'nats', 'kT', 'kJ/mol',
                       'kcal/mol']),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition', 'comp_auto', composition.get,
              options=['comp_none', 'comp_auto', 'comp_equiprobable',
                       'comp_CG', 'comp_Celegans', 'comp_Dmelanogaster',
                       'comp_Ecoli', 'comp_Hsapiens', 'comp_Mmusculus',
                       'comp_Scerevisiae'],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none,
              errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale', logooptions.yaxis_scale, float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme', 'color_auto', color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        for checkbox in ('show_errorbars', 'show_xaxis', 'show_yaxis',
                         'show_ends', 'show_fineprint', 'scale_width'):
            form[checkbox].value = getattr(logooptions, checkbox)
        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width',
    ]

    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # assert type(sequences) == str

    # An uploaded file takes precedence; fall back to the text box.
    if not sequences:
        sequences = form["sequences"].get_value()

    if not sequences:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # percentCG is presumably None when the field was left blank;
            # report that on the form instead of letting None / 100 escape
            # as an uncaught TypeError.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError:
            # Not a transfac matrix: read as multiple sequence data instead.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        logo_format = form["format"].value
        logo = weblogolib.formatters[logo_format](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    # RETURN LOGO OVER HTTP
    #
    print("Content-Type:", mime_type[logo_format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; '
              'filename="logo.%s"' % extension[logo_format])
    else:
        print('Content-Disposition: inline; '
              'filename="logo.%s"' % extension[logo_format])

    # Separate header from data
    print()

    # Finally, and at last, send the logo.
    # On Python 3 the logo goes out through the binary buffer of stdout.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
except ValueError, motif_err: # Failed reading Motif, try reading as multiple sequence data. seqs = read_seq_data(fin, options.input_parser.read, alphabet=options.alphabet, ignore_lower_case=options.ignore_lower_case) if motif_flag: if options.ignore_lower_case: raise ValueError( "error: option --ignore-lower-case incompatible with matrix input" ) if options.reverse: motif.reverse() if options.complement: motif.complement() prior = parse_prior(options.composition, motif.alphabet, options.weight) data = LogoData.from_counts(motif.alphabet, motif, prior) else: if options.reverse: seqs = SeqList([s.reverse() for s in seqs], seqs.alphabet) if options.complement: seqs = SeqList([Seq(s, seqs.alphabet).complement() for s in seqs], seqs.alphabet) prior = parse_prior(options.composition, seqs.alphabet, options.weight) data = LogoData.from_seqs(seqs, prior) return data
# handle any errors. Once the "Content-Type:" header has been sent # we can't send any useful feedback logo = StringIO() try : comp = form["composition"].get_value() percentCG = form["percentCG"].get_value() ignore_lower_case = form_values.has_key("ignore_lower_case") if comp=='percentCG': comp = str(percentCG/100) from corebio.matrix import Motif try: # Try reading data in transfac format first. # TODO Refactor this code motif = Motif.read_transfac(StringIO( sequences), alphabet=logooptions.alphabet) prior = weblogolib.parse_prior( comp,motif.alphabet) data = weblogolib.LogoData.from_counts(motif.alphabet, motif, prior) except ValueError, motif_err : seqs = weblogolib.read_seq_data(StringIO( sequences), alphabet=logooptions.alphabet, ignore_lower_case=ignore_lower_case ) prior = weblogolib.parse_prior(comp, seqs.alphabet) data = weblogolib.LogoData.from_seqs(seqs, prior) logoformat = weblogolib.LogoFormat(data, logooptions) format = form["format"].value weblogolib.formatters[format](data, logoformat, logo) except ValueError, err : errors.append( err.args ) except IOError, err :
def main(htdocs_directory=None):
    """Handle one CGI request: send the form, report errors, or send a logo.

    The request is read from ``cgilib.FieldStorage()``. Depending on its
    content this function either

    * sends the default form (empty request or ``cmd_reset``),
    * re-sends the form together with the collected errors (any validation
      error, ``cmd_edit`` or ``cmd_validate``), or
    * prints the HTTP headers and writes the rendered logo to stdout.

    Errors are collected as ``(field name, message)`` tuples and handed to
    ``send_form``.

    *htdocs_directory* is passed through to ``send_form`` unchanged.
    """
    logooptions = weblogolib.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field('format', 'png', weblogolib.formatters.get,
              options=['png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg',
                       'logodata'],
              # TODO: Should copy list from __init__.formatters
              errmsg="Unknown format option."),
        Field('stacks_per_line', logooptions.stacks_per_line, int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width', 'medium', weblogolib.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet', 'alphabet_auto', alphabets.get,
              options=['alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                       'alphabet_rna'],
              errmsg="Unknown sequence type."),
        Field('unit_name', 'bits',
              options=['probability', 'bits', 'nats', 'kT', 'kJ/mol',
                       'kcal/mol']),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition', 'comp_auto', composition.get,
              options=['comp_none', 'comp_auto', 'comp_equiprobable',
                       'comp_CG', 'comp_Celegans', 'comp_Dmelanogaster',
                       'comp_Ecoli', 'comp_Hsapiens', 'comp_Mmusculus',
                       'comp_Scerevisiae'],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none,
              errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale', logooptions.yaxis_scale, float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme', 'color_auto', color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    form = {}
    for c in controls:
        form[c.name] = c

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        for checkbox in ('show_errorbars', 'show_xaxis', 'show_yaxis',
                         'show_ends', 'show_fineprint', 'scale_width'):
            form[checkbox].value = getattr(logooptions, checkbox)
        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width',
    ]

    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(0, 5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.groups.append(
                    weblogolib.ColorGroup(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    sequences = None

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    if "sequences_file" in form_values:
        sequences = form_values.getvalue("sequences_file")
        # assert type(sequences) == str

    # An uploaded file takes precedence; fall back to the text box.
    if not sequences:
        sequences = form["sequences"].get_value()

    if not sequences:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a file to upload."))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # percentCG is presumably None when the field was left blank;
            # report that on the form instead of letting None / 100 escape
            # as an uncaught TypeError.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from corebio.matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(StringIO(sequences),
                                        alphabet=logooptions.alphabet)
            prior = weblogolib.parse_prior(comp, motif.alphabet)
            data = weblogolib.LogoData.from_counts(motif.alphabet, motif,
                                                   prior)
        except ValueError:
            # Not a transfac matrix: read as multiple sequence data instead.
            seqs = weblogolib.read_seq_data(
                StringIO(sequences),
                alphabet=logooptions.alphabet,
                ignore_lower_case=ignore_lower_case)
            prior = weblogolib.parse_prior(comp, seqs.alphabet)
            data = weblogolib.LogoData.from_seqs(seqs, prior)

        logoformat = weblogolib.LogoFormat(data, logooptions)
        logo_format = form["format"].value
        logo = weblogolib.formatters[logo_format](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    # RETURN LOGO OVER HTTP
    #
    print("Content-Type:", mime_type[logo_format])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; '
              'filename="logo.%s"' % extension[logo_format])
    else:
        print('Content-Disposition: inline; '
              'filename="logo.%s"' % extension[logo_format])

    # Separate header from data
    print()

    # Finally, and at last, send the logo.
    # On Python 3 the logo goes out through the binary buffer of stdout.
    if sys.version_info[0] >= 3:
        sys.stdout.buffer.write(logo)
    else:
        sys.stdout.write(logo)
def _signed_stack_order(sitedata, characters, firstblankchar, separatorchar, lastblankchar):
    """Order one signed stack bottom-to-top: blank, negatives, separator, the rest, blank."""
    ranked = sorted((sitedata[x], x) for x in characters)
    # Partitioning (rather than scanning for the first non-negative entry)
    # also works when every value at the site is negative.
    negatives = [x for (value, x) in ranked if value < 0]
    others = [x for (value, x) in ranked if not value < 0]
    return [firstblankchar] + negatives + [separatorchar] + others + [lastblankchar]


def LogoPlot(sites, datatype, data, plotfile, nperline, numberevery=10, allowunsorted=False, ydatamax=1.01, overlay=None, fix_limits={}, fixlongname=False, overlay_cmap=None, ylimits=None, relativestackheight=1, custom_cmap='jet', map_metric='kd', noseparator=False):
    """Constructs a sequence logo showing amino-acid or nucleotide preferences.

    The height of each letter is equal to the preference of that site for
    that amino acid or nucleotide.

    Note that stop codons may or may not be included in the logo
    depending on whether they are present in *data*.

    CALLING VARIABLES:

    * *sites* is a list of all of the sites that are being included
      in the logo, as strings. They must be in natural sort order (as is
      done by *dms_tools.utils.NaturalSort*) or an error will be raised
      **unless** *allowunsorted* is *True*. The sites in the plot are
      ordered in the same arrangement listed in *sites*. These should be
      **strings**, not integers.

    * *datatype* should be one of the following strings:

        * 'prefs' for preferences

        * 'diffprefs' for differential preferences

        * 'diffsel' for differential selection

    * *data* is a dictionary that has a key for every entry in
      *sites*. For every site *r* in *sites*, *data[r][x]*
      is the value for character *x*. Preferences must sum to one;
      differential preferences to zero. All sites must have the same set
      of characters. The characters must be the set of nucleotides
      (*dms_tools.nts*) or the set of amino acids with or without stop
      codons (*dms_tools.aminoacids_nostop* or
      *dms_tools.aminoacids_withstop*).

    * *plotfile* is a string giving the name of the created PDF file
      of the logo plot. It must end in the extension ``.pdf``. Any
      existing file of that name is removed first.

    * *nperline* is the number of sites per line. Often 40 to 80 are
      good values.

    * *numberevery* specifies how frequently we put labels for the sites
      on the x-axis.

    * *allowunsorted* : if *True* then we allow the entries in *sites* to
      **not** be sorted. This means that the logo plot will **not** have
      sites in sorted order.

    * *ydatamax* : meaningful only if *datatype* is 'diffprefs'. In this
      case, it gives the maximum that the logo stacks extend in the
      positive and negative directions. Cannot be smaller than the maximum
      extent of the differential preferences.

    * *ylimits*: is **mandatory** if *datatype* is 'diffsel', and
      meaningless otherwise. It is *(ymin, ymax)* where *ymax > 0 > ymin*,
      and gives extent of the data in the positive and negative
      directions. Must encompass the actual maximum and minimum of the
      data.

    * *overlay* : this argument allows you to make overlay bars that
      indicate other properties for the sites. By default, this option is
      *None*, meaning that no overlay is created. If you set it to
      something else, it must be a list giving between one and three
      properties. Each property is a tuple:
      *(prop_d, shortname, longname)* where:

        - *prop_d* is a dictionary keyed by site numbers that are in
          *sites*. For each *r* in *sites*, *prop_d[r]* gives the value of
          the property, or if there is no entry in *prop_d* for *r*, then
          the property is undefined and is colored white. Properties can
          either be:

            * continuous: in this case, all of the values should be
              numbers.

            * discrete : in this case, all of the values should be
              strings. While in practice, if you have more than a few
              discrete categories (different strings), the plot will be a
              mess.

        - *shortname* : short name for the property; will not format well
          if more than 4 or 5 characters.

        - *longname* : longer name for property used on axes label. Can be
          the same as *shortname* if you don't need a different long name.

    * *fix_limits* is only meaningful if *overlay* is being used. In this
      case, for any *shortname* in *overlay* that also keys an entry in
      *fix_limits*, we use *fix_limits[shortname]* to set the limits for
      *shortname*. Specifically, *fix_limits[shortname]* should be the
      2-tuple *(ticks, ticknames)*. *ticks* should be a list of tick
      locations (numbers) and *ticknames* should be a list of the
      corresponding tick label for that tick.

    * If *fixlongname* is *True*, then we use the *longname* in *overlay*
      exactly as written; otherwise we add a parenthesis indicating the
      *shortname* for which this *longname* stands.

    * *overlay_cmap* can be the name of a valid
      *matplotlib.colors.Colormap*, such as the string *jet* or *bwr*.
      Otherwise, it can be *None* and a (hopefully) good choice will be
      made for you.

    * *custom_cmap* can be the name of a valid
      *matplotlib.colors.Colormap* which will be used to color amino-acid
      one-letter codes in the logoplot by the *map_metric* when either
      'kd' or 'mw' is used as *map_metric*.

    * *relativestackheight* indicates how high the letter stack is
      relative to the default. The default is multiplied by this number,
      so make it > 1 for a higher letter stack.

    * *map_metric* specifies the amino-acid property metric used to map
      colors to amino-acid letters. Valid options are 'kd' (Kyte-Doolittle
      hydrophobicity scale, default), 'mw' (molecular weight),
      'functionalgroup' (functional groups: small, nucleophilic,
      hydrophobic, aromatic, basic, acidic, and amide), and 'charge'
      (charge at neutral pH). If 'charge' is used, then the argument for
      *custom_cmap* will no longer be meaningful, since 'charge' uses its
      own blue/black/red colormapping. Similarly, 'functionalgroup' uses
      its own colormapping.

    * *noseparator* is only meaningful if *datatype* is 'diffsel' or
      'diffprefs'. If it is set to *True*, then we do **not** print a
      black horizontal line to separate positive and negative values.
    """
    assert datatype in ['prefs', 'diffprefs', 'diffsel'], "Invalid datatype {0}".format(datatype)

    # check data, and get characters
    assert sites, "No sites specified"
    assert set(sites) == set(data.keys()), "Not a match between sites and the keys of data"
    characters = list(data[sites[0]].keys())
    if set(characters) == set(dms_tools.nts):
        alphabet_type = 'nt'
    elif set(characters) == set(dms_tools.aminoacids_nostop) or set(characters) == set(dms_tools.aminoacids_withstop):
        alphabet_type = 'aa'
    else:
        raise ValueError("Invalid set of characters in data. Does not specify either nucleotides or amino acids:\n%s" % str(characters))
    for r in sites:
        if set(data[r].keys()) != set(characters):
            raise ValueError("Not all sites in data have the same set of characters")

    firstblankchar = 'B'  # character for first blank space for diffprefs / diffsel
    assert firstblankchar not in characters, "firstblankchar in characters"
    lastblankchar = 'b'  # character for last blank space for diffprefs / diffsel
    assert lastblankchar not in characters, "lastblankchar in characters"
    separatorchar = '-'  # separates positive and negative for diffprefs / diffsel
    assert separatorchar not in characters, "separatorchar in characters"
    if noseparator:
        separatorheight = 0
    else:
        separatorheight = 0.02  # height of separator as frac of total for diffprefs / diffsel

    if os.path.splitext(plotfile)[1].lower() != '.pdf':
        raise ValueError("plotfile must end in .pdf: %s" % plotfile)
    if os.path.isfile(plotfile):
        os.remove(plotfile)  # remove existing plot

    if not allowunsorted:
        sorted_sites = [r for r in sites]
        dms_tools.utils.NaturalSort(sorted_sites)
        if sorted_sites != sites:
            raise ValueError("sites is not properly sorted")

    # Following are specifications of weblogo sizing taken from its documentation
    stackwidth = 9.5  # stack width in points, not default size of 10.8, but set to this in weblogo call below
    barheight = 5.5  # height of bars in points if using overlay
    barspacing = 2.0  # spacing between bars in points if using overlay
    stackaspectratio = 4.4  # ratio of stack height:width, doesn't count part going over maximum value of 1
    assert relativestackheight > 0, "relativestackheight must be > 0"
    stackaspectratio *= relativestackheight
    if overlay:
        if not (1 <= len(overlay) <= 3):
            raise ValueError("overlay must be a list of between one and three entries; instead it had %d entries" % len(overlay))
        ymax = (stackaspectratio * stackwidth + len(overlay) * (barspacing + barheight)) / float(stackaspectratio * stackwidth)
        aspectratio = ymax * stackaspectratio  # effective aspect ratio for full range
    else:
        ymax = 1.0
        aspectratio = stackaspectratio
    rmargin = 11.5  # right margin in points, fixed by weblogo
    stackheightmargin = 16  # margin between stacks in points, fixed by weblogo

    # Create the temporary file before entering the try block so that the
    # cleanup below never refers to a name that was not bound.
    (fd, transfacfile) = tempfile.mkstemp()
    try:
        # write data into transfacfile (a temporary file)
        ordered_alphabets = {}  # keyed by site index (0, 1, ...) with values ordered lists for characters from bottom to top
        with os.fdopen(fd, 'w') as f:
            if datatype == 'prefs':
                chars_for_string = characters
                f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
                for (isite, r) in enumerate(sites):
                    f.write('%d %s\n' % (isite, ' '.join([str(data[r][x]) for x in characters])))
                    pi_r = sorted((data[r][x], x) for x in characters)
                    ordered_alphabets[isite] = [tup[1] for tup in pi_r]  # order from smallest to biggest
            elif datatype == 'diffprefs':
                chars_for_string = characters + [firstblankchar, lastblankchar, separatorchar]
                ydatamax *= 2.0  # maximum possible range of data, multiply by two for range
                f.write('ID ID\nBF BF\nP0 %s\n' % ' '.join(chars_for_string))
                for (isite, r) in enumerate(sites):
                    positivesum = sum([data[r][x] for x in characters if data[r][x] > 0]) + separatorheight / 2.0
                    negativesum = sum([data[r][x] for x in characters if data[r][x] < 0]) - separatorheight / 2.0
                    if abs(positivesum + negativesum) > 1.0e-3:
                        raise ValueError("Differential preferences sum of %s is not close to zero for site %s" % (positivesum + negativesum, r))
                    if 2.0 * positivesum > ydatamax:
                        raise ValueError("You need to increase ydatamax: the total differential preferences sum to more than the y-axis limits. Right now, ydatamax is %.3f while the total differential preferences are %.3f" % (ydatamax, 2.0 * positivesum))
                    f.write('%d' % isite)
                    for x in characters:
                        f.write(' %s' % (abs(data[r][x]) / float(ydatamax)))
                    # order from most negative to most positive with blank characters and separators
                    ordered_alphabets[isite] = _signed_stack_order(data[r], characters, firstblankchar, separatorchar, lastblankchar)
                    # heights for blank characters and separators; both
                    # blanks are written with the same height
                    blankheight = 0.5 * (ydatamax + 2.0 * negativesum) / ydatamax
                    f.write(' %g %g %g\n' % (blankheight, blankheight, separatorheight))
            elif datatype == 'diffsel':
                assert ylimits, "You must specify ylimits if using diffsel"
                (dataymin, dataymax) = ylimits
                assert dataymax > 0 > dataymin, "Invalid ylimits of {0}".format(ylimits)
                yextent = float(dataymax - dataymin)
                separatorheight *= yextent
                chars_for_string = characters + [firstblankchar, lastblankchar, separatorchar]
                f.write('ID ID\nBF BF\nP0 {0}\n'.format(' '.join(chars_for_string)))
                for (isite, r) in enumerate(sites):
                    positivesum = sum([data[r][x] for x in characters if data[r][x] > 0]) + separatorheight / 2.0
                    negativesum = sum([data[r][x] for x in characters if data[r][x] < 0]) - separatorheight / 2.0
                    assert positivesum <= dataymax, "Data exceeds ylimits in positive direction"
                    assert negativesum >= dataymin, "Data exceeds ylimits in negative direction"
                    f.write('{0}'.format(isite))
                    for x in characters:
                        f.write(' {0}'.format(abs(data[r][x]) / yextent))
                    # order from most negative to most positive with blank characters and separators
                    ordered_alphabets[isite] = _signed_stack_order(data[r], characters, firstblankchar, separatorchar, lastblankchar)
                    # heights for blank characters and separators
                    f.write(' %g %g %g\n' % ((negativesum - dataymin) / yextent, (dataymax - positivesum) / yextent, separatorheight / yextent))
            else:
                raise ValueError("Invalid datatype of %s" % datatype)

        # create web logo
        charstring = ''.join(chars_for_string)
        assert len(charstring) == len(chars_for_string), "Length of charstring doesn't match length of chars_for_string. Do you have unallowable multi-letter characters?\n%s" % (str(chars_for_string))
        logoprior = weblogolib.parse_prior('equiprobable', charstring, 0)
        with open(transfacfile) as transfac_in:
            motif = _my_Motif.read_transfac(transfac_in, charstring)
        logodata = weblogolib.LogoData.from_counts(motif.alphabet, motif, logoprior)
        logo_options = weblogolib.LogoOptions()
        logo_options.fineprint = None
        logo_options.stacks_per_line = nperline
        logo_options.stack_aspect_ratio = aspectratio
        logo_options.stack_width = stackwidth
        logo_options.unit_name = 'probability'
        logo_options.show_yaxis = False
        logo_options.yaxis_scale = ymax
        if alphabet_type == 'aa':
            map_functions = {'kd': KyteDoolittleColorMapping,
                             'mw': MWColorMapping,
                             'charge': ChargeColorMapping,
                             'functionalgroup': FunctionalGroupColorMapping}
            map_fcn = map_functions[map_metric]
            (cmap, colormapping, mapper) = map_fcn(maptype=custom_cmap)
        elif alphabet_type == 'nt':
            colormapping = {}
            colormapping['A'] = '#008000'
            colormapping['T'] = '#FF0000'
            colormapping['C'] = '#0000FF'
            colormapping['G'] = '#FFA500'
        else:
            raise ValueError("Invalid alphabet_type %s" % alphabet_type)
        colormapping[firstblankchar] = colormapping[lastblankchar] = '#000000'  # black, but color doesn't matter as modified weblogo code replaces with empty space
        colormapping[separatorchar] = '#000000'  # black
        color_scheme = weblogolib.colorscheme.ColorScheme()
        has_rules = hasattr(color_scheme, 'rules')
        for x in chars_for_string:
            if has_rules:
                color_scheme.rules.append(weblogolib.colorscheme.SymbolColor(x, colormapping[x], "'%s'" % x))
            else:
                # this part is needed for weblogo 3.4
                color_scheme.groups.append(weblogolib.colorscheme.ColorGroup(x, colormapping[x], "'%s'" % x))
        logo_options.color_scheme = color_scheme
        # label every numberevery-th site; the others get an empty label
        logo_options.annotate = [r if isite % numberevery == 0 else '' for (isite, r) in enumerate(sites)]
        logoformat = weblogolib.LogoFormat(logodata, logo_options)
        # _my_pdf_formatter is modified from weblogo version 3.4 source code
        # to allow custom ordering of the symbols.
        pdf = _my_pdf_formatter(logodata, logoformat, ordered_alphabets)
        with open(plotfile, 'wb') as f:
            f.write(pdf)
        assert os.path.isfile(plotfile), "Failed to find expected plotfile %s" % plotfile
    finally:
        # remove temporary file
        if os.path.isfile(transfacfile):
            os.remove(transfacfile)

    # now build the overlay
    if overlay:
        tempfiles = []  # paths created so far, removed again in finally
        try:
            (fdoverlay, overlayfile) = tempfile.mkstemp(suffix='.pdf')
            tempfiles.append(overlayfile)
            os.close(fdoverlay)  # close, but we still have the path overlayfile...
            (fdmerged, mergedfile) = tempfile.mkstemp(suffix='.pdf')
            tempfiles.append(mergedfile)
            with os.fdopen(fdmerged, 'wb') as fmerged:
                LogoOverlay(sites, overlayfile, overlay, nperline, sitewidth=stackwidth, rmargin=rmargin, logoheight=stackwidth * stackaspectratio + stackheightmargin, barheight=barheight, barspacing=barspacing, fix_limits=fix_limits, fixlongname=fixlongname, overlay_cmap=overlay_cmap)
                # Both input PDFs stay open until the merged page has been
                # written out.
                with open(plotfile, 'rb') as plotfile_f:
                    with open(overlayfile, 'rb') as overlayfile_f:
                        plot = PyPDF2.PdfFileReader(plotfile_f).getPage(0)
                        overlaypage = PyPDF2.PdfFileReader(overlayfile_f).getPage(0)
                        xshift = overlaypage.artBox[2] - plot.artBox[2]
                        overlaypage.mergeTranslatedPage(plot, xshift, 0)
                        overlaypage.compressContentStreams()
                        output = PyPDF2.PdfFileWriter()
                        output.addPage(overlaypage)
                        output.write(fmerged)
            # every handle is closed by now, so plotfile can be replaced
            shutil.move(mergedfile, plotfile)
        finally:
            for fname in tempfiles:
                if os.path.isfile(fname):
                    os.remove(fname)