def render_weblogo(seqs, residues_per_line=None):
    """Render a probability-scaled sequence logo for `seqs`.

    The sequences are written to an in-memory buffer as FASTA-style records
    (``> seq_<index>`` header followed by the sequence), parsed back with
    ``weblogo.read_seq_data`` and formatted with
    ``weblogo.png_print_formatter``.

    Parameters
    ----------
    seqs : sequence of str
        The sequences to summarise.
    residues_per_line : int, optional
        Number of stacks drawn per logo line. When falsy, the length of the
        first parsed sequence is used.

    Returns
    -------
    Image
        The formatter output wrapped in ``Image``.
    """
    fasta = io.StringIO()
    for index, sequence in enumerate(seqs):
        fasta.write(f"> seq_{index}\n")
        fasta.write(sequence + "\n")
    # Rewind so the reader sees the records from the beginning.
    fasta.seek(0)

    seq_list = weblogo.read_seq_data(fasta)
    data = weblogo.LogoData.from_seqs(seq_list)

    options = weblogo.LogoOptions()
    options.unit_name = 'probability'
    if residues_per_line:
        options.stacks_per_line = residues_per_line
    else:
        options.stacks_per_line = len(seq_list[0])
    options.color_scheme = weblogo.colorscheme.chemistry

    layout = weblogo.LogoFormat(data, options)
    png_bytes = weblogo.png_print_formatter(data, layout)
    return Image(png_bytes)
def get_sequences_logo(sequences=None,
                       probability_matrix=None,
                       prior=None,
                       alphabet=None,
                       formatter='pdf',
                       as_pillow=False,
                       **kwargs):
    """ Create a logo based on a set of sequences or a probability matrix.

    Exactly one of `sequences` and `probability_matrix` must be given.

    By default, this function assumes that `sequences` would be a list of
    protein sequences, while `probability_matrix` would be for DNA motifs. If
    this is not the case, then the appropriate `alphabet` should be given.
    The most likely values for `alphabet` are:

    * weblogo.seq.unambiguous_dna_alphabet
    * weblogo.seq.unambiguous_rna_alphabet
    * weblogo.seq.unambiguous_protein_alphabet

    Parameters
    ----------
    sequences : iterable of strings
        The sequences

    probability_matrix : 2d np.array
        The probability matrix of a motif

    prior : 1d np.array where len(prior) == len(alphabet)
        A prior to adjust the probabilities at each position

    alphabet : corebio.seq.Alphabet, or None
        The alphabet for creating the motif. If None is given, then the
        unambiguous protein alphabet is used for `sequences` and the
        unambiguous DNA alphabet is used for `probability_matrix`.

    formatter : string
        The format of the image. It must be present in the
        weblogo.formatters dictionary.

    as_pillow : bool
        Whether to return the raw image data (False, default) or as a pillow
        image (True)

    kwargs : key=value pairs
        Other keywords to control the image. See weblogolib.LogoOptions for
        complete details. A few likely keywords are:

        - unit_name: for example, "bits" (default) or "probability"
        - fineprint: some text to show at the bottom
          (default: "WebLogo 3.5.0")
        - show_fineprint: whether to show the fineprint at all
        - yaxis_scale: this appears to control the yaxis maximum
        - stack_width: how wide to make each letter
        - stack_aspect_ratio: the ratio of letter width to height

    Returns
    -------
    logo : binary string or pillow Image
        The raw image data. It can be written to a file opened in binary
        mode ("wb") or opened as a pillow image
        ("Image.open(io.BytesIO(logo))"). If `as_pillow` is `True`, then the
        pillow image is already created and returned.

    Raises
    ------
    TypeError
        If neither or both of `sequences` and `probability_matrix` are given.
    """
    # Ensure we have exactly one of `sequences` and `probability_matrix`.
    # Comparing the two flags rejects both "neither" and "both"; previously
    # a call with both silently ignored `probability_matrix`.
    seq_is_none = sequences is None
    pm_is_none = probability_matrix is None
    if seq_is_none == pm_is_none:
        msg = ("[get_sequences_logo] exactly one of `sequences` and "
               "`probability_matrix` must be given")
        raise TypeError(msg)

    if alphabet is None:
        if seq_is_none:
            alphabet = weblogo.seq.unambiguous_dna_alphabet
        else:
            alphabet = weblogo.seq.unambiguous_protein_alphabet

    if seq_is_none:
        # so we have a probability matrix
        data = weblogo.LogoData.from_counts(alphabet, probability_matrix,
                                            prior)
    else:
        # Create the SeqList by round-tripping through the one-sequence-per-
        # line reader; it seems like there should be a better way to do this.
        handle = io.StringIO('\n'.join(sequences))
        seq_list = weblogo.seq_io.array_io.read(handle)
        seq_list.alphabet = alphabet
        data = weblogo.LogoData.from_seqs(seq_list, prior)

    options = weblogo.LogoOptions(**kwargs)
    logo_format = weblogo.LogoFormat(data, options)

    render = weblogo.formatters[formatter]
    logo = render(data, logo_format)

    if as_pillow:
        logo = Image.open(io.BytesIO(logo))

    return logo
def main(htdocs_directory=None):
    """Handle one WebLogo CGI request.

    Reads the submitted form through ``cgilib.FieldStorage`` and then either
    re-sends the HTML form (on first visit, reset, edit, validate, or when any
    input error was collected) or creates the logo and writes it to standard
    output preceded by HTTP headers.

    Parameters
    ----------
    htdocs_directory : optional
        Passed through unchanged to ``send_form``.

    Returns
    -------
    None
    """
    logooptions = weblogo.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field('sequences_url', ''),
        Field(
            'format',
            'png',
            weblogo.formatters.get,
            options=[
                'png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'
            ],
            # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field('stacks_per_line', logooptions.stacks_per_line, int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width', 'medium', weblogo.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet', 'alphabet_auto', alphabets.get,
              options=[
                  'alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                  'alphabet_rna'
              ],
              errmsg="Unknown sequence type."),
        Field('unit_name', 'bits',
              options=[
                  'probability', 'bits', 'nats', 'kT', 'kJ/mol', 'kcal/mol'
              ]),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition', 'comp_auto', composition.get,
              options=[
                  'comp_none', 'comp_auto', 'comp_equiprobable', 'comp_CG',
                  'comp_Celegans', 'comp_Dmelanogaster', 'comp_Ecoli',
                  'comp_Hsapiens', 'comp_Mmusculus', 'comp_Scerevisiae'
              ],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none,
              errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale', logooptions.yaxis_scale, float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme', 'color_auto', color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    # Name -> Field lookup; the Field objects are shared with `controls`.
    form = {c.name: c for c in controls}

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width'
    ]

    # Collected input problems. Entries added directly below are
    # (field name, message) tuples; entries taken from `err.args` are
    # presumably of the same shape -- confirm against Field.get_value.
    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.rules.append(SymbolColor(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    sequences_from_file = None
    if "sequences_file" in form_values:
        sequences_from_file = form_values.getvalue("sequences_file")

    sequences_from_textfield = form["sequences"].get_value()
    sequences_url = form["sequences_url"].get_value()

    sequences = None
    seq_file = None

    # Exactly one sequence source (upload, text area, URL) may be used.
    if sequences_from_file:
        if sequences_from_textfield or sequences_url:
            errors.append(
                ("sequences_file", "Cannot upload, sequence source conflict"))
        else:
            sequences = sequences_from_file
            seq_file = TextIOWrapper(BytesIO(sequences), encoding='utf-8')
    elif sequences_from_textfield:
        if sequences_url:
            errors.append(
                ("sequences", "Cannot upload, sequence source conflict"))
        else:
            # check SEQUENCES_MAXLENGTH
            # If a user tries to paste a very large file into sequence
            # textarea, then WebLogo runs very slow for no apparently good
            # reason. (Might be client side bug?)
            # So we limit the maximum sequence size.
            # Form field also limits size, but not necessarily respected.
            # Also can truncate data without warning, so we'll set textarea
            # maximum to be larger than MAX_SEQUENCE_SIZE
            SEQUENCES_MAXLENGTH = 100000
            if len(sequences_from_textfield) > SEQUENCES_MAXLENGTH:
                errors.append((
                    "sequences",
                    "Sequence data too large for text input. Use file upload instead."
                ))
                # Replace the field with a fresh one so the oversized text
                # is not carried by the control list passed to send_form.
                controls[0] = Field('sequences', '')
            else:
                sequences = sequences_from_textfield
                seq_file = StringIO(sequences)
    elif sequences_url:
        from . import _from_URL_fileopen
        try:
            seq_file = _from_URL_fileopen(sequences_url)
        except ValueError:
            errors.append(("sequences_url", "Cannot parse URL"))
        except IOError:
            errors.append(("sequences_url", "Cannot load sequences from URL"))
    else:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a "
            "file to upload."))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            if percentCG is None:
                # Without this guard `None / 100` raises TypeError, which is
                # not caught below and would crash the request instead of
                # being reported on the form.
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from .matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(seq_file,
                                        alphabet=logooptions.alphabet)
            prior = weblogo.parse_prior(comp, motif.alphabet)
            data = weblogo.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError:
            # Not transfac: fall back to reading plain sequence data.
            seqs = weblogo.read_seq_data(seq_file,
                                         alphabet=logooptions.alphabet,
                                         ignore_lower_case=ignore_lower_case)
            prior = weblogo.parse_prior(comp, seqs.alphabet)
            data = weblogo.LogoData.from_seqs(seqs, prior)

        logoformat = weblogo.LogoFormat(data, logooptions)
        # Raw (unconverted) form value, e.g. 'png'; used as a lookup key.
        format_name = form["format"].value
        logo = weblogo.formatters[format_name](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format_name])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    if "download" in form_values:
        print('Content-Disposition: attachment; '
              'filename="logo.%s"' % extension[format_name])
    else:
        print('Content-Disposition: inline; '
              'filename="logo.%s"' % extension[format_name])
    # Separate header from data
    print()
    # Flush the text layer before writing bytes to the underlying buffer so
    # the headers are emitted ahead of the image data.
    sys.stdout.flush()

    # Finally, and at last, send the logo.
    sys.stdout.buffer.write(logo)
def generate_weblogo(fname, count_mat, idx_first_residue=1, residue_min=1,
                     residue_max=None, title=""):
    """
    Write a logo of PBs frequency along a protein sequence to `fname`,
    using the weblogo library.

    The weblogo reference:
    G. E. Crooks, G. Hon, J.-M. Chandonia, and S. E. Brenner.
    'WebLogo: A Sequence Logo Generator.'
    Genome Research 14:1188–90 (2004)
    doi:10.1101/gr.849004.
    http://weblogo.threeplusone.com/

    Parameters
    ----------
    fname : str
        The path to the file to write in. Its extension selects the image
        format ("jpg" is treated as "jpeg").
    count_mat : numpy array
        an occurrence matrix returned by `count_matrix`.
    idx_first_residue: int
        the index of the first residue in the matrix
    residue_min: int
        the lower bound of residue frame
    residue_max: int
        the upper bound of residue frame
    title: str
        the title of the weblogo. Default is empty.

    Raises
    ------
    ValueError
        If the extension of `fname` has no entry in `weblogo.formatters`.
    """
    # Restrict the matrix to the requested residue frame.
    sliced = utils._slice_matrix(count_mat, idx_first_residue,
                                 residue_min, residue_max)

    # Custom PB color scheme: (symbols, color, description) per group.
    group_specs = (
        ("d", "#1240AB", "strand main"),
        ("abcdef", "#1240AB", "strand others"),
        ("ghij", "#0BD500", "coil"),
        ("m", "#FD0006", "helix main"),
        ("klnop", "#FD0006", "helix others"),
    )
    scheme = weblogo.ColorScheme([ColorGroup(*spec) for spec in group_specs])

    # Load data from the occurrence matrix.
    logo_data = weblogo.LogoData.from_counts(PB.NAMES, sliced)

    # NOTE(review): confirm that LogoOptions recognises `first_residue`
    # (the documented option appears to be `first_index`) -- TODO verify.
    logo_options = weblogo.LogoOptions(
        fineprint=False,
        logo_title=title,
        color_scheme=scheme,
        stack_width=weblogo.std_sizes["large"],
        first_residue=residue_min)

    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    # The extension (without its leading dot) names the formatter.
    image_format = os.path.splitext(fname)[1][1:]
    if image_format == 'jpg':
        image_format = 'jpeg'

    try:
        render = weblogo.formatters[image_format]
    except KeyError:
        raise ValueError(
            "Invalid format image '{0}'."
            " Valid ones are : eps, png, pdf, jpg/jpeg, svg".format(
                image_format))

    rendered = render(logo_data, logo_format)

    with open(fname, "wb") as handle:
        handle.write(rendered)
def main():
    """
    Build a sequence logo for a set of designs and save or preview it.

    Command-line arguments are parsed by docopt from the module docstring.
    When ``<directory>`` is given, the sequences are the designable positions
    (from the resfile) of every loaded model; otherwise they are the resfile
    sequences of the designs in the validated-designs workspace for
    ``<workspace>`` / ``<round>``.

    With ``--output``, the logo is written to that path (format chosen from
    its extension) and a text dump of the logo data is written next to it
    with a ``.txt`` extension. Without ``--output``, a temporary PDF is
    created and opened with the program named by the ``PDF`` environment
    variable (default ``evince``).
    """
    args = docopt.docopt(__doc__)
    root = args['<workspace>']
    round_ = args['<round>']
    directory = args['<directory>']
    output_path = args['--output']

    if directory:
        models, _ = structures.load(directory)
        resfile = pipeline.load_resfile(directory)
        resis = sorted(int(i) for i in resfile.designable)
        print(resis)
        title = directory
        # Residue numbers are 1-based, hence the `j - 1` index.
        sequences = [
            ''.join(models['sequence'][i][j - 1] for j in resis)
            for i in models.index
        ]
    else:
        workspace = pipeline.ValidatedDesigns(root, round_)
        workspace.check_paths()
        title = workspace.focus_dir
        designs = [structures.Design(x) for x in workspace.output_subdirs]
        sequences = [x.resfile_sequence for x in designs]

    sequences = weblogo.seq.SeqList(
        [weblogo.seq.Seq(x) for x in sequences],
        alphabet=weblogo.seq.unambiguous_protein_alphabet,
    )

    logo_data = weblogo.LogoData.from_seqs(sequences)
    logo_options = weblogo.LogoOptions()
    # NOTE(review): confirm LogoOptions uses `title` rather than
    # `logo_title` for the heading -- TODO verify.
    logo_options.title = title
    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    formatters = {
        '.pdf': weblogo.pdf_formatter,
        '.svg': weblogo.svg_formatter,
        '.eps': weblogo.eps_formatter,
        '.png': weblogo.png_formatter,
        '.jpeg': weblogo.jpeg_formatter,
        '.txt': weblogo.txt_formatter,
    }

    preview = not output_path
    ext = '.pdf' if preview else os.path.splitext(output_path)[-1]

    # Validate the format and render before touching the filesystem, so a
    # bad extension or a formatter failure does not leave empty files behind.
    if ext not in formatters:
        scripting.print_error_and_die(
            "'{0}' is not a supported file format".format(ext))
    document = formatters[ext](logo_data, logo_format)

    if preview:
        # The temporary file must stay open while the viewer runs; it is
        # removed when the `with` block closes it.
        with tempfile.NamedTemporaryFile('wb', prefix='weblogo_',
                                         suffix='.pdf') as logo_file:
            logo_file.write(document)
            logo_file.flush()
            viewer = os.environ.get('PDF', 'evince')
            subprocess.call((viewer, logo_file.name))
        return

    # Derive the companion path from the real extension; slicing off three
    # characters turned e.g. "logo.jpeg" into "logo.jtxt". The dump is
    # written first so that, if --output itself ends in ".txt", the logo
    # written afterwards is what remains in the file.
    summary_path = os.path.splitext(output_path)[0] + '.txt'
    with open(summary_path, 'w') as f:
        f.write(str(logo_data))

    with open(output_path, 'wb') as logo_file:
        logo_file.write(document)