Exemple #1
0
def render_weblogo(seqs, residues_per_line=None):
    """Render a probability-scaled sequence logo for `seqs`.

    Parameters
    ----------
    seqs : sequence of str
        The sequences to draw. They are serialised as FASTA-style records
        (a ``> seq_<i>`` header line followed by the sequence) and parsed
        back with ``weblogo.read_seq_data``.

    residues_per_line : int, or None
        Number of stacks per line of the logo. When falsy, the length of
        the first parsed sequence is used instead.

    Returns
    -------
    Image
        ``Image`` built from the output of ``weblogo.png_print_formatter``.
    """
    # Assemble the whole FASTA text up front; a StringIO created from an
    # initial value is already positioned at the start.
    records = []
    for index, sequence in enumerate(seqs):
        records.append(f"> seq_{index}\n")
        records.append(sequence + "\n")
    fasta = io.StringIO("".join(records))

    parsed = weblogo.read_seq_data(fasta)
    data = weblogo.LogoData.from_seqs(parsed)

    options = weblogo.LogoOptions()
    options.unit_name = 'probability'
    options.stacks_per_line = residues_per_line or len(parsed[0])
    options.color_scheme = weblogo.colorscheme.chemistry

    layout = weblogo.LogoFormat(data, options)
    return Image(weblogo.png_print_formatter(data, layout))
def get_sequences_logo(sequences=None,
                       probability_matrix=None,
                       prior=None,
                       alphabet=None,
                       formatter='pdf',
                       as_pillow=False,
                       **kwargs):
    """ Create a logo from a set of sequences or from a probability matrix.

    Exactly one of `sequences` and `probability_matrix` must be given.

    By default, this function assumes that `sequences` would be a list of
    protein sequences, while `probability_matrix` would be for DNA motifs. If
    this is not the case, then the appropriate `alphabet` should be given.

    The most likely values for `alphabet` are:
    * weblogo.seq.unambiguous_dna_alphabet
    * weblogo.seq.unambiguous_rna_alphabet
    * weblogo.seq.unambiguous_protein_alphabet

    Parameters
    ----------
    sequences : iterable of strings
        The sequences

    probability_matrix: 2d np.array
        The probability matrix of a motif

    prior : 1d np.array where len(prior) == len(alphabet)
        A prior to adjust the probabilities at each position

    alphabet : weblogo alphabet, or None
        The alphabet for creating the motif. If None is given, then the
        unambiguous protein alphabet is used for `sequences` and the
        unambiguous DNA alphabet is used for `probability_matrix`.

    formatter : string
        The format of the image. It must be a key of the
        `weblogo.formatters` dictionary.

    as_pillow : bool
        Whether to return the raw image data (False, default) or as a
        pillow image (True). This requires a `formatter` whose output pillow
        can open (for example "png").

    kwargs : key=value pairs
        Other keywords to control the image. They are passed unchanged to
        `weblogo.LogoOptions`; see it for complete details. A few likely
        keywords are:
        - unit_name: for example, "bits" (default) or "probability"
        - fineprint: some text to show at the bottom (default: "WebLogo 3.5.0")
        - show_fineprint: whether to show the fineprint at all
        - yaxis_scale: this appears to control the yaxis maximum
        - stack_width: how wide to make each letter
        - stack_aspect_ratio: the ratio of letter width to height

    Returns
    -------
    logo : binary string or pillow Image
        The raw image data. It can be written to a file opened in binary mode
        ("wb") or opened as a pillow image ("Image.open(io.BytesIO(logo))").

        If `as_pillow` is `True`, then the pillow image is already created
        and returned.

    Raises
    ------
    TypeError
        If neither or both of `sequences` and `probability_matrix` are given.

    KeyError
        If `formatter` is not a key of `weblogo.formatters`.
    """
    # ensure we have exactly one of `sequences` and `probability_matrix`:
    # the two flags are equal when both or neither input was supplied
    seq_is_none = (sequences is None)
    pm_is_none = (probability_matrix is None)

    if seq_is_none == pm_is_none:
        msg = ("[get_sequences_logo] exactly one of `sequences` and "
               "`probability_matrix` must be given")
        raise TypeError(msg)

    if alphabet is None:
        if seq_is_none:
            alphabet = weblogo.seq.unambiguous_dna_alphabet
        else:
            alphabet = weblogo.seq.unambiguous_protein_alphabet

    if seq_is_none:
        # so we have a probability matrix
        data = weblogo.LogoData.from_counts(alphabet, probability_matrix,
                                            prior)
    else:
        # create the SeqList by round-tripping the sequences through an
        # in-memory file with one sequence per line
        seq_text = io.StringIO('\n'.join(sequences))
        seq_list = weblogo.seq_io.array_io.read(seq_text)
        seq_list.alphabet = alphabet
        data = weblogo.LogoData.from_seqs(seq_list, prior)

    options = weblogo.LogoOptions(**kwargs)
    logo_format = weblogo.LogoFormat(data, options)
    render = weblogo.formatters[formatter]

    logo = render(data, logo_format)

    if as_pillow:
        logo = Image.open(io.BytesIO(logo))

    return logo
Exemple #3
0
def main(htdocs_directory=None):
    """Handle one WebLogo form submission and write the response to stdout.

    The submitted fields are read with ``cgilib.FieldStorage()``. The HTML
    form is sent back through ``send_form`` when no fields were submitted,
    when ``cmd_reset``, ``cmd_edit`` or ``cmd_validate`` is present, or when
    any validation or rendering error was collected. Otherwise the HTTP
    headers and the rendered logo are written to standard output.

    Errors are collected as ``(field_name, message)`` tuples and handed to
    ``send_form`` rather than raised.

    Parameters
    ----------
    htdocs_directory : optional
        Passed through unchanged to ``send_form``.
    """
    logooptions = weblogo.LogoOptions()

    # A list of form fields.
    # The default for checkbox values must be False (irrespective of
    # the default in logooptions) since a checked checkbox returns 'true'
    # but an unchecked checkbox returns nothing.
    controls = [
        Field('sequences', ''),
        Field('sequences_url', ''),
        Field(
            'format',
            'png',
            weblogo.formatters.get,
            options=[
                'png_print', 'png', 'jpeg', 'eps', 'pdf', 'svg', 'logodata'
            ],
            # TODO: Should copy list from __init__.formatters
            errmsg="Unknown format option."),
        Field('stacks_per_line',
              logooptions.stacks_per_line,
              int,
              errmsg='Invalid number of stacks per line.'),
        Field('stack_width',
              'medium',
              weblogo.std_sizes.get,
              options=['small', 'medium', 'large'],
              errmsg='Invalid logo size.'),
        Field('alphabet',
              'alphabet_auto',
              alphabets.get,
              options=[
                  'alphabet_auto', 'alphabet_protein', 'alphabet_dna',
                  'alphabet_rna'
              ],
              errmsg="Unknown sequence type."),
        Field('unit_name',
              'bits',
              options=[
                  'probability', 'bits', 'nats', 'kT', 'kJ/mol', 'kcal/mol'
              ]),
        Field('first_index', 1, int_or_none),
        Field('logo_start', '', int_or_none),
        Field('logo_end', '', int_or_none),
        Field('composition',
              'comp_auto',
              composition.get,
              options=[
                  'comp_none', 'comp_auto', 'comp_equiprobable', 'comp_CG',
                  'comp_Celegans', 'comp_Dmelanogaster', 'comp_Ecoli',
                  'comp_Hsapiens', 'comp_Mmusculus', 'comp_Scerevisiae'
              ],
              errmsg="Illegal sequence composition."),
        Field('percentCG', '', float_or_none, errmsg="Invalid CG percentage."),
        Field('show_errorbars', False, truth),
        Field('logo_title', logooptions.logo_title),
        Field('logo_label', logooptions.logo_label),
        Field('show_xaxis', False, truth),
        Field('xaxis_label', logooptions.xaxis_label),
        Field('show_yaxis', False, truth),
        Field('yaxis_label', logooptions.yaxis_label, string_or_none),
        Field('yaxis_scale',
              logooptions.yaxis_scale,
              float_or_none,
              errmsg="The yaxis scale must be a positive number."),
        Field('yaxis_tic_interval', logooptions.yaxis_tic_interval,
              float_or_none),
        Field('show_ends', False, truth),
        Field('show_fineprint', False, truth),
        Field('color_scheme',
              'color_auto',
              color_schemes.get,
              options=color_schemes.keys(),
              errmsg='Unknown color scheme'),
        Field('color0', ''),
        Field('symbols0', ''),
        Field('desc0', ''),
        Field('color1', ''),
        Field('symbols1', ''),
        Field('desc1', ''),
        Field('color2', ''),
        Field('symbols2', ''),
        Field('desc2', ''),
        Field('color3', ''),
        Field('symbols3', ''),
        Field('desc3', ''),
        Field('color4', ''),
        Field('symbols4', ''),
        Field('desc4', ''),
        Field('ignore_lower_case', False, truth),
        Field('scale_width', False, truth),
    ]

    # Index the controls by field name for direct lookup.
    form = {c.name: c for c in controls}

    form_values = cgilib.FieldStorage()

    # Send default form?
    if len(form_values) == 0 or "cmd_reset" in form_values:
        # Load default truth values now.
        form['show_errorbars'].value = logooptions.show_errorbars
        form['show_xaxis'].value = logooptions.show_xaxis
        form['show_yaxis'].value = logooptions.show_yaxis
        form['show_ends'].value = logooptions.show_ends
        form['show_fineprint'].value = logooptions.show_fineprint
        form['scale_width'].value = logooptions.scale_width

        send_form(controls, htdocs_directory=htdocs_directory)
        return

    # Get form content
    for c in controls:
        c.value = form_values.getfirst(c.name, c.default)

    # Fields whose converted value is copied onto the attribute of the same
    # name on `logooptions`.
    options_from_form = [
        'format', 'stacks_per_line', 'stack_width', 'alphabet', 'unit_name',
        'first_index', 'logo_start', 'logo_end', 'composition',
        'show_errorbars', 'logo_title', 'logo_label', 'show_xaxis',
        'xaxis_label', 'show_yaxis', 'yaxis_label', 'yaxis_scale',
        'yaxis_tic_interval', 'show_ends', 'show_fineprint', 'scale_width'
    ]

    errors = []
    for optname in options_from_form:
        try:
            value = form[optname].get_value()
            if value is not None:
                setattr(logooptions, optname, value)
        except ValueError as err:
            errors.append(err.args)

    # Construct custom color scheme
    custom = ColorScheme()
    for i in range(5):
        color = form["color%d" % i].get_value()
        symbols = form["symbols%d" % i].get_value()
        desc = form["desc%d" % i].get_value()

        if color:
            try:
                custom.rules.append(SymbolColor(symbols, color, desc))
            except ValueError:
                errors.append(('color%d' % i, "Invalid color: %s" % color))

    if form["color_scheme"].value == 'color_custom':
        logooptions.color_scheme = custom
    else:
        try:
            logooptions.color_scheme = form["color_scheme"].get_value()
        except ValueError as err:
            errors.append(err.args)

    # FIXME: Ugly fix: Must check that sequence_file key exists
    # FIXME: Sending malformed or missing form keys should not cause a crash
    # sequences_file = form["sequences_file"]
    sequences_from_file = None
    if "sequences_file" in form_values:
        sequences_from_file = form_values.getvalue("sequences_file")

    sequences_from_textfield = form["sequences"].get_value()
    sequences_url = form["sequences_url"].get_value()

    # Exactly one sequence source (upload, text field, URL) may be used;
    # `seq_file` ends up as a readable text stream over the chosen source.
    seq_file = None

    if sequences_from_file:
        if sequences_from_textfield or sequences_url:
            errors.append(
                ("sequences_file", "Cannot upload, sequence source conflict"))
        else:
            seq_file = TextIOWrapper(BytesIO(sequences_from_file),
                                     encoding='utf-8')
    elif sequences_from_textfield:
        if sequences_url:
            errors.append(
                ("sequences", "Cannot upload, sequence source conflict"))
        else:
            # Check SEQUENCES_MAXLENGTH.
            # If a user tries to paste a very large file into the sequence
            # textarea, then WebLogo runs very slowly for no apparently good
            # reason. (Might be a client side bug?) So we limit the maximum
            # sequence size. The form field also limits the size, but that is
            # not necessarily respected and can truncate data without
            # warning, so the textarea maximum is set larger than this limit.
            SEQUENCES_MAXLENGTH = 100000
            if len(sequences_from_textfield) > SEQUENCES_MAXLENGTH:
                errors.append((
                    "sequences",
                    "Sequence data too large for text input. Use file upload instead."
                ))
                # Replace the control so the oversized text is not sent back.
                controls[0] = Field('sequences', '')
            else:
                seq_file = StringIO(sequences_from_textfield)

    elif sequences_url:
        from . import _from_URL_fileopen
        try:
            seq_file = _from_URL_fileopen(sequences_url)
        except ValueError:
            errors.append(("sequences_url", "Cannot parse URL"))
        except IOError:
            errors.append(("sequences_url", "Cannot load sequences from URL"))

    else:
        errors.append((
            "sequences",
            "Please enter a multiple-sequence alignment in the box above, or select a "
            "file to upload."))

    # If we have uncovered errors or we want the chance to edit the logo
    # ("cmd_edit" command from examples page) then we return the form now.
    # We do not proceed to the time consuming logo creation step unless
    # required by a 'create' or 'validate' command, and no errors have been
    # found yet.
    if errors or "cmd_edit" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    try:
        comp = form["composition"].get_value()
        percentCG = form["percentCG"].get_value()
        ignore_lower_case = ("ignore_lower_case" in form_values)
        if comp == 'percentCG':
            # NOTE(review): `float_or_none` presumably yields None for a
            # blank field - confirm. Dividing None would raise a TypeError
            # that no handler below catches, so report it as a form error.
            if percentCG is None:
                raise ValueError("percentCG", "Invalid CG percentage.")
            comp = str(percentCG / 100)

        from .matrix import Motif

        try:
            # Try reading data in transfac format first.
            # TODO Refactor this code
            motif = Motif.read_transfac(seq_file,
                                        alphabet=logooptions.alphabet)
            prior = weblogo.parse_prior(comp, motif.alphabet)
            data = weblogo.LogoData.from_counts(motif.alphabet, motif, prior)
        except ValueError:
            # Not transfac: fall back to reading a sequence alignment.
            seqs = weblogo.read_seq_data(seq_file,
                                         alphabet=logooptions.alphabet,
                                         ignore_lower_case=ignore_lower_case)
            prior = weblogo.parse_prior(comp, seqs.alphabet)
            data = weblogo.LogoData.from_seqs(seqs, prior)

        logoformat = weblogo.LogoFormat(data, logooptions)
        # The raw (unconverted) field value is the key into the formatter,
        # mime type and extension tables.
        format_name = form["format"].value
        logo = weblogo.formatters[format_name](data, logoformat)
    except (ValueError, IOError, RuntimeError) as err:
        errors.append(err.args)

    if errors or "cmd_validate" in form_values:
        send_form(controls, errors, htdocs_directory)
        return

    #
    #  RETURN LOGO OVER HTTP
    #

    print("Content-Type:", mime_type[format_name])
    # Content-Disposition: inline       Open logo in browser window
    # Content-Disposition: attachment   Download logo
    disposition = 'attachment' if "download" in form_values else 'inline'
    print('Content-Disposition: %s; filename="logo.%s"' %
          (disposition, extension[format_name]))
    # Separate header from data
    print()
    # Flush the text headers before writing bytes to the underlying buffer,
    # so that the headers precede the image data.
    sys.stdout.flush()

    # Finally, and at last, send the logo.
    sys.stdout.buffer.write(logo)
def generate_weblogo(fname,
                     count_mat,
                     idx_first_residue=1,
                     residue_min=1,
                     residue_max=None,
                     title=""):
    """
    Generates logo representation of PBs frequency along protein sequence through
    the weblogo library.

    The weblogo reference:
    G. E. Crooks, G. Hon, J.-M. Chandonia, and S. E. Brenner.
    'WebLogo: A Sequence Logo Generator.'
    Genome Research 14:1188–90 (2004)
    doi:10.1101/gr.849004.
    http://weblogo.threeplusone.com/

    Parameters
    ----------
    fname : str
        The path to the file to write in. Its extension selects the image
        format (matched case-insensitively; "jpg" is treated as "jpeg").
    count_mat : numpy array
        an occurrence matrix returned by `count_matrix`.
    idx_first_residue: int
        the index of the first residue in the matrix
    residue_min: int
        the lower bound of residue frame
    residue_max: int
        the upper bound of residue frame
    title: str
        the title of the weblogo. Default is empty.

    Raises
    ------
    ValueError
        If the extension of `fname` is not a key of `weblogo.formatters`.
    """

    # Slice the matrix
    count = utils._slice_matrix(count_mat, idx_first_residue, residue_min,
                                residue_max)

    # Create a custom color scheme for PB
    colors = weblogo.ColorScheme([
        ColorGroup("d", "#1240AB", "strand main"),
        ColorGroup("abcdef", "#1240AB", "strand others"),
        ColorGroup("ghij", "#0BD500", "coil"),
        ColorGroup("m", "#FD0006", "helix main"),
        ColorGroup("klnop", "#FD0006", "helix others")
    ])

    # Load data from an occurrence matrix
    data = weblogo.LogoData.from_counts(PB.NAMES, count)

    # Create options. `first_index` is the LogoOptions setting for the
    # number of the first stack, so the axis starts at `residue_min`.
    options = weblogo.LogoOptions(fineprint=False,
                                  logo_title=title,
                                  color_scheme=colors,
                                  stack_width=weblogo.std_sizes["large"],
                                  first_index=residue_min)

    # Generate weblogo
    logo = weblogo.LogoFormat(data, options)

    # Retrieve image format from the file extension (without the dot)
    image_format = os.path.splitext(fname)[1][1:]

    # Retrieve the right function given the image format
    format_key = image_format.lower()
    if format_key == 'jpg':
        format_key = 'jpeg'
    try:
        formatter = weblogo.formatters[format_key]
    except KeyError:
        # The KeyError itself carries no useful information for the caller.
        raise ValueError(
            "Invalid format image '{0}'."
            " Valid ones are : eps, png, pdf, jpg/jpeg, svg".format(
                image_format)) from None
    # Format the logo
    image = formatter(data, logo)

    # Write it
    with open(fname, "wb") as f:
        f.write(image)
Exemple #5
0
def main():
    """Build a sequence logo of designed positions and save or preview it.

    Command-line arguments are parsed by docopt from the module docstring.
    Sequences come either from the models in ``<directory>`` (restricted to
    the designable positions of its resfile) or from the validated designs
    of ``<workspace>`` / ``<round>``.

    With ``--output`` the logo is written to that path in the format given
    by its extension, and the text form of the logo data is written next to
    it with a ``.txt`` extension. Without ``--output`` the logo is written to
    a temporary PDF and opened with the program named by the ``PDF``
    environment variable (``evince`` if unset).
    """
    args = docopt.docopt(__doc__)
    root = args['<workspace>']
    round_ = args['<round>']
    directory = args['<directory>']

    if directory:
        models, filters = structures.load(directory)
        resfile = pipeline.load_resfile(directory)
        resis = sorted(int(i) for i in resfile.designable)
        print(resis)
        title = directory
        # Residue numbers are 1-based; string indices are 0-based.
        sequences = [
            ''.join(models['sequence'][i][j - 1] for j in resis)
            for i in models.index
        ]

    else:
        workspace = pipeline.ValidatedDesigns(root, round_)
        workspace.check_paths()
        title = workspace.focus_dir
        designs = [structures.Design(x) for x in workspace.output_subdirs]
        sequences = [x.resfile_sequence for x in designs]

    sequences = weblogo.seq.SeqList(
        [weblogo.seq.Seq(x) for x in sequences],
        alphabet=weblogo.seq.unambiguous_protein_alphabet,
    )

    logo_data = weblogo.LogoData.from_seqs(sequences)
    logo_options = weblogo.LogoOptions()
    # `logo_title` is the LogoOptions attribute that holds the title.
    logo_options.logo_title = title
    logo_format = weblogo.LogoFormat(logo_data, logo_options)

    formatters = {
        '.pdf': weblogo.pdf_formatter,
        '.svg': weblogo.svg_formatter,
        '.eps': weblogo.eps_formatter,
        '.png': weblogo.png_formatter,
        '.jpeg': weblogo.jpeg_formatter,
        '.txt': weblogo.txt_formatter,
    }

    # Validate the requested format before any file is created or truncated.
    output_path = args['--output']
    ext = os.path.splitext(output_path)[-1] if output_path else '.pdf'
    if ext not in formatters:
        scripting.print_error_and_die(
            "'{0}' is not a supported file format".format(ext))

    document = formatters[ext](logo_data, logo_format)

    if output_path:
        # Swap the real extension (whatever its length) for ".txt".
        data_path = os.path.splitext(output_path)[0] + '.txt'
        with open(data_path, 'w') as f:
            f.write(str(logo_data))
        with open(output_path, 'wb') as logo_file:
            logo_file.write(document)
    else:
        # The temporary file must stay open while the viewer runs.
        with tempfile.NamedTemporaryFile('wb',
                                         prefix='weblogo_',
                                         suffix='.pdf') as logo_file:
            logo_file.write(document)
            logo_file.flush()
            viewer = os.environ.get('PDF', 'evince')
            subprocess.call([viewer, logo_file.name])