Beispiel #1
0
 def test_plot_titles(self):
     """Draw one line on a single-cell grid and attach right, top and left titles.

     The figure is returned to the caller (presumably for an image-comparison
     harness -- TODO confirm against the test runner configuration).
     """
     canvas = plt.figure(figsize=(10, 10))
     axis = plt.subplot2grid((1, 1), (0, 0), fig=canvas)
     axis.plot([1, 2, 3], [1, 2, 3])

     # Titles are added in the order right -> top -> left.
     ru.add_right_title(axis, 'right title text', rotation=-90)
     ru.add_top_title(axis, 'top title text')
     ru.add_left_title(axis, 'left title text', rotation=90)
     return canvas
Beispiel #2
0
 def test_cd_read_chirascan(self):
     """Read ``chirascan_cd.csv`` from ``self.dirpath`` and plot every sample it holds.

     Samples are visited in sorted key order and laid out row by row on a
     3x2 grid; each axis gets the sample name as its top title.  The figure
     is returned to the caller.
     """
     cd_file = os.path.join(self.dirpath, 'chirascan_cd.csv')
     df = ri.read_CD(cd_file, model='chirascan')

     fig = plt.figure(figsize=(15, 15))
     layout = (3, 2)
     for index, name in enumerate(sorted(df.keys())):
         # Two columns per row: quotient is the row, remainder the column.
         row, column = divmod(index, 2)
         ax = plt.subplot2grid(layout, (row, column), fig=fig)
         rp.plot_CD(df[name], ax, sample=5)
         ru.add_top_title(ax, name)
     plt.tight_layout()
     return fig
def main(options):
    """Scatter-plot two columns of a parsed Rosetta file as configured by ``options``.

    Attributes read from ``options``:

    * ``ifile`` / ``ifiles``: input; ``ifile`` wins when it is not ``None`` and
      multi-file parsing is requested whenever ``ifiles`` is not ``None``.
    * ``fsize``: figure size; ignored when its first item is ``None``.
    * ``x`` / ``y``: column names to plot; ``color``: index into the active
      seaborn palette.
    * ``xlim`` / ``ylim``: axis limits; ignored when the first item is ``None``.
    * ``xlab`` / ``ylab``: optional axis labels; ``title``: top title.
    * ``ofile``: optional output image; ``silent``: skip the on-screen display.

    :return: The generated :class:`~matplotlib.figure.Figure`.
    """
    source = options.ifiles if options.ifile is None else options.ifile
    df = parse_rosetta_file(source, multi=options.ifiles is not None)

    # Plot
    figure_kwargs = {}
    if options.fsize[0] is not None:
        figure_kwargs['figsize'] = list(map(float, options.fsize))
    fig = plt.figure(**figure_kwargs)
    ax = plt.subplot2grid((1, 1), (0, 0), fig=fig)

    point_color = sns.color_palette()[int(options.color)]
    sns.regplot(x=options.x, y=options.y, data=df, fit_reg=False, ax=ax, color=point_color)

    if options.ylim[0] is not None:
        y_low, y_high = float(options.ylim[0]), float(options.ylim[1])
        ax.set_ylim(bottom=y_low, top=y_high)
    if options.xlim[0] is not None:
        x_low, x_high = float(options.xlim[0]), float(options.xlim[1])
        ax.set_xlim(left=x_low, right=x_high)
    if options.ylab is not None:
        ax.set_ylabel(options.ylab)
    if options.xlab is not None:
        ax.set_xlabel(options.xlab)

    add_top_title(ax, options.title)
    plt.tight_layout()

    # Write to file
    if options.ofile is not None:
        plt.savefig(options.ofile)

    # Show on screen
    if not options.silent:
        plt.show()
    return fig
def plot_ramachandran_single(df, seqID, ax, rama_type='GENERAL', **kwargs):
    """Plot only one of the 4 ramachandran plots in RAMPAGE format.

    Parameters for :func:`~matplotlib.pyplot.scatter` can be provided with prefix ``scatter_``.

    Parameters for :func:`~matplotlib.pyplot.plot` can be provided with prefix ``line_``.

    :param df: |df_param|, where ONE column contains the phi and a second column
        the psi angles.
    :type df: :class:`~pandas.Series`
    :param str seqID: |seqID_param|
    :param ax: |axis_param|.
    :type ax: :class:`~matplotlib.axes.Axes`
    :param str rama_type: Type of plot and residue types to plot. Options are:
        [``GLY``, ``PRO``, ``PRE-PRO``, ``GENERAL``] (case insensitive).
    :param kwargs: Only keys starting with ``scatter_`` or ``line_`` are used;
        the prefix is stripped and the rest is forwarded to the scatter call or
        to the two axis lines respectively. Both default to ``color='black'``.

    :raises:
        :ValueError: If the input is not a :class:`~pandas.Series` with the
            ``phi_seqID`` and ``psi_seqID`` columns.
        :ValueError: If rama_type is not between the available options.

    .. seealso::
        :func:`.plot_ramachandran`
        :func:`.get_sequence_and_structure`
        :func:`.get_dihedrals`
        :func:`.get_phi`
        :func:`.get_psi`
    """
    rama_key = rama_type.upper()
    if rama_key not in ('GLY', 'PRO', 'PRE-PRO', 'GENERAL'):
        raise ValueError('Unknown rama type {}'.format(rama_type))
    # Data type management.
    if not isinstance(df, (pd.Series, DesignSeries)):
        raise ValueError("Input data must be in a Series or DesignSeries")
    if not isinstance(df, DesignSeries):
        df = DesignSeries(df)
    if not isinstance(df.get_phi(seqID), np.ndarray):
        raise ValueError(
            "Ramachandran plot function can only be applied on one decoy at once."
        )
    if not isinstance(df.get_psi(seqID), np.ndarray):
        raise ValueError(
            "Ramachandran plot function can only be applied on one decoy at once."
        )

    rama_preferences = make_rama_preferences()[rama_key]
    rama_pref_values = np.full((360, 360), 0, dtype=np.float64)
    with open(rama_preferences["file"]) as fn:
        for line in fn:
            if line.startswith("#"):
                continue
            fields = line.split()
            if len(fields) < 3:
                # Blank or truncated line: nothing to store (used to raise IndexError).
                continue
            phi_idx = int(float(fields[0])) + 180
            psi_idx = int(float(fields[1])) + 180
            value = float(fields[2])
            # Preference file has values for every second position only, so
            # each value also fills the preceding row and column.
            for row in (psi_idx, psi_idx - 1):
                for col in (phi_idx, phi_idx - 1):
                    rama_pref_values[row][col] = value

    # Ramachandran residue classification: keep only the positions whose
    # class matches the requested plot type.
    seq = df.get_sequence(seqID)
    rama_types = []
    for i, aa in enumerate(seq):
        if aa == "G":
            residue_class = 'GLY'
        elif aa == "P":
            residue_class = 'PRO'
        elif i + 1 < len(seq) and seq[i + 1] == "P":
            residue_class = 'PRE-PRO'
        else:
            residue_class = 'GENERAL'
        if residue_class == rama_key:
            rama_types.append(i)

    # Generate the plots
    all_phi = df.get_phi(seqID)
    all_psi = df.get_psi(seqID)

    phi = all_phi[rama_types]
    psi = all_psi[rama_types]

    ax.imshow(rama_pref_values,
              cmap=rama_preferences["cmap"],
              norm=colors.BoundaryNorm(rama_preferences["bounds"],
                                       rama_preferences["cmap"].N),
              extent=(-180, 180, 180, -180))

    # Strip only the *leading* prefix; str.replace would also remove any later
    # occurrence of the prefix inside the option name.
    scatter_prefix = 'scatter_'
    scatter_kwargs = {k[len(scatter_prefix):]: v
                      for k, v in kwargs.items() if k.startswith(scatter_prefix)}
    scatter_kwargs.setdefault('color', 'black')
    ax.scatter(phi, psi, **scatter_kwargs)
    add_top_title(ax, rama_key)
    ax.set_xlim([-180, 180])
    ax.set_ylim([-180, 180])

    line_prefix = 'line_'
    line_kwargs = {k[len(line_prefix):]: v
                   for k, v in kwargs.items() if k.startswith(line_prefix)}
    line_kwargs.setdefault('color', 'black')
    # Horizontal and vertical lines through the origin.
    ax.plot([-180, 180], [0, 0], **line_kwargs)
    ax.plot([0, 0], [-180, 180], **line_kwargs)
    ax.locator_params(axis='x', nbins=7)
    ax.set_xlabel(r'$\phi$ [degrees]')
    ax.set_ylabel(r'$\psi$ [degrees]')
    ax.grid()
def plot_fragment_profiles( fig, small_frags, large_frags, ref_seq, ref_sse, matrix="BLOSUM62" ):
    """Plots a full summary of a :class:`.FragmentFrame` quality with sequence and expected
    secondary structure match.

    Six axes are created on a 4x2 grid of ``fig``: the left column holds the small
    fragments and the right column the large ones. In each column the top axis shows
    the sequence similarity, the middle one the secondary structure content and match
    and the bottom one (two rows tall) the RMSD boxplots.

    :param fig: Figure into which the data is going to be plotted.
    :type fig: :class:`~matplotlib.figure.Figure`
    :param small_frags: Data for the small fragments.
    :type small_frags: :class:`.FragmentFrame`
    :param large_frags: Data for the large fragments.
    :type large_frags: :class:`.FragmentFrame`
    :param str ref_seq: Reference sequence over which to compare.
    :param str ref_sse: Reference secondary structure over which to compare.
        Any ``C`` is treated as ``L`` before comparing.
    :param str matrix: Sequence similarity matrix to use for calculations.
        Default is ``BLOSUM62``.

    :return: :func:`list` of :class:`~matplotlib.axes.Axes`

    .. seealso::
        :func:`.plot_fragments`

    .. rubric:: Example

    .. ipython::
        :okwarning:

        In [1]: from rstoolbox.io import parse_rosetta_fragments
           ...: from rstoolbox.plot import plot_fragment_profiles
           ...: import matplotlib.pyplot as plt
           ...: df3 = parse_rosetta_fragments("../rstoolbox/tests/data/wauto.200.3mers.gz")
           ...: df9 = parse_rosetta_fragments("../rstoolbox/tests/data/wauto.200.9mers.gz")
           ...: df3 = df3.add_quality_measure(None)
           ...: df9 = df9.add_quality_measure(None)
           ...: fig = plt.figure(figsize=(25, 10))
           ...: seq = "ETPYAIALNDRVIGSSMVLPVDLEEFGAGFLFGQGYIKKAEEIREILVCPQGRISVYA"
           ...: sse = "LEEEEEEELLEEEEEEEELLLLHHHHHHHHHHHHLLLLLLLLLLLEEEELLLEEEELL"
           ...: axs = plot_fragment_profiles(fig, df3, df9, seq, sse)
           ...: plt.tight_layout()

        @savefig plot_fragment_profiles_docs.png width=5in
        In [2]: plt.show()

        In [3]: plt.close()
    """

    # make subplots
    grid = (4, 2)
    ax00 = plt.subplot2grid(grid, (0, 0), fig=fig)
    ax10 = plt.subplot2grid(grid, (1, 0), sharex=ax00, fig=fig)
    ax20 = plt.subplot2grid(grid, (2, 0), rowspan=2, sharex=ax00, fig=fig)

    ax01 = plt.subplot2grid(grid, (0, 1), sharey=ax00, fig=fig)
    ax11 = plt.subplot2grid(grid, (1, 1), sharex=ax01, fig=fig)
    ax21 = plt.subplot2grid(grid, (2, 1), rowspan=2, sharex=ax01, fig=fig)

    # fill subplots
    plot_fragments( small_frags, large_frags, ax20, ax21, titles=None )
    # str.replace returns a new string; the result has to be kept, otherwise
    # the C -> L normalisation is silently lost.
    ref_sse = ref_sse.replace("C", "L")
    positional_structural_similarity_plot(
        pd.concat([ra.positional_structural_count(small_frags),
                   ra.positional_structural_identity(small_frags, ref_sse=ref_sse)], axis=1),
        ax10)
    positional_structural_similarity_plot(
        pd.concat([ra.positional_structural_count(large_frags),
                   ra.positional_structural_identity(large_frags, ref_sse=ref_sse)], axis=1),
        ax11)
    positional_sequence_similarity_plot(ra.positional_sequence_similarity(small_frags, "A",
                                                                          ref_seq, matrix=matrix ),
                                        ax00 )
    positional_sequence_similarity_plot(ra.positional_sequence_similarity(large_frags, "A",
                                                                          ref_seq, matrix=matrix ),
                                        ax01 )

    # fix axis: the upper four axes hide their x tick labels and are clamped
    # to the [0, 1.01] frequency range.
    plt.setp(ax00.get_xticklabels(), visible=False)
    ax00.set_ylabel("aa freq")
    ax00.set_xlim(-0.5, max(small_frags["position"]))
    ax00.set_ylim(0, 1.01)
    plt.setp(ax01.get_xticklabels(), visible=False)
    ax01.set_ylabel("aa freq")
    ax01.set_xlim(-0.5, max(large_frags["position"]))
    ax01.set_ylim(0, 1.01)
    plt.setp(ax10.get_xticklabels(), visible=False)
    ax10.set_ylabel("sse freq")
    ax10.set_ylim(0, 1.01)
    plt.setp(ax11.get_xticklabels(), visible=False)
    ax11.set_ylabel("sse freq")
    ax11.set_ylim(0, 1.01)

    # fix display
    ru.add_top_title(ax00, "{}mers".format(small_frags["size"].values[0]))
    ru.add_top_title(ax01, "{}mers".format(large_frags["size"].values[0]))
    fig.subplots_adjust(wspace=0.1, hspace=0.1)

    fig.legend(handles=[
        mpatches.Patch(color="green",     label="aa identity"),
        mpatches.Patch(color="orange",    label="aa similarity"),
        mpatches.Patch(color="royalblue", label="sse helix content"),
        mpatches.Patch(color="tomato",    label="sse beta content"),
        mpatches.Patch(color="black",     label="sse match to expected")
    ], ncol=5, loc='lower center', borderaxespad=0.)

    return [ax00, ax10, ax20, ax01, ax11, ax21]
def plot_fragments( small_frags, large_frags, small_ax, large_ax, small_color=0, large_color=0,
                    small_max=None, large_max=None, titles="top", **kwargs ):
    """
    Plot RMSD quality of a pair of :class:`.FragmentFrame` in two provided axis.
    Thought to more easily print both small and large fragments together.

    On plotting, fragment RMSD values are assigned to the first position of the fragments.
    This means that the plots will have a length of::

        :math:`len(sequence) - len(fragment set)`

    :param small_frags: Data for the small fragments.
    :type small_frags: :class:`.FragmentFrame`
    :param large_frags: Data for the large fragments.
    :type large_frags: :class:`.FragmentFrame`
    :param small_ax: Axis where to print the small fragments.
    :type small_ax: :class:`~matplotlib.axes.Axes`
    :param large_ax: Axis where to print the large fragments.
    :type large_ax: :class:`~matplotlib.axes.Axes`
    :param small_color: Color to use on the small fragments. If string,
        that is the assumed color. If integer, it will provide that position for the
        currently active color palette in seaborn.
    :type small_color: Union[:class:`str`, :class:`int`]
    :param large_color: Color to use on the large fragments. If string,
        that is the assumed color. If integer, it will provide that position for the
        currently active color palette in seaborn.
    :type large_color: Union[:class:`str`, :class:`int`]
    :param float small_max: Max value for the y (RMSD) axis of the small fragments. If
        not provided, the system picks it according to the given data.
    :param float large_max: Max value for the y (RMSD) axis of the large fragments. If
        not provided, the system picks it according to the given data.
    :param str titles: Title placement. Options are "top" or "right". Other options
        will result in no titles added to the plot.
    :param kwargs: Extra options forwarded to both :func:`~seaborn.boxplot` calls.

    .. seealso::
        :func:`.plot_fragment_profiles`

    .. rubric:: Example

    .. ipython::
        :okwarning:

        In [1]: from rstoolbox.io import parse_rosetta_fragments
           ...: from rstoolbox.plot import plot_fragments
           ...: import matplotlib.pyplot as plt
           ...: df3 = parse_rosetta_fragments("../rstoolbox/tests/data/wauto.200.3mers.gz")
           ...: df9 = parse_rosetta_fragments("../rstoolbox/tests/data/wauto.200.9mers.gz")
           ...: df3 = df3.add_quality_measure(None)
           ...: df9 = df9.add_quality_measure(None)
           ...: fig = plt.figure(figsize=(35, 10))
           ...: ax00 = plt.subplot2grid((1, 2), (0, 0))
           ...: ax01 = plt.subplot2grid((1, 2), (0, 1))
           ...: plot_fragments(df3, df9, ax00, ax01)
           ...: plt.tight_layout()

        @savefig plot_fragments_docs.png width=5in
        In [2]: plt.show()

        In [3]: plt.close()
    """

    # Color management
    if isinstance(small_color, int):
        small_color = sns.color_palette()[small_color]
    if isinstance(large_color, int):
        large_color = sns.color_palette()[large_color]

    # Data compactness: keep one row per fragment, the one where the residue
    # position equals the fragment frame.
    small_frags_ = small_frags[small_frags["position"] == small_frags["frame"]]
    large_frags_ = large_frags[large_frags["position"] == large_frags["frame"]]

    sns.boxplot(x="frame", y="rmsd", data=small_frags_, ax=small_ax, color=small_color, **kwargs)
    sns.boxplot(x="frame", y="rmsd", data=large_frags_, ax=large_ax, color=large_color, **kwargs)

    # Basic formating
    small_ax.set_xticks(range(0, len(small_frags["frame"].unique()), 5))
    small_ax.set_xticklabels(range(min(small_frags["frame"]), max(small_frags["frame"]) + 1, 5))
    small_ax.set_xlabel("sequence")
    small_ax.set_ylabel("RMSD")
    if small_max is not None:
        small_ax.set_ylim(0, small_max)
    else:
        small_ax.set_ylim(0)
    small_ax.yaxis.grid(False)
    small_ax.xaxis.grid(True)
    small_ax.set_axisbelow(True)

    large_ax.set_xticks(range(0, len(large_frags["frame"].unique()), 5))
    # Labels for the large axis must start at the first frame of the *large*
    # fragment set (previously the small set's minimum was used by mistake).
    large_ax.set_xticklabels(range(min(large_frags["frame"]), max(large_frags["frame"]) + 1, 5))
    large_ax.set_xlabel("sequence")
    large_ax.set_ylabel("RMSD")
    if large_max is not None:
        large_ax.set_ylim(0, large_max)
    else:
        large_ax.set_ylim(0)
    large_ax.yaxis.grid(False)
    large_ax.xaxis.grid(True)
    large_ax.set_axisbelow(True)

    # Titles: anything other than "top" / "right" (or None) adds no title.
    if titles is not None:
        placement = titles.lower()
        small_title = "{}mers".format(small_frags["size"].values[0])
        large_title = "{}mers".format(large_frags["size"].values[0])
        if placement == "top":
            ru.add_top_title(small_ax, small_title)
            ru.add_top_title(large_ax, large_title)
        elif placement == "right":
            # The documented "right" option used to call add_left_title.
            # NOTE(review): assumes ru.add_right_title accepts (ax, title) like
            # its top/left siblings -- confirm against the utils module.
            ru.add_right_title(small_ax, small_title)
            ru.add_right_title(large_ax, large_title)
def multiple_distributions( df, fig, grid, igrid=None, values="*", titles=None, labels=None,
                            refdata=None, ref_equivalences=None, violins=True, legends=False, **kwargs ):
    """Automatically plot boxplot distributions for multiple score types of the
    decoy population.

    Apart from the fixed options, the function accepts any option of
    :func:`~seaborn.boxplot`, except for ``y``, ``data`` and ``ax``, which
    are used internally by this function. ``rowspan`` and ``colspan`` are
    also intercepted and handed to :func:`~matplotlib.pyplot.subplot2grid`.

    :param df: Data container.
    :type df: :class:`~pandas.DataFrame`
    :param fig: Figure into which the data is going to be plotted.
    :type fig: :class:`~matplotlib.figure.Figure`
    :param grid: Shape of the grid to plot the values in the figure (rows x columns).
    :type grid: :class:`tuple` with two :class:`int`
    :param igrid: Initial position of the grid. Defaults to (0, 0)
    :type igrid: :class:`tuple` with two :class:`int`
    :param values: Contents from the data container that are expected to be plotted.
        The default ``"*"`` selects every numeric column.
    :type values: :func:`list` of :class:`str`
    :param titles: Titles to assign to the value of each plot (if provided).
    :type titles: :func:`list` of :class:`str`
    :param labels: Y labels to assign to the value of each plot. By default this will be
        the name of the value.
    :type labels: :func:`list` of :class:`str`
    :param refdata: Data content to use as reference.
    :type refdata: :class:`~pandas.DataFrame`
    :param dict ref_equivalences: When names between the query data and the provided data are the
        same, they will be directly assigned. Here a dictionary ``db_name``:``query_name`` can be
        provided otherwise.
    :param bool violins: When :data:`True`, plot refdata comparison with violins, otherwise do it
        with kdeplots.
    :param bool legends: When :data:`True`, show the legends of each axis.

    :return: :func:`list` of :class:`~matplotlib.axes.Axes`

    :raises:
        :ValueError: If columns are requested that do not exist in the :class:`~pandas.DataFrame`.
        :ValueError: If the given grid does not have enough positions for all the requested values.
        :ValueError: If the number of values and titles do not match.
        :ValueError: If the number of values and labels do not match.
        :ValueError: If ``refdata`` is not :class:`~pandas.DataFrame`.

    .. rubric:: Example 1: Raw design population data.

    .. ipython::
        :okwarning:

        In [1]: from rstoolbox.io import parse_rosetta_file
           ...: from rstoolbox.plot import multiple_distributions
           ...: import matplotlib.pyplot as plt
           ...: df = parse_rosetta_file("../rstoolbox/tests/data/input_2seq.minisilent.gz")
           ...: values = ["score", "hbond_sr_bb", "B_ni_rmsd", "hbond_bb_sc",
           ...:           "cav_vol", "design_score", "packstat", "rmsd_drift"]
           ...: fig = plt.figure(figsize=(25, 10))
           ...: axs = multiple_distributions(df, fig, (2, 4), values=values)
           ...: plt.tight_layout()

        @savefig multiple_distributions_docs.png width=5in
        In [2]: plt.show()

        In [3]: plt.close()

    .. rubric:: Example 2: Design population data vs. DB reference.

    .. ipython::
        :okwarning:

        In [1]: from rstoolbox.io import parse_rosetta_file
           ...: from rstoolbox.plot import multiple_distributions
           ...: from rstoolbox.utils import load_refdata
           ...: import matplotlib.pyplot as plt
           ...: df = parse_rosetta_file("../rstoolbox/tests/data/input_2seq.minisilent.gz",
           ...:                         {'sequence': 'A'})
           ...: slength = len(df.iloc[0]['sequence_A'])
           ...: refdf = load_refdata('scop2')
           ...: refdf = refdf[(refdf['length'] >= slength - 5) &
           ...:               (refdf['length'] <= slength + 5)]
           ...: values = ["score", "hbond_sr_bb", "B_ni_rmsd", "hbond_bb_sc",
           ...:           "cav_vol", "design_score", "packstat", "rmsd_drift"]
           ...: fig = plt.figure(figsize=(25, 10))
           ...: axs = multiple_distributions(df, fig, (2, 4), values=values, refdata=refdf)
           ...: plt.tight_layout()

        @savefig multiple_distributions_docs2.png width=5in
        In [2]: plt.show()

        In [3]: plt.close()
    """
    if igrid is None:
        igrid = (0, 0)
    if values == "*":
        values = df.select_dtypes(include=[np.number]).columns.tolist()
    if len(set(values).difference(set(list(df.columns)))) > 0:
        raise ValueError("Some of the requested values do not exist "
                         "in the data container.")
    # NOTE(review): this capacity estimate subtracts the *product* of the
    # offset, while the loop below shifts every cell by igrid; with a non-zero
    # offset the two do not obviously agree. Left unchanged -- confirm the
    # intended igrid semantics before tightening it.
    if (grid[0] * grid[1]) - (igrid[0] * igrid[1]) < len(values):
        raise ValueError("The grid does not provide enought positions for all"
                         " requested values.")
    if titles is not None and len(titles) != len(values):
        raise ValueError("Number of expected plots and titles do not match.")
    if labels is not None and len(labels) != len(values):
        raise ValueError("Number of expected plots and labels do not match.")

    if refdata is not None:
        if not isinstance(refdata, pd.DataFrame):
            raise ValueError('Unknown reference data format.')
        if ref_equivalences is not None:
            refdata = refdata.rename(columns=ref_equivalences)
        refvalues = refdata.select_dtypes(include=[np.number]).columns.tolist()
    else:
        refvalues = []

    # Options this function sets itself must not reach seaborn a second time.
    # "ax" is the documented reserved name; "axis" is still dropped because
    # earlier versions discarded it.
    for reserved in ("y", "data", "ax", "axis"):
        kwargs.pop(reserved, None)

    axis_args = {'rowspan': kwargs.pop('rowspan', 1), 'colspan': kwargs.pop('colspan', 1)}

    axis = []
    cells = itertools.product(range(grid[0]), range(grid[1]))
    # zip stops as soon as every requested value has been given a cell.
    for index, (cell, value) in enumerate(zip(cells, values)):
        pgrid = [cell[0] + igrid[0], cell[1] + igrid[1]]
        # colspan used to be accepted but silently ignored.
        ax = plt.subplot2grid(grid, pgrid, fig=fig,
                              rowspan=axis_args['rowspan'], colspan=axis_args['colspan'])
        if value not in refvalues:
            sns.boxplot(y=value, data=df, ax=ax, **kwargs)
        else:
            # Tag query and reference rows so they can be told apart after
            # being concatenated.
            s1 = add_column(pd.DataFrame(df[value]), 'target', 'query')
            s1 = add_column(s1, 'violinx', 1)
            s2 = add_column(pd.DataFrame(refdata[value]), 'target', 'reference')
            s2 = add_column(s2, 'violinx', 1)
            qd = pd.concat([s1, s2])
            if violins:
                sns.violinplot(x='violinx', y=value, hue='target', data=qd, ax=ax,
                               hue_order=["query", "reference"], split=True)
                ax.set_xlabel('')
                ax.set_xticklabels('')
            else:
                sns.kdeplot(s1[value], ax=ax, shade=True)
                sns.kdeplot(s2[value], ax=ax, shade=True)
                ax.set_xlabel(value)
            if not legends:
                # get_legend() returns None when no legend was drawn.
                legend = ax.get_legend()
                if legend is not None:
                    legend.remove()
        if titles is not None:
            add_top_title(ax, titles[index])
        if labels is not None:
            ax.set_ylabel(labels[index])
        axis.append(ax)

    return axis
Beispiel #8
0
def multiple_distributions( df, fig, grid, values="*", titles=None, labels=None, **kwargs ):
    """Automatically plot boxplot distributions for multiple score types of the
    decoy population.

    Apart from the fixed options, the function accepts any option of
    :func:`~seaborn.boxplot`, except for ``y``, ``data`` and ``ax``, which
    are used internally by this function.

    :param df: Data container
    :type df: :class:`~pandas.DataFrame`
    :param fig: Figure into which the data is going to be plotted.
    :type fig: :class:`~matplotlib.figure.Figure`
    :param grid: Shape of the grid to plot the values in the figure (rows x columns).
    :type grid: :class:`tuple` with two :class:`int`
    :param values: Contents from the data container that are expected to be plotted.
        The default ``"*"`` selects every numeric column.
    :type values: :func:`list` of :class:`str`
    :param titles: Titles to assign to the value of each plot (if provided).
    :type titles: :func:`list` of :class:`str`
    :param labels: Y labels to assign to the value of each plot. By default this will be
        the name of the value.
    :type labels: :func:`list` of :class:`str`

    :return: :func:`list` of :class:`~matplotlib.axes.Axes`

    :raises:
        :ValueError: If columns are requested that do not exist in the :class:`~pandas.DataFrame`.
        :ValueError: If the given grid does not have enough positions for all the requested values.
        :ValueError: If the number of values and titles do not match.
        :ValueError: If the number of values and labels do not match.

    .. rubric:: Example

    .. ipython::

        In [1]: from rstoolbox.io import parse_rosetta_file
           ...: from rstoolbox.plot import multiple_distributions
           ...: import matplotlib.pyplot as plt
           ...: df = parse_rosetta_file("../rstoolbox/tests/data/input_2seq.minisilent.gz")
           ...: values = ["score", "hbond_sr_bb", "B_ni_rmsd", "hbond_bb_sc",
           ...:           "cav_vol", "design_score", "packstat", "rmsd_drift"]
           ...: fig = plt.figure(figsize=(25, 10))
           ...: axs = multiple_distributions(df, fig, (2, 4), values)
           ...: plt.tight_layout()

        @savefig multiple_distributions_docs.png width=5in
        In [2]: plt.show()
    """
    if values == "*":
        values = df.select_dtypes(include=[np.number]).columns.tolist()
    if len(set(values).difference(set(list(df.columns)))) > 0:
        raise ValueError("Some of the requested values do not exist "
                         "in the data container.")
    if grid[0] * grid[1] < len(values):
        raise ValueError("The grid does not provide enought positions for all"
                         " requested values.")
    if titles is not None and len(titles) != len(values):
        raise ValueError("Number of expected plots and titles do not match.")
    if labels is not None and len(labels) != len(values):
        raise ValueError("Number of expected plots and labels do not match.")

    # Options this function sets itself must not reach seaborn a second time.
    # "ax" is the documented reserved name; "axis" is still dropped because
    # earlier versions discarded it.
    for reserved in ("y", "data", "ax", "axis"):
        kwargs.pop(reserved, None)

    axis = []
    cells = itertools.product(range(grid[0]), range(grid[1]))
    # Cells are visited row by row; zip stops once every value has a cell.
    for index, (cell, value) in enumerate(zip(cells, values)):
        ax = plt.subplot2grid(grid, cell, fig=fig)
        sns.boxplot(y=value, data=df, ax=ax, **kwargs)
        if titles is not None:
            add_top_title(ax, titles[index])
        if labels is not None:
            ax.set_ylabel(labels[index])
        axis.append(ax)

    return axis
Beispiel #9
0
def plot_ramachandran(df, seqID, fig):
    """Generates a ramachandran plot in RAMPAGE style.

    For more details and sources please refer to
    `ramachandran plot for python
    <https://warwick.ac.uk/fac/sci/moac/people/students/peter_cock/python/ramachandran/>`_

    Background distribution data taken from
    `git repository <https://github.com/S-John-S/Ramachandran-Plot.git>`_

    The phi - psi dihedrals should be present in the DesignSeries. If this is not the case,
    consider computing them using for example the :func:`.get_sequence_and_structure`.
    Note that this function will only plot the ramachandran for a single decoy. If one
    would like to compute it for multiple decoys, please see the example below.

    Four axes are created on a 2x2 grid of ``fig``, in the order
    ``GENERAL``, ``GLY``, ``PRE-PRO``, ``PRO``.

    :param df: |df_param|, where ONE column contains the phi and a second column
        the psi angles.
    :type df: :class:`~pandas.Series`
    :param str seqID: |seqID_param|
    :param fig: Figure into which the four plots are drawn.
    :type fig: :class:`~matplotlib.figure.Figure`

    :raises:
        :ValueError: If the input is not a :class:`~pandas.Series`.
        :ValueError: If the phi or psi data of ``seqID`` is not a single array
            (i.e. more than one decoy was provided).

    .. seealso::
        :func:`.get_sequence_and_structure`
        :func:`.get_dihedrals`
        :func:`.get_phi`
        :func:`.get_psi`

    .. rubric:: Example

    .. ipython::

        In [1]: import rstoolbox as rb
           ...: import pandas as pd
           ...: import matplotlib.pyplot as plt
           ...: plt.style.use('ggplot')
           ...: definitions = {
           ...:                "scores": ["score"],
           ...:                "sequence" : "A",
           ...:                "psipred" : "*",
           ...:                "structure" : "*",
           ...:                "dihedrals": "*"
           ...:                }
           ...: dsf = rb.io.parse_rosetta_file(
           ...:     "../rstoolbox/tests/data/input_3ssepred.minisilent.gz",
           ...:     definitions )
           ...: figure = plt.figure(figsize=(15,10))
           ...: rb.plot.plot_ramachandran(dsf.iloc[0], "A", figure)
           ...: plt.tight_layout()
           ...: figure.subplots_adjust(top=1.2)

        @savefig plot_ramachandran.png width=5in
        In [2]: plt.show()
    """
    # Data type management.
    if not isinstance(df, pd.Series):
        raise ValueError("Input data must be in a Series or DesignSeries")
    if not isinstance(df, DesignSeries):
        df = DesignSeries(df)
    if not isinstance(df.get_phi(seqID), np.ndarray):
        raise ValueError(
            "Ramachandran plot function can only be applied on one decoy at once."
        )
    if not isinstance(df.get_psi(seqID), np.ndarray):
        raise ValueError(
            "Ramachandran plot function can only be applied on one decoy at once."
        )

    # General variable for the background preferences. Each file path is built
    # exactly once; joining it with the data directory a second time only
    # worked when ``__file__`` happened to be absolute.
    bgdir = os.path.join(os.path.dirname(__file__), "rama_bgdists")
    rama_preferences = {
        "GENERAL": {
            "file": os.path.join(bgdir, "pref_general.data"),
            "cmap": colors.ListedColormap(['#FFFFFF', '#B3E8FF', '#7FD9FF']),
            "bounds": [0, 0.0005, 0.02, 1],
        },
        "GLY": {
            "file": os.path.join(bgdir, "pref_glycine.data"),
            "cmap": colors.ListedColormap(['#FFFFFF', '#FFE8C5', '#FFCC7F']),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRE-PRO": {
            "file": os.path.join(bgdir, "pref_preproline.data"),
            "cmap": colors.ListedColormap(['#FFFFFF', '#B3E8FF', '#7FD9FF']),
            "bounds": [0, 0.002, 0.02, 1],
        },
        "PRO": {
            "file": os.path.join(bgdir, "pref_proline.data"),
            "cmap": colors.ListedColormap(['#FFFFFF', '#D0FFC5', '#7FFF8C']),
            "bounds": [0, 0.002, 0.02, 1],
        },
    }
    # Plot order; the same keys index preferences, background grids and
    # residue classes, so no sort-based pairing is needed.
    order = ["GENERAL", "GLY", "PRE-PRO", "PRO"]

    # Read in the expected torsion angles.
    rama_pref_values = {}
    for key in order:
        pref_grid = np.full((360, 360), 0, dtype=np.float64)
        with open(rama_preferences[key]["file"]) as fn:
            for line in fn:
                if line.startswith("#"):
                    continue
                fields = line.split()
                if len(fields) < 3:
                    # Blank or truncated line: nothing to store.
                    continue
                phi_idx = int(float(fields[0])) + 180
                psi_idx = int(float(fields[1])) + 180
                value = float(fields[2])
                # Preference file has values for every second position only,
                # so each value also fills the preceding row and column.
                for row in (psi_idx, psi_idx - 1):
                    for col in (phi_idx, phi_idx - 1):
                        pref_grid[row][col] = value
        rama_pref_values[key] = pref_grid

    # Ramachandran residue classification.
    seq = df.get_sequence(seqID)
    rama_types = {"GLY": [], "PRO": [], "PRE-PRO": [], "GENERAL": []}
    for i, aa in enumerate(seq):
        if aa == "G":
            rama_types["GLY"].append(i)
        elif aa == "P":
            rama_types["PRO"].append(i)
        elif i + 1 < len(seq) and seq[i + 1] == "P":
            rama_types["PRE-PRO"].append(i)
        else:
            rama_types["GENERAL"].append(i)

    # Generate the plots
    all_phi = df.get_phi(seqID)
    all_psi = df.get_psi(seqID)
    grid = (2, 2)
    ax = [
        plt.subplot2grid(grid, (0, 0), fig=fig),
        plt.subplot2grid(grid, (0, 1), fig=fig),
        plt.subplot2grid(grid, (1, 0), fig=fig),
        plt.subplot2grid(grid, (1, 1), fig=fig)
    ]
    for axis, key in zip(ax, order):
        prefs = rama_preferences[key]
        phi = all_phi[rama_types[key]]
        psi = all_psi[rama_types[key]]

        axis.imshow(rama_pref_values[key],
                    cmap=prefs["cmap"],
                    norm=colors.BoundaryNorm(prefs["bounds"], prefs["cmap"].N),
                    extent=(-180, 180, 180, -180))
        axis.scatter(phi, psi, color="black")
        add_top_title(axis, key)
        axis.set_xlim([-180, 180])
        axis.set_ylim([-180, 180])
        # Horizontal and vertical lines through the origin.
        axis.plot([-180, 180], [0, 0], color="black")
        axis.plot([0, 0], [-180, 180], color="black")
        axis.locator_params(axis='x', nbins=7)
        axis.set_xlabel(r'$\phi$ [degrees]')
        axis.set_ylabel(r'$\psi$ [degrees]')
        axis.grid()