예제 #1
0
def enrichment_pymol_script(enrichment_table, output_file,
                            sphere_view=True, chain=None):
    """
    Create a Pymol .pml script to visualize EC "enrichment"

    Only positions with enrichment > 1 are passed on for highlighting.
    The "top 5%" / "top 15%" groups are taken by row position, so the
    table is assumed to be ordered from highest to lowest enrichment
    (presumably the order evcouplings.couplings.pairs.enrichment()
    returns -- verify against the caller).

    Parameters
    ----------
    enrichment_table : pandas.DataFrame
        Mapping of position (column i) to EC enrichment
        (column enrichment), as returned by
        evcouplings.couplings.pairs.enrichment().
        The table itself is not modified.
    output_file : str
        File path where to store pml script
    sphere_view : bool, optional (default: True)
        If True, create pml that highlights enriched positions
        with spheres and color; if False, create pml
        that highlights enrichment using b-factor and
        "cartoon putty"
    chain : str, optional (default: None)
        Use this PDB chain in residue selection
    """
    # Work on an explicit copy: assigning columns directly on the frame
    # returned by query() triggers pandas' SettingWithCopyWarning.
    t = enrichment_table.query("enrichment > 1").copy()

    # compute boundaries for highly coupled residues
    # that will be specially highlighted
    boundary1 = int(0.05 * len(t))  # top 5%
    boundary2 = int(0.15 * len(t))  # top 15%

    t.loc[:, "b_factor"] = t.enrichment

    # set color for "low" enrichment (anything > 1)
    t.loc[:, "color"] = "yelloworange"

    # high
    t.loc[t.index[:boundary1], "color"] = "red"

    # medium
    t.loc[t.index[boundary1:boundary2], "color"] = "orange"

    # only the top 15% are flagged for sphere display
    if sphere_view:
        t.loc[t.index[:boundary2], "show"] = "spheres"

    if chain is not None:
        chain_sel = ", chain '{}'".format(chain)
    else:
        chain_sel = ""

    with open(output_file, "w") as f:
        f.write("as cartoon{}\n".format(chain_sel))
        f.write("color grey80{}\n".format(chain_sel))

        # reset b-factors before the per-residue mapping is written
        if chain is None:
            f.write("alter all, b=0.0\n")
        else:
            f.write("alter chain '{}', b=0.0\n".format(chain))

        pymol_mapping(t, f, chain)

        if not sphere_view:
            f.write("cartoon putty{}\n".format(chain_sel))
예제 #2
0
def enrichment_pymol_script(enrichment_table,
                            output_file,
                            sphere_view=True,
                            chain=None,
                            legacy=False):
    """
    Create a Pymol .pml script to visualize EC "enrichment"

    Color groups are assigned by row position, so the table is assumed
    to be ordered from highest to lowest enrichment (presumably the
    order evcouplings.couplings.pairs.enrichment() returns -- verify
    against the caller).

    Parameters
    ----------
    enrichment_table : pandas.DataFrame
        Mapping of position (column i) to EC enrichment
        (column enrichment), as returned by
        evcouplings.couplings.pairs.enrichment().
        The table itself is not modified.
    output_file : str
        File path where to store pml script
    sphere_view : bool, optional (default: True)
        If True, create pml that highlights enriched positions
        with spheres and color; if False, create pml
        that highlights enrichment using b-factor and
        "cartoon putty". In legacy mode the top 15% of
        positions with enrichment > 1 are shown as spheres;
        in non-legacy mode all positions in the table are.
    chain : str, optional (default: None)
        Use this PDB chain in residue selection
    legacy : bool, optional (default: False)
        Use legacy (2011) red and yellow colormap
        for EC enrichment
    """
    if legacy:
        # Explicit copy: assigning columns directly on the frame
        # returned by query() triggers SettingWithCopyWarning.
        t = enrichment_table.query("enrichment > 1").copy()
        t.loc[:, "b_factor"] = t.enrichment

        # compute boundaries for highly coupled residues
        # that will be specially highlighted
        boundary1 = int(0.05 * len(t))  # top 5%
        boundary2 = int(0.15 * len(t))  # top 15%

        # set color for "low" enrichment (anything > 1)
        t.loc[:, "color"] = "yelloworange"

        # high
        t.loc[t.index[:boundary1], "color"] = "red"

        # medium
        t.loc[t.index[boundary1:boundary2], "color"] = "orange"

        # Spheres for the top 15%. The previous code read an undefined
        # name ("boundary") here and raised NameError in legacy mode.
        sphere_boundary = boundary2

    else:
        t = enrichment_table.copy()
        t.loc[:, "b_factor"] = t.enrichment

        # set boundaries for enrichment levels
        # that will be specially highlighted
        # create nine subsets
        boundary_list = [
            int(0.11 * len(t)),
            int(0.22 * len(t)),
            int(0.33 * len(t)),
            int(0.44 * len(t)),
            int(0.55 * len(t)),
            int(0.66 * len(t)),
            int(0.77 * len(t)),
            int(0.88 * len(t)),
            int(1.00 * len(t))
        ]

        # list of colors to color each category
        # must be same length and order as boundary_list
        # list of rgb tuples
        color_list = [
            (77, 0, 75),  # dark purple
            (129, 15, 124),
            (136, 65, 157),
            (140, 107, 177),
            (140, 150, 198),
            (158, 188, 218),
            (191, 211, 230),
            (224, 236, 244),
            (247, 252, 253)  # almost white
        ]

        # convert 0-255 channels to fractions for set_color
        color_list = [(x / 255, y / 255, z / 255) for x, y, z in color_list]

        # assign the custom color names color0..color8 to consecutive
        # row ranges
        prior_boundary = 0
        for idx, boundary in enumerate(boundary_list):
            t.loc[t.index[prior_boundary:boundary],
                  "color"] = 'color{}'.format(idx)
            prior_boundary = boundary

        # Unchanged behaviour: the last boundary is len(t), so every
        # row is flagged for sphere display.
        sphere_boundary = boundary_list[-1]

    if sphere_view:
        t.loc[t.index[:sphere_boundary], "show"] = "spheres"

    if chain is not None:
        chain_sel = ", chain '{}'".format(chain)
    else:
        chain_sel = ""

    with open(output_file, "w") as f:

        # reset b-factors before the per-residue mapping is written
        if chain is None:
            f.write("alter all, b=0.0\n")
        else:
            f.write("alter chain '{}', b=0.0\n".format(chain))

        # for legacy mode, background color is grey80
        if legacy:
            f.write("color grey80{}\n".format(chain_sel))

        # for non-legacy mode, define the custom colors; the background
        # color is the last color in the spectrum
        else:
            for idx, c in enumerate(color_list):
                f.write("set_color color{}, [{},{},{}]\n".format(
                    idx, c[0], c[1], c[2]))
            f.write("color color{}{}\n".format(
                len(boundary_list) - 1, chain_sel))

        f.write("as cartoon{}\n".format(chain_sel))

        pymol_mapping(t, f, chain)

        if not sphere_view:
            f.write("cartoon putty{}\n".format(chain_sel))
예제 #3
0
def mutation_pymol_script(mutation_table,
                          output_file,
                          effect_column="prediction_epistatic",
                          mutant_column="mutant",
                          agg_func="mean",
                          cmap=plt.cm.RdBu_r,
                          segment_to_chain_mapping=None):
    """
    Create a Pymol .pml script to visualize single mutation
    effects

    Single mutants are grouped by segment; within each segment the
    effects are aggregated per position and mapped onto a color scale
    that is symmetric around zero (-max|effect| .. +max|effect|).
    All segments are validated and aggregated before the output file
    is opened, so a ValueError leaves no partially written script.

    Parameters
    ----------
    mutation_table : pandas.DataFrame
        Table with mutation effects (will be filtered
        for single mutants)
    output_file : str
        File path where to store pml script
    effect_column : str, optional (default: "prediction_epistatic")
        Column in mutation_table that contains mutation effects
    mutant_column : str, optional (default: "mutant")
        Column in mutation_table that contains mutations
        (in format "A123G")
    agg_func : str, optional (default: "mean")
        Function used to aggregate single mutations into one
        aggregated effect per position (any pandas aggregation
        operation, including "mean", "min", "max")
    cmap : matplotlib.colors.LinearSegmentedColormap, optional
            (default: plt.cm.RdBu_r)
        Colormap used to map mutation effects to colors
    segment_to_chain_mapping: str or dict(str -> str), optional (default: None)
        PDB chain(s) that should be targeted by residue selection

        * If None, residues will be selected
          by position alone, which may cause wrong assignments
          if multiple chains are present in the structure.

        * If a str, that chain is used for all segments.

        * Different chains can be assigned for position
          if a dictionary that maps from segment (str) to PDB chain (str)
          is given. Rows without a segment are grouped under
          the segment name "none".

    Raises
    ------
    ValueError
        If no single mutants contained in mutation_table
    ValueError
        If mutation_table contains a segment identifier not
        found in segment_to_chain_mapping
    """
    # split mutation strings
    t = split_mutants(mutation_table, mutant_column)

    # only pick single mutants; explicit copy so that columns can be
    # assigned below without SettingWithCopyWarning
    t = t.query("num_mutations == 1").copy()

    if len(t) == 0:
        raise ValueError("mutation_table does not contain any single "
                         "amino acid substitutions.")

    # add a segment column if missing
    if "segment" not in t.columns:
        t["segment"] = None

    # groupby drops missing keys, so give rows without a segment a
    # string label. Only the segment column is filled: filling the
    # whole table would turn missing effects into the string "none"
    # and break the numeric aggregation.
    t["segment"] = t["segment"].fillna("none")

    # handle each segment independently; resolve chains and aggregate
    # everything first so that errors surface before the file is opened
    per_segment = []
    for segment_name, _t in t.groupby("segment"):

        if segment_to_chain_mapping is None:
            chain = None

        elif isinstance(segment_to_chain_mapping, str):
            chain = segment_to_chain_mapping

        elif segment_name not in segment_to_chain_mapping:
            raise ValueError("Segment name {} has no mapping to PyMOL "
                             "chain. Available mappings are: {}".format(
                                 segment_name, segment_to_chain_mapping))
        else:
            chain = segment_to_chain_mapping[segment_name]

        # aggregate into positional information
        _t = _t.loc[:,
                    ["pos", effect_column]].rename(columns={
                        "pos": "i",
                        effect_column: "effect"
                    })

        t_agg = _t.groupby("i").agg(agg_func).reset_index()

        # plain column assignment replaces the column, so it ends up
        # with an integer dtype regardless of its previous dtype
        t_agg["i"] = pd.to_numeric(t_agg.i).astype(int)

        # map aggregated effects to colors (scale symmetric around 0)
        max_val = t_agg.effect.abs().max()
        mapper = colormap(-max_val, max_val, cmap)
        t_agg["color"] = t_agg.effect.map(mapper)
        t_agg["show"] = "spheres"

        per_segment.append((chain, t_agg))

    with open(output_file, "w") as f:
        for chain, t_agg in per_segment:
            if chain is not None:
                chain_sel = ", chain '{}'".format(chain)
            else:
                chain_sel = ""

            f.write("as cartoon{}\n".format(chain_sel))
            f.write("color grey80{}\n".format(chain_sel))

            pymol_mapping(t_agg, f, chain, atom="CA")
예제 #4
0
def mutation_pymol_script(mutation_table,
                          output_file,
                          effect_column="prediction_epistatic",
                          mutant_column="mutant",
                          agg_func="mean",
                          cmap=plt.cm.RdBu_r,
                          chain=None):
    """
    Write a Pymol .pml script that displays per-position effects
    of single mutations

    The table is reduced to single mutants, effects are aggregated
    per position, and each aggregated effect is mapped to a color on
    a scale that is symmetric around zero (-max|effect| .. +max|effect|).

    Parameters
    ----------
    mutation_table : pandas.DataFrame
        Mutation effect table; only rows describing single
        mutants are used
    output_file : str
        Path of the pml script to write
    effect_column : str, optional (default: "prediction_epistatic")
        Name of the column in mutation_table holding the effects
    mutant_column : str, optional (default: "mutant")
        Name of the column in mutation_table holding the mutation
        strings (in format "A123G")
    agg_func : str, optional (default: "mean")
        pandas aggregation applied to all single mutants of a
        position to obtain one effect per position
        (e.g. "mean", "min", "max")
    cmap : matplotlib.colors.LinearSegmentedColormap, optional
            (default: plt.cm.RdBu_r)
        Colormap translating aggregated effects into colors
    chain : str, optional (default: None)
        Restrict residue selection to this PDB chain

    Raises
    ------
    ValueError
        If mutation_table contains no single mutants
    """
    # parse mutation strings, then keep single substitutions only
    parsed = split_mutants(mutation_table, mutant_column)
    singles = parsed.query("num_mutations == 1")

    if not len(singles):
        raise ValueError("mutation_table does not contain any single "
                         "amino acid substitutions.")

    # reduce to (position, effect) pairs under fixed column names
    renaming = {"pos": "i", effect_column: "effect"}
    pos_effect = singles.loc[:, ["pos", effect_column]]
    pos_effect = pos_effect.rename(columns=renaming)

    # one aggregated effect per position, positions as integers
    per_position = pos_effect.groupby("i").agg(agg_func).reset_index()
    per_position.loc[:, "i"] = pd.to_numeric(per_position.i).astype(int)

    # color scale is centered on zero and spans the largest |effect|
    largest = per_position.effect.abs().max()
    to_color = colormap(-largest, largest, cmap)
    per_position.loc[:, "color"] = per_position.effect.map(to_color)
    per_position.loc[:, "show"] = "spheres"

    # optional chain restriction appended to the global commands
    chain_sel = "" if chain is None else ", chain '{}'".format(chain)

    with open(output_file, "w") as f:
        for template in ("as cartoon{}\n", "color grey80{}\n"):
            f.write(template.format(chain_sel))

        pymol_mapping(per_position, f, chain, atom="CA")