Esempio n. 1
0
def rg_analysis(model, **kwargs):
    """Summarise the radius of gyration per macrostate and over all frames.

    Parameters
    ----------
    model : TYPE
        Model whose simulations are projected; must expose ``macronum``.
    **kwargs
        Forwarded unchanged to ``get_data``.

    Returns
    -------
    numpy.ndarray
        Two rows: the first holds the means, the second the standard
        deviations. Each row has one entry per macrostate followed by one
        final entry computed over every projected frame pooled together.
    """
    from htmd.model import getStateStatistic
    from IDP_htmd.MetricRadiusGyration import metricRG
    from IDP_htmd.model_utils import get_data
    import numpy as np

    projected = get_data(model, metricRG, **kwargs)
    macrostates = range(model.macronum)

    per_state_mean = getStateStatistic(model, projected, states=macrostates)
    per_state_std = getStateStatistic(model,
                                      projected,
                                      states=macrostates,
                                      method=np.std)

    # Pool every value of every trajectory into one flat array.
    pooled = np.array([
        value for traj in projected.dat for value in traj.ravel().tolist()
    ])

    means = np.append(per_state_mean, np.mean(pooled))
    stds = np.append(per_state_std, np.std(pooled))

    return np.array([means, stds])
Esempio n. 2
0
def aux_plot(model,
             mol,
             plot_func,
             metric=None,
             skip=1,
             normalize=False,
             method=np.mean,
             data=None,
             **kwargs):
    """Aggregate projected data by macrostate and hand it to a plotting function.

    Parameters
    ----------
    model : TYPE
        Model to extract the data
    mol : TYPE
        Molecule passed to ``plot_func``; its ``resid`` attribute is used
        when ``normalize`` is set.
    plot_func : callable
        Plotting function, called as ``plot_func(data_summary, mol, **kwargs)``.
    metric : TYPE, optional
        Metric object to project the simlist of the model. Only used when
        ``data`` is not given.
    skip : int, optional
        Skip frames from the simlist (forwarded to ``get_data``).
    normalize : bool, optional
        Whether to divide the per-macrostate summary by the number of
        entries sharing each ``mol.resid`` value (presumably atoms per
        residue).
    method : callable, optional
        Method to perform the aggregation of the data by macrostate
    data : TYPE, optional
        Already projected data. When given it is used as-is and ``metric``
        is ignored.
    **kwargs
        Additional arguments for the plotting function

    Raises
    ------
    Exception
        If neither ``metric`` nor ``data`` is provided.
    """
    # Compare against None explicitly: truthiness of arbitrary metric/data
    # objects (e.g. array-likes) is ambiguous or may raise.
    if metric is None and data is None:
        raise Exception("Either a metric or a data object must be provided")

    if data is None:
        data = get_data(model, metric, skip=skip)

    data_summary = getStateStatistic(model,
                                     data,
                                     method=method,
                                     states=range(model.macronum),
                                     statetype="macro")

    if normalize:
        _, counts = np.unique(mol.resid, return_counts=True)
        data_summary = np.array(data_summary) / counts

    # Plotting is best-effort: a failure is reported but not propagated.
    try:
        plot_func(data_summary, mol, **kwargs)
    except Exception as e:
        print("Plotting error: ", e)
Esempio n. 3
0
def cluster_macro(model,
                  data,
                  macro,
                  method=np.mean,
                  cluster_method=MiniBatchKMeans):
    """Propose a split of one or more macrostates by clustering their micros.

    The statistic ``method`` of ``data`` is computed for every microstate
    belonging to the requested macrostate(s); those per-micro values are then
    clustered with ``cluster_method`` and the micros are grouped by cluster
    label. The grouping is returned; this function itself does not reassign
    macrostates (``metastable_states(model)`` is called first, presumably to
    refresh ``model.metastable_sets`` - confirm against its definition).

    Parameters
    ----------
    model : <htmd.model.Model>
        Model providing ``macronum`` and ``metastable_sets``.
    data : TYPE
        Projected data passed to ``getStateStatistic``.
    macro : int or iterable of int
        Macrostate(s) whose microstates are clustered.
    method : callable, optional
        Aggregation applied to the data of each microstate.
    cluster_method : class, optional
        Clustering class; instantiated with no arguments and fitted. It must
        expose ``labels_`` and ``cluster_centers_`` after ``fit``.

    Returns
    -------
    numpy.ndarray
        One entry per cluster holding the microstate indices assigned to it.
        A regular 2-D array when every cluster has the same size, otherwise a
        1-D object array of index arrays.

    Raises
    ------
    Exception
        If a requested macrostate index is outside ``0 .. macronum - 1``.
    """
    metastable_states(model)
    # Accept numpy integer scalars as well as plain ints.
    if isinstance(macro, (int, np.integer)):
        macro = [macro]

    all_micros = np.array([], dtype=int)
    for i in macro:
        # Valid indices are 0 .. macronum - 1; macronum itself is out of range.
        if i < 0 or i >= model.macronum:
            raise Exception("Macro out of bounds")
        all_micros = np.concatenate([all_micros, model.metastable_sets[i]])

    data_by_micro = getStateStatistic(model,
                                      data,
                                      states=all_micros,
                                      statetype="micro",
                                      method=method)
    clusters = cluster_method().fit(data_by_micro)

    new_macro_assignment = [
        all_micros[np.where(clusters.labels_ == label)[0]]
        for label in range(len(clusters.cluster_centers_))
    ]

    try:
        return np.array(new_macro_assignment)
    except ValueError:
        # Recent NumPy refuses to build an array from ragged sequences;
        # clusters usually differ in size, so fall back to an object array.
        ragged = np.empty(len(new_macro_assignment), dtype=object)
        for position, members in enumerate(new_macro_assignment):
            ragged[position] = members
        return ragged
Esempio n. 4
0
def create_bulk(model, metric=None, data=None, threshold=0.2, skip=1):
    """Create a "bulk" macrostate from the microstates with the fewest contacts.

    Modifies the passed model (via ``model.createState``).
    It is intended to be used in ligand binding scenarios.

    Parameters
    ----------
    model : TYPE
        Model to extract a bulk
    metric : TYPE, optional
        Metric to describe a bulk vs not-bulk situation. In general it is the
        contacts between protein and ligand selection with groupsels set to
        'all'. Only used when ``data`` is not given.
    data : TYPE, optional
        Already projected data; takes precedence over ``metric``.
    threshold : float, optional
        Microstates whose per-micro statistic is strictly below this value
        are assigned to the bulk state.
    skip : int, optional
        Frame skip forwarded to ``get_data`` when projecting ``metric``.

    Returns
    -------
    array-like of int
        Indices of the microstates placed in the new macrostate.

    Raises
    ------
    Exception
        If neither ``metric`` nor ``data`` is provided.
    """
    if metric is None and data is None:
        raise Exception("Either a metric or a data object must be provided")

    if data is None:
        data = get_data(model, metric, skip=skip)

    data_by_micro = np.array(
        getStateStatistic(model,
                          data,
                          states=range(model.micronum),
                          statetype="micro"))
    min_contacts = np.where(data_by_micro < threshold)[0]

    if len(min_contacts) == 0:
        # Nothing is below the threshold: fall back to the microstate holding
        # the overall minimum. (argmin over the boolean mask, as before, is
        # all-False here and would always select microstate 0.)
        lowest = np.unravel_index(np.argmin(data_by_micro),
                                  data_by_micro.shape)[0]
        min_contacts = [lowest]

    model.createState(min_contacts)
    print(f"Macrostate created with micros: {min_contacts}")
    return min_contacts
Esempio n. 5
0
    def plotClusters(
        self,
        dimX,
        dimY,
        resolution=100,
        s=4,
        c=None,
        cmap="Greys",
        logplot=False,
        plot=True,
        save=None,
        data=None,
        levels=7,
    ):
        """Draw the cluster centers as a scatter plot over the count histogram.

        Parameters
        ----------
        dimX : int
            Index of projected dimension to use for the X axis.
        dimY : int
            Index of projected dimension to use for the Y axis.
        resolution : int
            Resolution of bincount grid.
        s : float
            Marker size for clusters.
        c : list
            Colors or indexes for each cluster. When omitted the centers are
            drawn in red and no colorbar is added.
        cmap : matplotlib.colors.Colormap
            Matplotlib colormap, used for both the histogram and the scatter plot.
        logplot : bool
            Set True to plot the logarithm of counts.
        plot : bool
            If the method should display the plot
        save : str
            Path of the file in which to save the figure
        data : :class:`MetricData` object
            Optionally you can pass a different MetricData object than the one used for clustering. For example
            if the user wants to cluster on distances but wants to plot the centers on top of RMSD values. The new
            MetricData object needs to have the same simlist as this object.
        levels : int
            Forwarded to ``self._plotCounts``.

        Raises
        ------
        RuntimeError
            If no clustering has been performed yet, or if ``data`` does not
            share this object's frame count and simlist.
        """
        if self.Centers is None:
            raise RuntimeError("Data has not been clustered yet. Cannot plot clusters.")
        from matplotlib import pylab as plt

        if data is not None:
            from htmd.model import getStateStatistic

            # Frame counts are compared first; simulations only if they match.
            same_source = self.numFrames == data.numFrames and np.all(
                [ours == theirs for ours, theirs in zip(self.simlist, data.simlist)]
            )
            if not same_source:
                raise RuntimeError(
                    "The data argument you provided uses a different simlist than this object."
                )
            # Cluster centers re-expressed in the coordinates of `data`.
            per_cluster = getStateStatistic(self, data, range(self.K), statetype="cluster")
            centers = np.vstack(per_cluster)
        else:
            data = self
            centers = self.Centers

        if data.description is None:
            xlabel = "Dimension {}".format(dimX)
            ylabel = "Dimension {}".format(dimY)
        else:
            xlabel = data.description.description[dimX]
            ylabel = data.description.description[dimY]

        title = (
            "Clusters plotted onto logarithmic counts histogram"
            if logplot
            else "Clusters plotted onto counts histogram"
        )
        fig, axes, _ = self._plotCounts(
            dimX,
            dimY,
            resolution=resolution,
            logplot=logplot,
            levels=levels,
            cmap=cmap,
            title=title,
            xlabel=xlabel,
            ylabel=ylabel,
        )

        marker_colors = "r" if c is None else c
        points = axes.scatter(
            centers[:, dimX],
            centers[:, dimY],
            s=s,
            c=marker_colors,
            cmap=cmap,
            linewidths=0,
            marker="o",
        )
        if c is not None:
            self._setColorbar(fig, points, "Cluster groups")

        if save is not None:
            plt.savefig(save, dpi=300, bbox_inches="tight", pad_inches=0.2)
        if plot:
            plt.show()
Esempio n. 6
0
    # Ad-hoc example/driver: load stored projected data and a model, average
    # the data per macrostate and render it with contact_plot.
    # NOTE(review): getStateStatistic and contact_plot are not imported in this
    # fragment; presumably they are available at module level - confirm.
    from htmd.projections.metric import MetricData
    from htmd.projections.metricdistance import MetricDistance
    from htmd.model import Model
    from htmd.molecule.molecule import Molecule
    import numpy as np

    # Previously saved projected data (hard-coded, machine-specific path).
    data = MetricData()
    data.load(
        "/workspace8/p27_sj403/10-11-2018_p27_short_sj403/analysis/17_11_2018/testing.dat"
    )
    # Previously saved model (hard-coded, machine-specific path).
    model = Model()
    model.load(
        "/workspace8/p27_sj403/10-11-2018_p27_short_sj403/analysis/17_11_2018/model.dat"
    )
    # Structure file of the first simulation in the model's simlist.
    mol = Molecule(model.data.simlist[0].molfile)
    # One aggregated entry per macrostate (default statistic of getStateStatistic).
    mean_dat = getStateStatistic(model, data, range(model.macronum))
    # Residue-grouped distance metric; here it is only used to obtain the
    # mapping for `mol`, not to project anything.
    met = MetricDistance(sel1="noh and protein or resname MOL",
                         sel2="noh and protein or resname MOL",
                         groupsel1="residue",
                         groupsel2="residue",
                         metric="distances",
                         pbc=False)
    mapping = met.getMapping(mol)
    # Save the figure to disk instead of showing it (plot=False).
    contact_plot(mean_dat,
                 mol,
                 rows=2,
                 cols=2,
                 model=model,
                 plot=False,
                 save="/home/pablo/test.png",
                 mapping=mapping)
Esempio n. 7
0
    def plotClusters(self, dimX, dimY, resolution=100, s=4, c=None, cmap=None, logplot=False, plot=True, save=None, data=None):
        """ Draw the cluster centers as a scatter plot over the count histogram.

        Parameters
        ----------
        dimX : int
            Index of projected dimension to use for the X axis.
        dimY : int
            Index of projected dimension to use for the Y axis.
        resolution : int
            Resolution of bincount grid.
        s : float
            Marker size for clusters.
        c : list
            Colors or indexes for each cluster. A colorbar is added only when given.
        cmap : matplotlib.colors.Colormap
            Matplotlib colormap for the scatter plot. Defaults to ``plt.cm.jet``.
        logplot : bool
            Set True to plot the logarithm of counts.
        plot : bool
            If the method should display the plot
        save : str
            Path of the file in which to save the figure
        data : :class:`MetricData` object
            Optionally you can pass a different MetricData object than the one used for clustering. For example
            if the user wants to cluster on distances but wants to plot the centers on top of RMSD values. The new
            MetricData object needs to have the same simlist as this object.

        Raises
        ------
        RuntimeError
            If no clustering has been performed yet, or if ``data`` does not
            share this object's frame count and simlist.
        """
        if self.Centers is None:
            raise RuntimeError('Data has not been clustered yet. Cannot plot clusters.')
        from matplotlib import pylab as plt

        if data is not None:
            from htmd.model import getStateStatistic
            # Frame counts are compared first; simulations only if they match.
            same_source = self.numFrames == data.numFrames and np.all(
                [ours == theirs for ours, theirs in zip(self.simlist, data.simlist)])
            if not same_source:
                raise RuntimeError('The data argument you provided uses a different simlist than this object.')
            # Cluster centers re-expressed in the coordinates of `data`.
            per_cluster = getStateStatistic(self, data, range(self.K), statetype='cluster')
            centers = np.vstack(per_cluster)
        else:
            data = self
            centers = self.Centers

        if cmap is None:
            cmap = plt.cm.jet

        if data.description is None:
            xlabel = 'Dimension {}'.format(dimX)
            ylabel = 'Dimension {}'.format(dimY)
        else:
            xlabel = data.description.description[dimX]
            ylabel = data.description.description[dimY]

        # All frames of all trajectories stacked into a single array.
        frames = np.concatenate(data.dat)
        fig, axes, _ = self._contourPlot(
            frames[:, dimX],
            frames[:, dimY],
            resolution=resolution,
            xlabel=xlabel,
            ylabel=ylabel,
            title='Clusters plotted onto counts histogram',
            logplot=logplot,
        )
        points = axes.scatter(centers[:, dimX], centers[:, dimY], s=s, c=c, cmap=cmap, linewidths=0, marker='o')
        if c is not None:
            self._setColorbar(fig, points, 'Cluster groups')

        if save is not None:
            plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
        if plot:
            plt.show()
Esempio n. 8
0
def _padded_limits(values, limits):
    """Return ``(low, high)`` axis limits, padding any bound given as None by 20 %."""
    low, high = limits
    if low is None:
        low = np.min(values)
        # Scale away from the data: shrink a positive minimum, grow a
        # negative one, so points are never clipped.
        low = low * 0.8 if low >= 0 else low * 1.2
    if high is None:
        high = np.max(values)
        high = high * 1.2 if high >= 0 else high * 0.8
    return low, high


def plot_model_by(model,
                  dat1,
                  dat2,
                  method1=np.mean,
                  s=15,
                  method2=np.mean,
                  cmap="Set1",
                  legend=True,
                  ylabel=None,
                  xlabel=None,
                  ylim=(None, None),
                  xlim=(None, None)):
    """Scatter two per-microstate quantities against each other, coloured by macrostate.

    Parameters
    ----------
    model : TYPE
        Model providing ``micronum``, ``macronum``, ``macro_ofmicro`` and
        ``eqDistribution``.
    dat1, dat2 : TYPE
        Data for the X and Y axes. Each is aggregated per microstate with
        ``getStateStatistic`` when the matching method is set; otherwise it
        is used as-is and must already hold one value per microstate.
    method1, method2 : callable or None, optional
        Aggregation for ``dat1`` / ``dat2``. Pass None to skip aggregation.
    s : float, optional
        Marker size.
    cmap : str, optional
        Colormap name, resampled to ``model.macronum`` colors.
    legend : bool, optional
        Whether to draw the legend.
    ylabel, xlabel : str, optional
        Axis labels; left untouched when None.
    ylim, xlim : tuple, optional
        ``(min, max)`` axis limits. A None bound is derived from the data
        with a 20 % margin.

    Returns
    -------
    tuple
        The per-microstate X and Y values that were plotted.
    """

    def per_micro(dat, method):
        if not method:
            # Already per-microstate; make sure it supports array indexing.
            return np.asarray(dat)
        return np.array(
            getStateStatistic(model,
                              dat,
                              states=range(model.micronum),
                              statetype="micro",
                              method=method)).ravel()

    cum_dat1 = per_micro(dat1, method1)
    cum_dat2 = per_micro(dat2, method2)

    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # Matplotlib 3.9.
    palette = plt.get_cmap(cmap, model.macronum)

    macro_pop = np.round(model.eqDistribution(plot=False) * 100, 1)

    for i in range(model.macronum):
        color = palette(i)
        macro_in_micro = np.where(model.macro_ofmicro == i)[0]
        plt.scatter(cum_dat1[macro_in_micro],
                    cum_dat2[macro_in_micro],
                    color=color,
                    s=s,
                    alpha=0.6,
                    edgecolor=color,
                    label=f"Macro {i}, {macro_pop[i]}%")

    if legend:
        plt.legend()

    if xlabel:
        plt.xlabel(xlabel)
    if ylabel:
        plt.ylabel(ylabel)

    xmin, xmax = _padded_limits(cum_dat1, xlim)
    ymin, ymax = _padded_limits(cum_dat2, ylim)

    plt.ylim(ymin, ymax)
    plt.xlim(xmin, xmax)
    return cum_dat1, cum_dat2
Esempio n. 9
0
def plot_model_by_rmsd(model,
                       rmsd_dat=None,
                       rmsd_mean=None,
                       rmsd_std=None,
                       cmap='jet',
                       legend=True,
                       save=None,
                       ax=None):
    """Scatter per-microstate mean RMSD against its standard deviation.

    Points are coloured by macrostate; macrostates are drawn in order of
    decreasing equilibrium population.

    Parameters
    ----------
    model : TYPE
        Model providing ``micronum``, ``macronum``, ``macro_ofmicro`` and
        ``eqDistribution``.
    rmsd_dat : TYPE, optional
        Projected RMSD data. When given, the per-microstate mean and
        standard deviation are computed from it and any ``rmsd_mean`` /
        ``rmsd_std`` passed in are ignored.
    rmsd_mean, rmsd_std : array-like, optional
        Precomputed per-microstate mean and standard deviation. Both are
        required when ``rmsd_dat`` is not given.
    cmap : str, optional
        Colormap name, resampled to ``model.macronum`` colors.
    legend : bool, optional
        Whether to draw the legend.
    save : str, optional
        Path where the figure is saved; nothing is saved when falsy.
    ax : matplotlib axes, optional
        Axes to draw on. A new 8x8 figure is created when omitted.

    Returns
    -------
    tuple of numpy.ndarray
        Flattened per-microstate mean and standard deviation.

    Raises
    ------
    RuntimeError
        If ``rmsd_dat`` is missing and ``rmsd_mean`` or ``rmsd_std`` is
        missing too.
    """
    # Both precomputed arrays are needed when no raw data is supplied.
    if rmsd_dat is None and (rmsd_mean is None or rmsd_std is None):
        raise RuntimeError(
            "Either rmsd_dat or rmsd_mean & rmsd_std should be defined")

    if rmsd_dat is not None:
        rmsd_mean = getStateStatistic(model,
                                      rmsd_dat,
                                      states=range(model.micronum),
                                      statetype="micro",
                                      method=np.mean)
        rmsd_std = getStateStatistic(model,
                                     rmsd_dat,
                                     states=range(model.micronum),
                                     statetype="micro",
                                     method=np.std)

    rmsd_mean = np.array(rmsd_mean).ravel()
    rmsd_std = np.array(rmsd_std).ravel()

    if ax is not None:
        plt.sca(ax)
    else:
        plt.figure(figsize=(8, 8))

    # pyplot.get_cmap is used because matplotlib.cm.get_cmap was removed in
    # Matplotlib 3.9.
    palette = plt.get_cmap(cmap, model.macronum)

    macro_pop = np.round(model.eqDistribution(plot=False) * 100, 1)
    macro_sort = np.argsort(macro_pop)[::-1]

    for idx, i in enumerate(macro_sort):
        # The most populated macrostate gets the last color of the palette.
        color = palette(model.macronum - 1 - idx)
        macro_in_micro = np.where(model.macro_ofmicro == i)[0]
        plt.scatter(rmsd_mean[macro_in_micro],
                    rmsd_std[macro_in_micro],
                    color=color,
                    s=15,
                    alpha=0.5,
                    edgecolor=color,
                    label=f"Macro {i}, {macro_pop[i]}%")
    if legend:
        plt.legend()
    plt.xlabel("Mean RMSD \n by microstate (Å)")
    plt.ylabel(r"$\it{SD\ RMSD \ by\ microstate\ (Å)}$")
    plt.ylim(0, np.max(rmsd_std) * 1.2)
    plt.xlim(0, np.max(rmsd_mean) * 1.2)

    if save:
        plt.savefig(save, dpi=300, bbox_inches='tight', pad_inches=0.2)
    return rmsd_mean, rmsd_std