예제 #1
0
def calculate_scores(data,
                     kde_bw=0.15,
                     pca_component=0.95,
                     score_weights=None):
    """Score samples by a weighted sum of per-atom kernel density estimates.

    For every atom (first axis of ``data``) the samples are projected with
    ``PCA(n_components=pca_component)``, a ``kde`` estimator is built on the
    projection with ``kde_bw`` as the bandwidth of each continuous dimension,
    and the estimator's ``pdf()`` values are multiplied by that atom's
    weight. The weighted densities are then summed over atoms.

    Parameters
    ----------
    data : ndarray, shape=(n_atoms, n_samples, n_dims)
    kde_bw : scalar
        Bandwidth applied to every dimension of the PCA-transformed data.
    pca_component : scalar
        Forwarded to ``PCA`` as ``n_components``.
    score_weights : None or dict
        Optional ``{atom_idx: weight}`` overrides. Atoms that are not listed
        keep the default weight of 1.

    Returns
    -------
    scores : ndarray, shape=(n_samples,)
        ``None`` is returned instead (after a message is printed) when a
        ``ValueError`` is raised while the estimators are built or evaluated.

    See also
    --------
    pylipid.func.collect_bound_poses
    pylipid.func.vectorize_poses

    """
    atom_indices = np.arange(np.shape(data)[0])

    # Every atom starts with unit weight; caller-supplied entries override.
    weights = dict.fromkeys(atom_indices, 1)
    if score_weights is not None:
        weights.update(score_weights)

    try:
        # Phase 1: fit one density estimator per atom on its PCA projection.
        estimators = {}
        for idx in atom_indices:
            reduced = PCA(n_components=pca_component).fit_transform(data[idx])
            n_kept = len(reduced[0])
            # One continuous ("c") variable and one bandwidth per kept component.
            estimators[idx] = kde(data=reduced,
                                  var_type="c" * n_kept,
                                  bw=[kde_bw] * n_kept)

        # Phase 2: evaluate binding poses and accumulate the weighted densities.
        weighted_densities = [
            weights[idx] * estimators[idx].pdf() for idx in atom_indices
        ]
        return np.sum(weighted_densities, axis=0)
    except ValueError:
        print(
            "Pose generation error -- possibly due to insufficient number of binding event."
        )
예제 #2
0
# Common bin edges shared by every histogram below.
bins = np.linspace(0, XL, 50)

# Illustrate: one density-normalised histogram per axis, in axis order.
# The third entry is the only one that passes sample weights.
hist_specs = (
    (P, 'Example sample', {}),
    (q, 'Proposal sample and pdf', {}),
    (q, 'Proposal sample - weighted', {'weights': w}),
    (r, 'resmpl: Residual', {}),
    (s, 'resmpl: Systematic', {}),
    (t, 'resmpl: Stochastic', {}),
)
for hist_idx, (hist_sample, hist_label, hist_extra) in enumerate(hist_specs):
    axs[hist_idx].hist(hist_sample,
                       bins,
                       density=True,
                       label=hist_label,
                       **hist_extra)

# Add actual pdfs on top of the first two histograms.
axs[0].plot(xx, pdf(xx, dof), label='pdf: Target')
axs[1].plot(xx, qdf(xx, scl), label='pdf: Proposal')

# kde pdf comparison: analytic curves in black, then one KDE curve per
# resampled set, all drawn on the seventh axis.
cmp_ax = axs[6]
cmp_ax.plot(xx, pdf(xx, dof), c='k', lw=3, label='pdf: Target')
cmp_ax.plot(xx, qdf(xx, scl), c='k', lw=2, label='pdf: Proposal')
for kde_sample, kde_label in (
        (r, 'kde: Residual'),
        (s, 'kde: Systematic'),
        (t, 'kde: Stochastic'),
):
    cmp_ax.plot(xx, kde(kde_sample, 'c', bw=[0.1]).pdf(xx), label=kde_label)

# Hide the y tick labels of the first axis, then show a legend on every axis
# of the figure.
axs[0].set_yticklabels([])
for ax in f.axes:
    ax.legend()

# Pause for 0.1 s -- presumably to let the figure window refresh.
plt.pause(.1)
예제 #3
0
def calculate_scores(dist_matrix,
                     kde_bw=0.15,
                     pca_component=0.90,
                     score_weights=None):
    r"""Calculate pose scores from per-atom probability densities.

    The distance vectors of each lipid atom are first reduced in dimension
    with a
    `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_.
    The distribution of the reduced vectors is then estimated, atom by atom,
    with
    `KDEMultivariate <https://www.statsmodels.org/devel/generated/statsmodels.nonparametric.kernel_density.KDEMultivariate.html>`_.

    The score of a lipid pose combines the estimated probability density of
    each atom position in the binding site with the weight given to that atom:

    .. math::
        \text { score }=\sum_{i} W_{i} \cdot \hat{f}_{i, H}(D)

    where :math:`W_{i}` is the weight given to atom i of the lipid molecule,
    H is the bandwidth and :math:`\hat{f}_{i, H}(D)` is a multivariate kernel
    density estimation of the position of atom i in the specified binding
    site. :math:`\hat{f}_{i, H}(D)` is calculated from all the bound lipid
    poses in that binding site.

    Parameters
    ----------
    dist_matrix : numpy.ndarray, shape=(n_lipid_atoms, n_poses, n_binding_site_residues)
        The distance vectors describing the position of bound poses in the
        binding site. This dist_matrix can be generated by
        :meth:`~vectorize_poses`.

    kde_bw : scalar, default=0.15
        The bandwidth for kernel density estimation, applied to every
        dimension kept by the PCA. Used by
        `KDEMultivariate <https://www.statsmodels.org/devel/generated/statsmodels.nonparametric.kernel_density.KDEMultivariate.html>`_.
        By default, the bandwidth is set to 0.15nm which roughly corresponds
        to the vdw radius of MARTINI 2 beads.

    pca_component : scalar, default=0.9
        The number of components to keep, passed to
        `PCA <https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html>`_
        as ``n_components``. If ``0 < pca_component < 1``, the number of
        components is selected such that the amount of variance explained is
        greater than the given fraction.

    score_weights : None or dict
        A dictionary ``{idx_atom: weight}`` overriding the weight of
        individual lipid atoms. Atoms that are not listed keep a weight of 1.

    Returns
    -------
    scores : numpy.ndarray, shape=(n_poses,)
        Scores for bound poses. ``None`` is returned instead (after a message
        is printed) when a ``ValueError`` is raised while the estimators are
        built or evaluated.

    See also
    --------
    pylipid.func.collect_bound_poses
        Collect bound poses from trajectories.
    pylipid.func.vectorize_poses
        Convert bound poses to distance vectors.

    """
    atom_indices = np.arange(np.shape(dist_matrix)[0])

    # Unit weight for every atom unless the caller overrides it.
    weights = dict.fromkeys(atom_indices, 1)
    if score_weights is not None:
        weights.update(score_weights)

    try:
        # Build one density estimator per lipid atom from its reduced vectors.
        density_by_atom = {}
        for idx in atom_indices:
            projected = PCA(n_components=pca_component).fit_transform(
                dist_matrix[idx])
            n_kept = len(projected[0])
            # All kept components are continuous and share the same bandwidth.
            density_by_atom[idx] = kde(data=projected,
                                       var_type="c" * n_kept,
                                       bw=[kde_bw] * n_kept)

        # evaluate binding poses: weighted sum of the per-atom densities
        per_atom_scores = [
            weights[idx] * density_by_atom[idx].pdf() for idx in atom_indices
        ]
        return np.sum(per_atom_scores, axis=0)
    except ValueError:
        print(
            "Pose generation error -- possibly due to insufficient number of binding event."
        )