Beispiel #1
0
    def partial_transform(self, traj):
        """Featurize one MD trajectory as RMSD distances to the reference.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            A molecular dynamics trajectory to featurize.

        Returns
        -------
        features : np.ndarray, shape=(n_frames, n_ref_frames)
            The RMSD of each frame of ``traj`` versus each frame of the
            reference trajectory, so there is one feature per reference
            frame.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """
        indices = self.atom_indices
        # Only slice when an atom selection was configured; otherwise the
        # trajectory is compared as given.
        candidate = traj if indices is None else traj.atom_slice(indices)
        return libdistance.cdist(
            candidate, self.sliced_reference_traj, 'rmsd')
Beispiel #2
0
    def partial_transform(self, traj):
        """Featurize an MD trajectory via RMSD after superposition.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            A molecular dynamics trajectory to featurize.

        Returns
        -------
        features : np.ndarray, dtype=float, shape=(n_samples, n_features)
            The result of ``libdistance.cdist`` with the ``'rmsd'`` metric
            between the (optionally atom-sliced) input trajectory and
            ``self.sliced_reference_traj``; ``features[i]`` belongs to the
            ``i``th snapshot of the input trajectory.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """
        # Start from the full trajectory and narrow it down only if an atom
        # selection has been set on the featurizer.
        subject = traj
        if self.atom_indices is not None:
            subject = traj.atom_slice(self.atom_indices)

        reference = self.sliced_reference_traj
        return libdistance.cdist(subject, reference, 'rmsd')
Beispiel #3
0
    def partial_transform(self, traj):
        """Map each frame of ``traj`` to its RMSD from every reference frame.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            A molecular dynamics trajectory to featurize.

        Returns
        -------
        features : np.ndarray, shape=(n_frames, n_ref_frames)
            The RMSD value of each frame of the input trajectory versus each
            frame in the reference trajectory. The number of features equals
            the number of reference frames.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """
        selection = self.atom_indices
        if selection is None:
            # No atom selection configured: use the trajectory unchanged.
            frames = traj
        else:
            frames = traj.atom_slice(selection)
        distances = libdistance.cdist(
            frames, self.sliced_reference_traj, 'rmsd')
        return distances
Beispiel #4
0
    def partial_transform(self, traj):
        """Featurize a trajectory by its RMSD to the stored reference.

        Parameters
        ----------
        traj : mdtraj.Trajectory
            A molecular dynamics trajectory to featurize.

        Returns
        -------
        features : np.ndarray, dtype=float, shape=(n_samples, n_features)
            Output of ``libdistance.cdist`` (metric ``'rmsd'``) for the
            input trajectory against ``self.sliced_reference_traj``. Row
            ``i`` corresponds to the ``i``th snapshot of the input.

        See Also
        --------
        transform : simultaneously featurize a collection of MD trajectories
        """
        atom_indices = self.atom_indices
        # The reference is already sliced; apply the same selection to the
        # input when one is configured.
        query = (traj.atom_slice(atom_indices)
                 if atom_indices is not None else traj)
        return libdistance.cdist(query, self.sliced_reference_traj, 'rmsd')
Beispiel #5
0
def cdist(XA, XB, metric='euclidean'):
    """Distances between every entry of ``XA`` and every entry of ``XB``.

    Parameters
    ----------
    XA, XB : sequences supporting ``len``
        The two collections to compare.
    metric : str or callable, default 'euclidean'
        A string is forwarded unchanged to ``libdistance.cdist``. Any other
        value is called as ``metric(XB, XA, i)`` for each index ``i`` of
        ``XA`` and must return the distances that fill row ``i``.

    Returns
    -------
    The value returned by ``libdistance.cdist`` for string metrics,
    otherwise an array of shape ``(len(XA), len(XB))``.
    """
    if not isinstance(metric, six.string_types):
        # Callable metric: build the matrix one row of XA at a time.
        n_rows, n_cols = len(XA), len(XB)
        distances = np.empty((n_rows, n_cols))
        for row in range(n_rows):
            distances[row, :] = metric(XB, XA, row)
        return distances

    return libdistance.cdist(XA, XB, metric)
Beispiel #6
0
def test_dist_double_float_1():
    # test without X_indices
    #
    # Generator test: for every vector metric and both precisions, yield a
    # zero-argument check that dist(X, Y[0]) matches column 0 of cdist(X, Y).
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            dist_1 = dist(X, Y[0], metric)
            dist_2 = cdist(X, Y, metric)[:, 0]
            # float32 inputs only agree to about 5 decimal places
            decimal = 5 if X.dtype == np.float32 else 10
            # Bind the current values as lambda defaults. A plain closure
            # looks the names up when it is *called*, so a runner that
            # collects every yielded check before executing them would only
            # ever compare the last (metric, dtype) combination.
            yield lambda a=dist_1, b=dist_2, places=decimal: (
                np.testing.assert_almost_equal(a, b, decimal=places))
Beispiel #7
0
def cdist(XA, XB, metric='euclidean'):
    """Compute the cross-distance matrix between ``XA`` and ``XB``.

    String metrics are delegated to ``libdistance.cdist``. A non-string
    ``metric`` is treated as a callable with signature
    ``metric(XB, XA, i)`` whose return value is stored as row ``i`` of a
    ``(len(XA), len(XB))`` array.
    """
    uses_named_metric = isinstance(metric, six.string_types)
    if uses_named_metric:
        return libdistance.cdist(XA, XB, metric)

    out = np.empty((len(XA), len(XB)))
    # Each row is filled by one call of the user-supplied metric.
    for index, out_row in enumerate(out):
        out_row[:] = metric(XB, XA, index)
    return out
Beispiel #8
0
def test_cdist_double_float_1():
    # test without X_indices
    #
    # Generator test: for every vector metric and both precisions, yield a
    # zero-argument check that cdist agrees with scipy.spatial.distance.cdist.
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            cdist_1 = cdist(X, Y, metric)
            cdist_2 = scipy.spatial.distance.cdist(X, Y, metric)
            # float32 inputs only agree to about 5 decimal places
            decimal = 5 if X.dtype == np.float32 else 10
            # Freeze this iteration's values as defaults; otherwise the
            # lambda would read whatever the loop variables hold at call
            # time, which is the last iteration if the generator was
            # exhausted before the checks run.
            yield lambda got=cdist_1, want=cdist_2, places=decimal: (
                np.testing.assert_almost_equal(got, want, decimal=places))
Beispiel #9
0
def gromos():
    """Unfinished stub: compute the RMSD matrix for every pair in ``trajs``.

    NOTE(review): the function currently has no effect beyond the calls it
    makes. ``centers`` is never filled, nothing is returned, and the
    comparison against the running maximum has an empty body. ``trajs`` is
    not a parameter, so it must be defined outside this function.
    """
    trajs  # bare reference kept from the original; it has no effect
    largest_seen = -9999
    centers = {}
    for first in trajs:
        for second in trajs:
            pairwise = libdistance.cdist(first, second, metric="rmsd")
            # presumably (row, column, value) of the largest entry --
            # TODO confirm against maxmatrix
            row, col, peak = maxmatrix(pairwise)
            if peak >= largest_seen:
                pass
Beispiel #10
0
def test_cdist_double_float_1():
    # test without X_indices
    #
    # Yields one zero-argument check per (metric, dtype) combination that
    # compares cdist against scipy.spatial.distance.cdist.
    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            cdist_1 = cdist(X, Y, metric)
            cdist_2 = scipy.spatial.distance.cdist(X, Y, metric)
            # float32 inputs only agree to about 5 decimal places
            decimal = 5 if X.dtype == np.float32 else 10
            # Default arguments capture the values of *this* iteration.
            # A bare closure is late-binding and would see only the final
            # loop values if the checks are executed after the generator
            # has been fully consumed.
            yield lambda actual=cdist_1, desired=cdist_2, places=decimal: (
                np.testing.assert_almost_equal(
                    actual, desired, decimal=places))
Beispiel #11
0
def test_assign_nearest_rmsd_1():
    # rmsd assign nearest without X_indices
    #
    # assign_nearest must agree with a brute-force argmin over the full
    # RMSD cross-distance matrix, and its inertia must equal the sum of the
    # selected distances.
    labels, total = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assert isinstance(labels, np.ndarray)
    assert isinstance(total, float)

    pairwise = cdist(X_rmsd, Y_rmsd, 'rmsd')
    assert pairwise.shape == (10, 3)

    expected_labels = pairwise.argmin(axis=1)
    np.testing.assert_array_equal(labels, expected_labels)

    # distance from each of the 10 frames to its assigned center
    expected_total = pairwise[np.arange(10), labels].sum()
    np.testing.assert_almost_equal(total, expected_total, decimal=6)
Beispiel #12
0
def test_assign_nearest_rmsd_1():
    # rmsd assign nearest without X_indices
    #
    # Cross-check assign_nearest against the explicit RMSD distance matrix.
    result = assign_nearest(X_rmsd, Y_rmsd, "rmsd")
    assignments, inertia = result
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    distances = cdist(X_rmsd, Y_rmsd, 'rmsd')
    assert distances.shape == (10, 3)

    # every frame must be assigned to its closest center
    np.testing.assert_array_equal(assignments, distances.argmin(axis=1))

    # inertia is the sum of the distances to the assigned centers
    chosen = distances[np.arange(10), assignments]
    np.testing.assert_almost_equal(inertia, chosen.sum(), decimal=6)
Beispiel #13
0
def test_assign_nearest_rmsd_2():
    # rmsd assign nearest with X_indices
    #
    # Same cross-check as the variant without X_indices, but only the rows
    # selected by X_indices take part.
    labels, total = assign_nearest(X_rmsd, Y_rmsd, "rmsd", X_indices)
    assert isinstance(labels, np.ndarray)
    assert isinstance(total, float)

    # restrict the full matrix to the selected rows, in double precision
    selected = cdist(X_rmsd, Y_rmsd, 'rmsd')[X_indices].astype(np.double)
    assert selected.shape == (5, 3)

    expected_labels = selected.argmin(axis=1)
    np.testing.assert_array_equal(labels, expected_labels)

    expected_total = selected[np.arange(5), labels].sum()
    np.testing.assert_almost_equal(total, expected_total, decimal=5)
Beispiel #14
0
def test_assign_nearest_rmsd_2():
    # rmsd assign nearest with X_indices
    #
    # Verify assign_nearest on the X_indices subset against the matching
    # rows of the RMSD cross-distance matrix.
    outcome = assign_nearest(X_rmsd, Y_rmsd, "rmsd", X_indices)
    assignments, inertia = outcome
    assert isinstance(assignments, np.ndarray)
    assert isinstance(inertia, float)

    full_matrix = cdist(X_rmsd, Y_rmsd, 'rmsd')
    subset = full_matrix[X_indices].astype(np.double)
    assert subset.shape == (5, 3)

    # each selected frame goes to its closest center
    np.testing.assert_array_equal(assignments, subset.argmin(axis=1))

    # inertia sums the distances of the 5 selected frames to their centers
    picked = subset[np.arange(5), assignments]
    np.testing.assert_almost_equal(inertia, picked.sum(), decimal=5)
Beispiel #15
0
def test_canberra_32_2():
    # float32 canberra with X_indices: assign_nearest may legitimately pick
    # a different index than argmin() when several distances in a row are
    # exactly tied, so mismatching assignments are only an error when the
    # chosen distance is not equal to the row minimum.
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]
        # randint's upper bound is exclusive, so high=10 draws from 0..9,
        # the same range as the deprecated random_integers(low=0, high=9)
        X_indices = random.randint(low=0, high=10, size=5).astype(np.intp)

        assignments, inertia = assign_nearest(
            X, Y, 'canberra', X_indices=X_indices)
        cdist_can = cdist(X[X_indices], Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)

        # Rows where the assignment disagrees with the reference argmin.
        different = np.where(assignments != ref)[0]
        # Compare, row by row, the distance to the assigned center with the
        # distance to the reference center. The previous check compared the
        # differing rows against the first differing row, which is always
        # true for a single mismatch and says nothing about ties. With no
        # mismatches both arrays are empty and the check passes.
        np.testing.assert_array_equal(
            cdist_can[different, assignments[different]],
            cdist_can[different, ref[different]])
Beispiel #16
0
def test_assign_nearest_float_double_2():
    # test with X_indices
    #
    # Generator test: for every vector metric and both precisions, yield
    # zero-argument checks that assign_nearest on the X_indices subset
    # agrees with a brute-force argmin / sum over cdist.

    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric, X_indices)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist_1 = cdist(X[X_indices], Y, metric=metric)
            # float32 inputs only agree to about 5 decimal places
            decimal = 5 if X.dtype == np.float32 else 10

            # Bind this iteration's values as defaults: closures are
            # late-binding, so without this every yielded check would use
            # the final loop values if run after the generator is exhausted.
            yield lambda got=assignments, dists=cdist_1: (
                np.testing.assert_array_equal(got, dists.argmin(axis=1)))
            yield (lambda total=inertia, got=assignments, dists=cdist_1,
                   places=decimal:
                   np.testing.assert_almost_equal(
                       total,
                       dists[np.arange(5), got].sum(),
                       decimal=places))
Beispiel #17
0
def test_canberra_32_2():
    # Canberra distance in float32, restricted to X_indices. Exact ties in
    # the distance matrix mean assign_nearest and argmin() may disagree on
    # the index while both being correct, so only the *distances* of the
    # disagreeing picks are required to match.
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]
        # random_integers is deprecated; randint with an exclusive upper
        # bound of 10 covers the same closed range 0..9.
        X_indices = (random.randint(low=0, high=10, size=5)
                     .astype(np.intp))

        assignments, inertia = assign_nearest(X, Y, 'canberra',
                                              X_indices=X_indices)
        cdist_can = cdist(X[X_indices], Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)

        mismatched = np.where(assignments != ref)[0]
        assigned_dist = cdist_can[mismatched, assignments[mismatched]]
        minimal_dist = cdist_can[mismatched, ref[mismatched]]
        # A disagreement is acceptable only when it is a genuine tie. The
        # earlier `row == row[0]` test compared whole rows with each other
        # instead of the entries within a row, so it could neither detect a
        # wrong assignment in a single row nor tolerate ties in several.
        np.testing.assert_array_equal(assigned_dist, minimal_dist)
Beispiel #18
0
def test_assign_nearest_float_double_2():
    # test with X_indices
    #
    # Yields, per (metric, dtype) combination, one check of the assignments
    # and one check of the inertia against values derived from cdist.

    for metric in VECTOR_METRICS:
        for X, Y in ((X_double, Y_double), (X_float, Y_float)):
            if metric == 'canberra' and X.dtype == np.float32:
                # this is tested separately
                continue

            assignments, inertia = assign_nearest(X, Y, metric, X_indices)
            assert isinstance(assignments, np.ndarray)
            assert isinstance(inertia, float)

            cdist_1 = cdist(X[X_indices], Y, metric=metric)
            # float32 inputs only agree to about 5 decimal places
            decimal = 5 if X.dtype == np.float32 else 10

            # The lambdas take their inputs as default arguments so each
            # one keeps the values of the iteration that created it, even
            # when it is invoked after the loop has moved on.
            yield lambda labels=assignments, matrix=cdist_1: (
                np.testing.assert_array_equal(
                    labels, matrix.argmin(axis=1)))
            yield (lambda expected=inertia, labels=assignments,
                   matrix=cdist_1, places=decimal:
                   np.testing.assert_almost_equal(
                       expected,
                       matrix[np.arange(5), labels].sum(),
                       decimal=places))
def cal_rmsdmatrix(xtcf0, xtcf1, top, sel, dt=1, outfname=None):
    """Compute, save and return the RMSD matrix between two xtc files.

    Parameters
    ----------
    xtcf0, xtcf1 : str
        Paths of the two xtc trajectories. When both are equal the file is
        loaded only once and compared with itself.
    top : str
        Path of the PDB file used as topology for both trajectories.
    sel : str
        Atom selection string passed to the topology's ``select``.
    dt : int, default 1
        Stride used when loading the trajectories.
    outfname : str, optional
        Output name without extension. Defaults to the two input base names
        (up to their first ".") joined by "_".

    Returns
    -------
    rmsd, v, x, y
        The upper-triangular RMSD matrix followed by the three values
        returned by ``maxmatrix`` in the order ``v, x, y`` (presumably the
        maximum and its position -- TODO confirm against maxmatrix).

    The matrix and ``[x, y, v]`` are also written to ``outfname + '.npz'``
    under the keys ``a`` and ``b``.
    """
    if not outfname:
        stem0 = os.path.basename(xtcf0).split(".")[0]
        stem1 = os.path.basename(xtcf1).split(".")[0]
        outfname = "%s_%s" % (stem0, stem1)

    top = md.load_pdb(top)
    ndx = top.top.select(sel)

    xtc0 = md.load_xtc(xtcf0, top=top, atom_indices=ndx, stride=dt)
    same_file = xtcf0 == xtcf1
    xtc1 = xtc0 if same_file else md.load_xtc(
        xtcf1, top=top, atom_indices=ndx, stride=dt)

    # NOTE(review): the lower triangle is zeroed even when the two inputs
    # differ, where the matrix is not expected to be symmetric -- confirm
    # this is intended.
    rmsd = np.triu(libdistance.cdist(xtc0, xtc1, metric="rmsd"))

    x, y, v = maxmatrix(rmsd)
    np.savez_compressed(outfname + '.npz', a=rmsd, b=np.array([x, y, v]))
    return rmsd, v, x, y
Beispiel #20
0
def test_canberra_32_1():
    # with canberra in float32, there is a rounding issue where many of
    # the distances come out exactly the same, but due to finite floating
    # point resolution, a different one gets picked than by argmin()
    # on the cdist
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]

        assignments, inertia = assign_nearest(X, Y, 'canberra')
        cdist_can = cdist(X, Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)

        # Rows where the assignment disagrees with the reference argmin.
        different = np.where(assignments != ref)[0]
        # A disagreement is fine only if it is a tie, i.e. the distance to
        # the assigned center equals the distance to the reference center
        # in that same row. The previous `row == row[0]` check compared the
        # differing rows against the first differing row, so it always
        # passed for a single mismatch. With no mismatches both arrays are
        # empty and the check passes.
        np.testing.assert_array_equal(
            cdist_can[different, assignments[different]],
            cdist_can[different, ref[different]])
Beispiel #21
0
def test_canberra_32_1():
    # with canberra in float32, there is a rounding issue where many of
    # the distances come out exactly the same, but due to finite floating
    # point resolution, a different one gets picked than by argmin()
    # on the cdist
    for _ in range(10):
        X = random.randn(10, 2).astype(np.float32)
        Y = X[[0, 1, 2], :]

        assignments, inertia = assign_nearest(X, Y, 'canberra')
        cdist_can = cdist(X, Y, metric='canberra')
        ref = cdist_can.argmin(axis=1)

        mismatched = np.where(assignments != ref)[0]
        assigned_dist = cdist_can[mismatched, assignments[mismatched]]
        minimal_dist = cdist_can[mismatched, ref[mismatched]]
        # Only exact ties may explain a different pick. Comparing the two
        # distances per row replaces the old `row == row[0]` test, which
        # broadcast the first differing row against the others rather than
        # looking at the entries inside each row.
        np.testing.assert_array_equal(assigned_dist, minimal_dist)
Beispiel #22
0
def test_cdist_rmsd_1():
    # cdist with the "rmsd" metric must match md.rmsd evaluated against
    # each frame of Y_rmsd in turn.
    got = cdist(X_rmsd, Y_rmsd, "rmsd")

    per_reference = []
    for i in range(len(Y_rmsd)):
        per_reference.append(md.rmsd(X_rmsd, Y_rmsd[i], precentered=True))
    # one entry per Y frame -> transpose so rows follow X_rmsd like `got`
    all2all = np.array(per_reference).T

    np.testing.assert_almost_equal(got, all2all, decimal=5)
Beispiel #23
0
def test_cdist_rmsd_1():
    # Reference: md.rmsd of X_rmsd against every single frame of Y_rmsd,
    # transposed so that it lines up with the cdist output.
    n_refs = len(Y_rmsd)
    columns = [
        md.rmsd(X_rmsd, Y_rmsd[k], precentered=True)
        for k in range(n_refs)
    ]
    expected = np.array(columns).T

    actual = cdist(X_rmsd, Y_rmsd, "rmsd")
    np.testing.assert_almost_equal(actual, expected, decimal=5)