Example #1
0
def validate_input_arrays(predictions, measurements, uncertainties, prior_pops=None):
    """Check input data for correct shape and dtype

    Parameters
    ----------
    predictions : ndarray, shape = (num_frames, num_measurements)
        predictions[j, i] gives the ith observable predicted at frame j
    measurements : ndarray, shape = (num_measurements)
        measurements[i] gives the ith experimental measurement
    uncertainties : ndarray, shape = (num_measurements)
        uncertainties[i] gives the uncertainty of the ith experiment
    prior_pops : ndarray, shape = (num_frames), optional
        Prior populations of each conformation.  If None, skip.

    Returns
    -------
    None
        The function only validates; any exception raised by ``ensure_type``
        on a mismatch propagates to the caller.

    Notes
    -----
    All inputs must have float64 type and compatible shapes.
    """
    # Validate `predictions` *before* unpacking its shape: otherwise an input
    # with the wrong number of dimensions fails on the tuple unpacking below
    # with an unhelpful message instead of the ensure_type diagnostic.
    predictions = ensure_type(predictions, np.float64, 2, "predictions")
    num_frames, num_measurements = predictions.shape

    # The remaining arrays must agree with the dimensions of `predictions`.
    ensure_type(measurements, np.float64, 1, "measurements", shape=(num_measurements,))
    ensure_type(uncertainties, np.float64, 1, "uncertainties", shape=(num_measurements,))

    if prior_pops is not None:
        ensure_type(prior_pops, np.float64, 1, "prior_pops", shape=(num_frames,))
Example #2
0
def test_ensure_type_25():
    """Casting `a` to float64 with warn_on_cast=False must emit no warning."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")

        converted = ensure_type(a, np.float64, ndim=1, name='', length=10, warn_on_cast=False)

        # the returned array carries the requested dtype ...
        assert converted.dtype == np.float64
        # ... while the input array itself is left untouched
        assert a.dtype == np.float32
        # warn_on_cast=False suppresses the cast warning
        assert len(caught) == 0
Example #3
0
def test_ensure_type_2():
    """Casting `a` to float64 emits exactly one TypeCastPerformanceWarning."""
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")

        converted = ensure_type(a, np.float64, ndim=1, name='', length=10)

        # the returned array carries the requested dtype ...
        assert converted.dtype == np.float64
        # ... while the input array itself is left untouched
        assert a.dtype == np.float32
        # a single warning of the expected category was recorded
        assert len(caught) == 1
        last_warning = caught[-1]
        assert issubclass(last_warning.category, TypeCastPerformanceWarning)
Example #4
0
def compute_translation_and_rotation(mobile, target):
    """Returns the translation and rotation mapping mobile onto target.

    Parameters
    ----------
    mobile : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame, to be aligned onto target.
    target : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame

    Returns
    -------
    translation : ndarray, shape=(3,)
        Centroid of `target`, i.e. the offset to add to the centered and
        rotated mobile coordinates.
    rotation : ndarray, shape=(3,3)
        Rotation matrix to apply to mobile to carry out the transformation.
    """

    ensure_type(mobile, 'float', 2, 'mobile', shape=(None, 3))
    # target must have the same number of atoms as mobile; checking it
    # against its own shape would accept any atom count.
    ensure_type(target, 'float', 2, 'target', shape=(mobile.shape[0], 3))

    mu1 = mobile.mean(0)
    mu2 = target.mean(0)

    translation = mu2

    # work on centered copies so the caller's arrays are not modified
    mobile = mobile - mu1
    target = target - mu2

    correlation_matrix = np.dot(np.transpose(mobile), target)
    V, _, W_tr = np.linalg.svd(correlation_matrix)
    # a negative determinant product means the optimal orthogonal transform
    # is a reflection; flip the last column of V to get a proper rotation
    is_reflection = (np.linalg.det(V) * np.linalg.det(W_tr)) < 0.0
    if is_reflection:
        V[:, -1] = -V[:, -1]
    rotation = np.dot(V, W_tr)

    return translation, rotation
Example #5
0
def compute_translation_and_rotation(mobile, target):
    """Returns the translation and rotation mapping mobile onto target.

    Parameters
    ----------
    mobile : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame, to be aligned onto target.
    target : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame

    Returns
    -------
    translation : ndarray, shape=(3,)
        Centroid of `target`, i.e. the offset to add to the centered and
        rotated mobile coordinates.
    rotation : ndarray, shape=(3,3)
        Rotation matrix to apply to mobile to carry out the transformation.
    """

    ensure_type(mobile, 'float', 2, 'mobile', warn_on_cast=False, shape=(None, 3))
    # target must have the same number of atoms as mobile; checking it
    # against its own shape would accept any atom count.
    ensure_type(target, 'float', 2, 'target', warn_on_cast=False, shape=(mobile.shape[0], 3))

    mu1 = mobile.mean(0)
    mu2 = target.mean(0)

    translation = mu2

    # work on centered copies so the caller's arrays are not modified
    mobile = mobile - mu1
    target = target - mu2

    correlation_matrix = np.dot(np.transpose(mobile), target)
    V, _, W_tr = np.linalg.svd(correlation_matrix)
    # a negative determinant product means the optimal orthogonal transform
    # is a reflection; flip the last column of V to get a proper rotation
    is_reflection = (np.linalg.det(V) * np.linalg.det(W_tr)) < 0.0
    if is_reflection:
        V[:, -1] = -V[:, -1]
    rotation = np.dot(V, W_tr)

    return translation, rotation
Example #6
0
def kmeans_mds(xyzlist, k=10, max_iters=100, max_time=10, threshold=1e-8, nearest_medoid=False):
    """k-means clustering with the RMSD distance metric.

    This is an iterative algorithm. During each iteration we first move each
    cluster center to the empirical average of the conformations currently
    assigned to it, and then we re-assign all of the conformations given the
    new locations of the centers.

    To compute the average conformations, we use a form of classical
    multidimensional scaling / principal coordinate analysis.

    Parameters
    ----------
    xyzlist : ndarray, shape = (n_frames, n_atoms, 3)
        Conformations to cluster; passed through ``ensure_type`` as float32.
        Each frame is centered in place on the array ``ensure_type`` returns
        (NOTE(review): this may be the caller's own array if no cast/copy
        was needed -- confirm against ensure_type).
    k : int
        Number of clusters.
    max_iters : int or None
        Stop once the zero-based iteration counter reaches this value.
        None disables the check.
    max_time : float or None
        Stop once this many seconds have elapsed since the start.
        None disables the check.
    threshold : float or None
        Stop once the score decreases by less than this amount between
        consecutive iterations. None disables the check.
    nearest_medoid : bool
        If True, each center is replaced by the member of its cluster that
        is closest (by RMSD) to the averaged structure.

    Returns
    -------
    centers : ndarray, shape = (k, n_atoms, 3)
        Cartesian coordinates of the cluster centers.
    assignments : ndarray of int64, shape = (n_frames,)
        assignments[i] = j means frame i is assigned to cluster j.
    assignment_dist : ndarray, shape = (n_frames,)
        RMSD between each frame and its cluster center.
    scores : ndarray
        Clustering score (RMS of assignment_dist) per iteration, preceded by
        an initial ``inf`` entry.
    times : ndarray
        ``time.time()`` at the start and after every iteration.
    """
    xyzlist = ensure_type(xyzlist, np.float32, 3, name='xyzlist', shape=(None, None, 3), warn_on_cast=False)

    # center every frame; the centroid is computed in float64
    for x in xyzlist:
        centroid = x.astype('float64').mean(0)
        assert centroid.shape == (3,)
        x -= centroid

    # setup for the rmsd calculation
    n_frames, n_atoms = xyzlist.shape[0:2]
    xyzlist_irmsd, _ = rmsd.reshape_irmsd(xyzlist)
    xyzlist_G = rmsd.calculate_G(xyzlist)

    # setup for the clustering stuff
    # assignments[i] = j means that the i-th conformation is assigned to the
    # j-th cluster. Seed by giving each cluster exactly one random frame.
    assignments = -1 * np.ones(n_frames, dtype=np.int64)
    assignments[0:k] = np.arange(k)
    np.random.shuffle(assignments)

    # the j-th cluster has cartesian coordinates centers[j]
    centers = np.zeros((k, n_atoms, 3))
    # all of the clustering scores; the leading inf makes the first
    # "improvement" infinite so the threshold test cannot fire on round 0
    scores = [np.inf]
    times = [time.time()]

    for n in itertools.count():
        # recenter each cluster based on its current members
        for i in range(k):
            members = assignments == i
            structures = xyzlist[members, :, :]
            if len(structures) == 0:
                # if the current state has zero assignments, just randomly
                # select a structure for it
                print('warning: cluster %5d contains zero structures, reseeding...' % i)
                print('(if this error appears once or twice at the beginning and then goes away')
                print('don\'t worry. but if it keeps up repeatedly, something is wrong)')
                # bind to `medoid` so the assignment to centers[i] below
                # uses the reseeded structure, not a stale or undefined one
                medoid = xyzlist[np.random.randint(n_frames)]
            else:
                medoid = average_structure(structures)
                medoid -= medoid.mean(0)
                if nearest_medoid:
                    # instead of actually using the raw MDS average structure, we choose
                    # the data point in xyzlist[assignments == i, :, :] that is closest,
                    # by RMSD, to this MDS structure.

                    # reshape the medoid for RMSD
                    medoid = medoid[np.newaxis, :, :]
                    medoid_g = rmsd.calculate_G(medoid)
                    medoid_irmsd, _ = rmsd.reshape_irmsd(medoid)

                    # actually compute the RMSDs
                    # NOTE(review): xyzlist_G is indexed with three indices
                    # here -- confirm that rmsd.calculate_G returns a 3D array.
                    d = IRMSD.rmsd_one_to_all(medoid_irmsd, xyzlist_irmsd[members, :, :],
                        medoid_g, xyzlist_G[members, :, :], n_atoms, 0)

                    # choose the structure that was closest to be the medoid
                    medoid = structures[np.argmin(d)]

            centers[i] = medoid

        # prepare the new centers for RMSD
        centers_G = rmsd.calculate_G(centers)
        centers_irmsd, _ = rmsd.reshape_irmsd(centers)

        # reassign all of the data (keep the integer dtype of the labels)
        assignments = -1 * np.ones(n_frames, dtype=np.int64)
        assignment_dist = np.inf * np.ones(n_frames)
        for i in range(k):
            d = IRMSD.rmsd_one_to_all(centers_irmsd, xyzlist_irmsd, centers_G, xyzlist_G, n_atoms, i)
            where = d < assignment_dist
            assignments[where] = i
            assignment_dist[where] = d[where]

        # record the score and check the stopping criteria
        scores.append(np.sqrt(np.mean(np.square(assignment_dist))))
        times.append(time.time())
        print('round %3d, RMS radius %8f, change %.3e' % (n, scores[-1], scores[-1] - scores[-2]))
        if threshold is not None and scores[-2] - scores[-1] < threshold:
            print('score decreased less than threshold (%s). done\n' % threshold)
            break
        if max_iters is not None and n >= max_iters:
            print('reached maximum number of iterations. done\n')
            break
        if max_time is not None and times[-1] >= times[0] + max_time:
            print('reached maximum amount of time. done\n')
            break

    print('RMSD KMeans Performance Summary (py)')
    print('------------------------------------')
    print('n frames: %d' % n_frames)
    print('n states: %d' % k)
    print('mean time per round (s)   %.4f' % np.mean(np.diff(times)))
    print('stddev time per round (s) %.4f' % np.std(np.diff(times)))
    print('total time (s)            %.4f' % (times[-1] - times[0]))
    return centers, assignments, assignment_dist, np.array(scores), np.array(times)
Example #7
0
def test_ensure_type_7():
    """With add_newaxis_on_deficient_ndim, `a` gains a leading axis of size 1."""
    expanded = ensure_type(a, np.float32, 2, '', add_newaxis_on_deficient_ndim=True)
    expected_shape = (1, len(a))
    assert expanded.shape == expected_shape
Example #8
0
def test_ensure_type_6():
    """The array returned for `b` with shape (10, 10) is C-contiguous."""
    result = ensure_type(b, np.float64, ndim=2, name='', shape=(10, 10))
    flags = result.flags
    assert flags.c_contiguous is True
Example #9
0
def test_ensure_type_5():
    """Call ensure_type on `a` with length=11 and can_be_none=True.

    NOTE(review): there are no assertions here; presumably an
    expected-exception decorator exists outside this view -- confirm.
    """
    ensure_type(a, np.float32, ndim=1, name='', length=11, can_be_none=True)
Example #10
0
def test_ensure_type_4():
    """Passing None with can_be_none=True must be accepted without error."""
    missing = None
    ensure_type(missing, np.float64, ndim=1, name='', length=11, can_be_none=True)
Example #11
0
def test_ensure_type_8():
    """A None entry in `shape` acts as a wildcard for that axis."""
    grid = np.zeros((5, 10))
    checked = ensure_type(grid, np.float32, 2, '', shape=(None, 10))
    assert checked.shape == (5, 10)
Example #12
0
def _center(conformation):
    """Return a new array holding the conformation shifted so that its
    centroid (mean over axis 0) sits at the origin.

    ensure_type is called for validation only; its return value is unused.
    """
    ensure_type(conformation, 'float', ndim=2, name='conformation', warn_on_cast=False, shape=(None, 3))
    offset = np.mean(conformation, axis=0)
    return conformation - offset
Example #13
0
def test_ensure_type_12():
    """Call ensure_type on a 2D array while requesting ndim=3 (no name given).

    NOTE(review): there are no assertions here; presumably an
    expected-exception decorator exists outside this view -- confirm.
    """
    square = np.zeros((2, 2))
    ensure_type(square, np.float32, ndim=3)
Example #14
0
def test_ensure_type_13():
    """Call ensure_type with a 3-entry `shape` on a 2D array.

    NOTE(review): there are no assertions here; presumably an
    expected-exception decorator exists outside this view -- confirm.
    """
    square = np.zeros((2, 2))
    wildcard_shape = (None, None, None)
    ensure_type(square, np.float32, 2, '', shape=wildcard_shape)
Example #15
0
def test_ensure_type_11():
    """A scalar is promoted to a length-1 array when a new axis is allowed."""
    promoted = ensure_type(0, np.float32, 1, '', add_newaxis_on_deficient_ndim=True)
    assert promoted.shape == (1,)
Example #16
0
def test_ensure_type_10():
    """Call ensure_type on a flat two-element list while requesting ndim=2.

    NOTE(review): there are no assertions here; presumably an
    expected-exception decorator exists outside this view -- confirm.
    """
    flat_values = [0, 1]
    ensure_type(flat_values, np.float32, 2, '')
Example #17
0
def test_ensure_type_9():
    """Call ensure_type on a (5, 11) array while requesting shape (None, 10).

    NOTE(review): there are no assertions here; presumably an
    expected-exception decorator exists outside this view -- confirm.
    """
    grid = np.zeros((5, 11))
    ensure_type(grid, np.float32, 2, '', shape=(None, 10))
Example #18
0
def _center(conformation):
    """Return a new array holding the conformation shifted so that its
    centroid (mean over axis 0) sits at the origin.

    ensure_type is called for validation only; its return value is unused.
    """
    ensure_type(conformation, 'float', ndim=2, name='conformation', shape=(None, 3))
    offset = np.mean(conformation, axis=0)
    return conformation - offset
Example #19
0
def test_ensure_type_3():
    """Call ensure_type on `a` while requesting length=11.

    NOTE(review): there are no assertions here; presumably an
    expected-exception decorator exists outside this view -- confirm.
    """
    ensure_type(a, np.float32, ndim=1, name='', length=11)
Example #20
0
def rmsd_qcp(conformation1, conformation2):
    """Compute the RMSD with Theobald's quaternion-based characteristic
    polynomial

    Rapid calculation of RMSDs using a quaternion-based characteristic polynomial.
    Acta Crystallogr A 61(4):478-480.

    Both conformations are centered, the coefficients C2, C1 and C0 of the
    quartic x**4 + C2*x**2 + C1*x + C0 are assembled from the elements of
    the 3x3 matrix B^T A, and the root is located with Newton's method
    starting from E0 = (G_A + G_B) / 2.

    Parameters
    ----------
    conformation1 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the first conformation
    conformation2 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the second conformation

    Returns
    -------
    rmsd : float
        The root-mean square deviation after alignment between the two pointsets

    Raises
    ------
    ValueError
        If the two centered conformations differ in their number of atoms.
    """
    # Validation only: the values returned by ensure_type are discarded, so
    # the arrays passed in by the caller are the ones used below.
    ensure_type(conformation1, np.float32, 2, 'conformation1', shape=(None, 3))
    ensure_type(conformation2, np.float32, 2, 'conformation2', shape=(conformation1.shape[0], 3))

    # centered copies of the two conformations
    A = _center(conformation1)
    B = _center(conformation2)
    # explicit atom-count guard (the shape check above already ties
    # conformation2 to conformation1.shape[0] rows)
    if not A.shape[0] == B.shape[0]:
        raise ValueError('conformation1 and conformation2 must have same number of atoms')
    n_atoms = len(A)

    # the inner product of the structures A and B
    # (sum of squared coordinates of each centered structure)
    G_A = np.einsum('ij,ij', A, A)
    G_B = np.einsum('ij,ij', B, B)
    # print 'GA', G_A, np.trace(np.dot(A.T, A))
    # print 'GB', G_B, np.trace(np.dot(B.T, B))

    # M is the inner product of the matrices A and B
    M = np.dot(B.T, A)

    # unpack the elements
    Sxx, Sxy, Sxz = M[0, :]
    Syx, Syy, Syz = M[1, :]
    Szx, Szy, Szz = M[2, :]

    # do some intermediate computations to assemble the characteristic
    # polynomial
    # (naming: a trailing "2" is a square, "p" is plus, "m" is minus)
    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # two of the coefficients
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (Sxx * Syz * Szy + Syy * Szx * Sxz + Szz * Sxy * Syx - Sxx * Syy * Szz - Syz * Szx * Sxy - Szy * Syx * Sxz)

    # sums and differences of matrix elements reused by C0
    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    # the other coefficient
    C0 = Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 \
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) \
        + (-(SxzpSzx) * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz)) * (-(SxzmSzx) * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz)) \
        + (-(SxzpSzx) * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz)) * (-(SxzmSzx) * (SyzmSzy) - (SxypSyx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz)) * (-(SxymSyx) * (SyzmSzy) + (SxzpSzx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz)) * (-(SxymSyx) * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))

    # E0 is the starting guess for Newton's method; f is the quartic and df
    # its derivative
    E0 = (G_A + G_B) / 2.0
    f = lambda x: x ** 4.0 + C2 * x ** 2. + C1 * x + C0
    df = lambda x: 4 * x ** 3.0 + 2 * C2 * x + C1
    max_eigenvalue = scipy.optimize.newton(f, E0, df)
    # abs() keeps the sqrt argument non-negative when the root found lies
    # slightly above E0
    rmsd = np.sqrt(np.abs(2.0 * (E0 - max_eigenvalue) / n_atoms))
    return rmsd
Example #21
0
def rmsd_qcp(conformation1, conformation2):
    """Compute the RMSD with Theobald's quaternion-based characteristic
    polynomial

    Rapid calculation of RMSDs using a quaternion-based characteristic polynomial.
    Acta Crystallogr A 61(4):478-480.

    Both conformations are centered, the coefficients C2, C1 and C0 of the
    quartic x**4 + C2*x**2 + C1*x + C0 are assembled from the elements of
    the 3x3 matrix B^T A, and the root is located with Newton's method
    starting from E0 = (G_A + G_B) / 2.

    Parameters
    ----------
    conformation1 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the first conformation
    conformation2 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the second conformation

    Returns
    -------
    rmsd : float
        The root-mean square deviation after alignment between the two pointsets

    Raises
    ------
    ValueError
        If the two centered conformations differ in their number of atoms.
    """
    # Validation only: the values returned by ensure_type are discarded, so
    # the arrays passed in by the caller are the ones used below.
    # warn_on_cast=False is passed to both checks.
    ensure_type(conformation1, np.float32, 2, 'conformation1', warn_on_cast=False, shape=(None, 3))
    ensure_type(conformation2, np.float32, 2, 'conformation2', warn_on_cast=False, shape=(conformation1.shape[0], 3))

    # centered copies of the two conformations
    A = _center(conformation1)
    B = _center(conformation2)
    # explicit atom-count guard (the shape check above already ties
    # conformation2 to conformation1.shape[0] rows)
    if not A.shape[0] == B.shape[0]:
        raise ValueError('conformation1 and conformation2 must have same number of atoms')
    n_atoms = len(A)

    # the inner product of the structures A and B
    # (sum of squared coordinates of each centered structure)
    G_A = np.einsum('ij,ij', A, A)
    G_B = np.einsum('ij,ij', B, B)
    # print 'GA', G_A, np.trace(np.dot(A.T, A))
    # print 'GB', G_B, np.trace(np.dot(B.T, B))

    # M is the inner product of the matrices A and B
    M = np.dot(B.T, A)

    # unpack the elements
    Sxx, Sxy, Sxz = M[0, :]
    Syx, Syy, Syz = M[1, :]
    Szx, Szy, Szz = M[2, :]

    # do some intermediate computations to assemble the characteristic
    # polynomial
    # (naming: a trailing "2" is a square, "p" is plus, "m" is minus)
    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # two of the coefficients
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (Sxx * Syz * Szy + Syy * Szx * Sxz + Szz * Sxy * Syx - Sxx * Syy * Szz - Syz * Szx * Sxy - Szy * Syx * Sxz)

    # sums and differences of matrix elements reused by C0
    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    # the other coefficient
    C0 = Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 \
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) \
        + (-(SxzpSzx) * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz)) * (-(SxzmSzx) * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz)) \
        + (-(SxzpSzx) * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz)) * (-(SxzmSzx) * (SyzmSzy) - (SxypSyx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz)) * (-(SxymSyx) * (SyzmSzy) + (SxzpSzx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz)) * (-(SxymSyx) * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))

    # E0 is the starting guess for Newton's method; f is the quartic and df
    # its derivative
    E0 = (G_A + G_B) / 2.0
    f = lambda x: x ** 4.0 + C2 * x ** 2. + C1 * x + C0
    df = lambda x: 4 * x ** 3.0 + 2 * C2 * x + C1
    max_eigenvalue = scipy.optimize.newton(f, E0, df)
    # abs() keeps the sqrt argument non-negative when the root found lies
    # slightly above E0
    rmsd = np.sqrt(np.abs(2.0 * (E0 - max_eigenvalue) / n_atoms))
    return rmsd