def validate_input_arrays(predictions, measurements, uncertainties, prior_pops=None):
    """Check input data for correct shape and dtype

    Parameters
    ----------
    predictions : ndarray, shape = (num_frames, num_measurements)
        predictions[j, i] gives the ith observable predicted at frame j
    measurements : ndarray, shape = (num_measurements)
        measurements[i] gives the ith experimental measurement
    uncertainties : ndarray, shape = (num_measurements)
        uncertainties[i] gives the uncertainty of the ith experiment
    prior_pops : ndarray, shape = (num_frames), optional
        Prior populations of each conformation.  If None, skip.

    Returns
    -------
    None
        The function only validates; the values returned by ``ensure_type``
        are discarded.

    Notes
    -----
    All inputs must have float64 type and compatible shapes.  The checks are
    delegated to ``ensure_type``; whatever it raises on a mismatch propagates
    to the caller.
    """
    # Validate `predictions` *before* unpacking its shape, so that an input
    # of the wrong rank is reported by the validator (which is given the
    # argument name) instead of failing on tuple unpacking.
    ensure_type(predictions, np.float64, 2, "predictions")
    num_frames, num_measurements = predictions.shape

    # The remaining arrays must agree with the dimensions of `predictions`.
    ensure_type(measurements, np.float64, 1, "measurements", shape=(num_measurements,))
    ensure_type(uncertainties, np.float64, 1, "uncertainties", shape=(num_measurements,))

    if prior_pops is not None:
        ensure_type(prior_pops, np.float64, 1, "prior_pops", shape=(num_frames,))
def test_ensure_type_25():
    # Cast the module-level array `a` to float64 with warn_on_cast=False:
    # the result must be float64, `a` itself must keep its float32 dtype,
    # and no warning may be recorded.
    with warnings.catch_warnings(record=True) as caught:
        # record every warning, even ones that would normally be filtered
        warnings.simplefilter("always")

        result = ensure_type(a, np.float64, 1, '', length=10, warn_on_cast=False)

        assert result.dtype == np.float64
        assert a.dtype == np.float32  # the input array is left untouched
        assert len(caught) == 0  # warn_on_cast=False => nothing was emitted
def test_ensure_type_2():
    # Cast the module-level array `a` to float64 with default settings:
    # the result must be float64, `a` itself must keep its float32 dtype,
    # and exactly one TypeCastPerformanceWarning must be recorded.
    with warnings.catch_warnings(record=True) as caught:
        # record every warning, even ones that would normally be filtered
        warnings.simplefilter("always")

        result = ensure_type(a, np.float64, 1, '', length=10)

        assert result.dtype == np.float64
        assert a.dtype == np.float32  # the input array is left untouched
        assert len(caught) == 1
        latest = caught[-1]
        assert issubclass(latest.category, TypeCastPerformanceWarning)
def compute_translation_and_rotation(mobile, target):
    """Returns the translation and rotation mapping mobile onto target.

    Parameters
    ----------
    mobile : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame, to be aligned onto target.
    target : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame.  Must have the same number of
        atoms as `mobile`.

    Returns
    -------
    translation : ndarray, shape=(3,)
        Centroid of `target`.  The rotation is fitted on centered
        coordinates, so the superposed structure is
        ``(mobile - mobile.mean(0)).dot(rotation) + translation``.
    rotation : ndarray, shape=(3,3)
        Rotation matrix to apply to (centered) mobile to carry out the
        transformation.
    """
    ensure_type(mobile, 'float', 2, 'mobile', shape=(None, 3))
    # target must match *mobile's* atom count (same pattern as rmsd_qcp);
    # checking it against its own length would always succeed.
    ensure_type(target, 'float', 2, 'target', shape=(mobile.shape[0], 3))

    mu1 = mobile.mean(0)
    mu2 = target.mean(0)
    translation = mu2

    # work on centered copies; the caller's arrays are not modified
    mobile = mobile - mu1
    target = target - mu2

    # SVD of the 3x3 correlation matrix between the centered structures
    correlation_matrix = np.dot(np.transpose(mobile), target)
    V, _, W_tr = np.linalg.svd(correlation_matrix)

    # a negative determinant product means V.W_tr would be an improper
    # rotation (reflection); flipping the last column of V corrects it
    is_reflection = (np.linalg.det(V) * np.linalg.det(W_tr)) < 0.0
    if is_reflection:
        V[:, -1] = -V[:, -1]
    rotation = np.dot(V, W_tr)

    return translation, rotation
def compute_translation_and_rotation(mobile, target):
    """Returns the translation and rotation mapping mobile onto target.

    Parameters
    ----------
    mobile : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame, to be aligned onto target.
    target : ndarray, shape = (n_atoms, 3)
        xyz coordinates of a `single` frame.  Must have the same number of
        atoms as `mobile`.

    Returns
    -------
    translation : ndarray, shape=(3,)
        Centroid of `target`.  The rotation is fitted on centered
        coordinates, so the superposed structure is
        ``(mobile - mobile.mean(0)).dot(rotation) + translation``.
    rotation : ndarray, shape=(3,3)
        Rotation matrix to apply to (centered) mobile to carry out the
        transformation.
    """
    ensure_type(mobile, 'float', 2, 'mobile', warn_on_cast=False, shape=(None, 3))
    # target must match *mobile's* atom count (same pattern as rmsd_qcp);
    # checking it against its own length would always succeed.
    ensure_type(target, 'float', 2, 'target', warn_on_cast=False, shape=(mobile.shape[0], 3))

    mu1 = mobile.mean(0)
    mu2 = target.mean(0)
    translation = mu2

    # work on centered copies; the caller's arrays are not modified
    mobile = mobile - mu1
    target = target - mu2

    # SVD of the 3x3 correlation matrix between the centered structures
    correlation_matrix = np.dot(np.transpose(mobile), target)
    V, _, W_tr = np.linalg.svd(correlation_matrix)

    # a negative determinant product means V.W_tr would be an improper
    # rotation (reflection); flipping the last column of V corrects it
    is_reflection = (np.linalg.det(V) * np.linalg.det(W_tr)) < 0.0
    if is_reflection:
        V[:, -1] = -V[:, -1]
    rotation = np.dot(V, W_tr)

    return translation, rotation
def kmeans_mds(xyzlist, k=10, max_iters=100, max_time=10, threshold=1e-8, nearest_medoid=False):
    """k-means clustering with the RMSD distance metric.

    this is an iterative algorithm. during each iteration we first move each
    cluster center to the empirical average of the conformations currently
    assigned to it, and then we re-assign all of the conformations given the
    new locations of the centers.

    to compute the average conformations, we use a form of classical
    multidimensional scaling / principal coordinate analysis.

    Parameters
    ----------
    xyzlist : ndarray, shape = (n_frames, n_atoms, 3)
        Conformations to cluster; validated as a 3-dimensional float32 array.
    k : int
        Number of clusters.  Must satisfy ``1 <= k <= n_frames``.
    max_iters : int or None
        Stop once the round counter reaches this value.  None disables
        the check.
    max_time : float or None
        Stop once this many seconds (measured with ``time.time()``) have
        elapsed since the start.  None disables the check.
    threshold : float or None
        Stop once the score decreases by less than this amount between two
        consecutive rounds.  None disables the check.
    nearest_medoid : bool
        If True, each center is replaced by the member of its cluster that
        is closest (by RMSD) to the MDS average structure.

    Returns
    -------
    centers : ndarray, shape = (k, n_atoms, 3)
        Cartesian coordinates of the cluster centers.
    assignments : ndarray of int64, shape = (n_frames,)
        assignments[i] = j means the i-th conformation is assigned to the
        j-th cluster.
    assignment_dist : ndarray, shape = (n_frames,)
        Distance between each conformation and its cluster center.
    scores : ndarray
        Clustering score (root-mean-square of assignment_dist) per round;
        scores[0] is the initial value ``inf``.
    times : ndarray
        ``time.time()`` at the start and after every round.

    Raises
    ------
    ValueError
        If `k` is smaller than 1 or larger than the number of frames.
    """
    xyzlist = ensure_type(xyzlist, np.float32, 3, name='xyzlist', shape=(None, None, 3), warn_on_cast=False)

    # center each frame in place (centroid accumulated in float64)
    # NOTE(review): if ensure_type hands back the caller's own array without
    # copying, this also centers the caller's data -- confirm.
    for x in xyzlist:
        centroid = x.astype('float64').mean(0)
        assert centroid.shape == (3,)
        x -= centroid

    # setup for the rmsd calculation
    n_frames, n_atoms = xyzlist.shape[0:2]
    if not 1 <= k <= n_frames:
        raise ValueError('k must be between 1 and the number of frames (%d), got %s' % (n_frames, k))
    xyzlist_irmsd, _ = rmsd.reshape_irmsd(xyzlist)
    xyzlist_G = rmsd.calculate_G(xyzlist)

    # setup for the clustering stuff
    # assignments[i] = j means that the i-th conformation is assigned to the
    # j-th cluster.  Seed by giving each cluster exactly one random frame;
    # every other frame starts unassigned (-1).
    assignments = -1 * np.ones(n_frames, dtype=np.int64)
    assignments[0:k] = np.arange(k)
    np.random.shuffle(assignments)

    # the j-th cluster has cartesian coordinates centers[j]
    centers = np.zeros((k, n_atoms, 3))

    # assignment_dist[i] gives the RMSD between the ith conformation and its
    # cluster center
    assignment_dist = np.inf * np.ones(n_frames)

    # all of the clustering scores, and the wall-clock time after each round
    scores = [np.inf]
    times = [time.time()]

    for n in itertools.count():
        # recenter each cluster based on its current members
        for i in range(k):
            members = assignments == i
            structures = xyzlist[members, :, :]

            if len(structures) == 0:
                # if the current state has zero assignments, just randomly
                # select a structure for it
                print('warning: cluster %5d contains zero structures, reseeding...' % i)
                print('(if this error appears once or twice at the beginning and then goes away')
                print('don\'t worry. but if it keeps up repeatedly, something is wrong)')
                # actually store the reseeded center (previously it was
                # bound to a local that was never used)
                centers[i] = xyzlist[np.random.randint(n_frames)]
                continue

            medoid = average_structure(structures)
            medoid -= medoid.mean(0)

            if nearest_medoid:
                # instead of actually using the raw MDS average structure, we
                # choose the data point in this cluster that is closest, by
                # RMSD, to this MDS structure.

                # reshape the medoid for RMSD
                medoid = medoid[np.newaxis, :, :]
                medoid_g = rmsd.calculate_G(medoid)
                medoid_irmsd, _ = rmsd.reshape_irmsd(medoid)

                # actually compute the RMSDs
                # NOTE(review): xyzlist_G is indexed with three axes here;
                # confirm that matches the rank calculate_G returns.
                d = IRMSD.rmsd_one_to_all(medoid_irmsd, xyzlist_irmsd[members, :, :],
                                          medoid_g, xyzlist_G[members, :, :], n_atoms, 0)
                # choose the structure that was closest to be the medoid
                medoid = structures[np.argmin(d)]

            centers[i] = medoid

        # prepare the new centers for RMSD
        centers_G = rmsd.calculate_G(centers)
        centers_irmsd, _ = rmsd.reshape_irmsd(centers)

        # reassign all of the data; keep the integer dtype so the returned
        # assignments stay usable as cluster indices
        assignments = -1 * np.ones(n_frames, dtype=np.int64)
        assignment_dist = np.inf * np.ones(n_frames)
        for i in range(k):
            d = IRMSD.rmsd_one_to_all(centers_irmsd, xyzlist_irmsd, centers_G, xyzlist_G, n_atoms, i)
            where = d < assignment_dist
            assignments[where] = i
            assignment_dist[where] = d[where]

        # record the score (RMS distance to the assigned center) for this
        # round and decide whether to stop
        scores.append(np.sqrt(np.mean(np.square(assignment_dist))))
        times.append(time.time())
        print('round %3d, RMS radius %8f, change %.3e' % (n, scores[-1], scores[-1] - scores[-2]))

        if threshold is not None and scores[-2] - scores[-1] < threshold:
            print('score decreased less than threshold (%s). done\n' % threshold)
            break
        if max_iters is not None and n >= max_iters:
            print('reached maximum number of iterations. done\n')
            break
        if max_time is not None and times[-1] >= times[0] + max_time:
            print('reached maximum amount of time. done\n')
            break

    print('RMSD KMeans Performance Summary (py)')
    print('------------------------------------')
    print('n frames: %d' % n_frames)
    print('n states: %d' % k)
    print('mean time per round (s) %.4f' % np.mean(np.diff(times)))
    print('stddev time per round (s) %.4f' % np.std(np.diff(times)))
    print('total time (s) %.4f' % (times[-1] - times[0]))
    return centers, assignments, assignment_dist, np.array(scores), np.array(times)
def test_ensure_type_7():
    # Request ndim=2 for the module-level array `a` with
    # add_newaxis_on_deficient_ndim=True; the result must have shape
    # (1, len(a)).
    promoted = ensure_type(
        a, np.float32, ndim=2, name='',
        add_newaxis_on_deficient_ndim=True)
    expected_shape = (1, len(a))
    assert promoted.shape == expected_shape
def test_ensure_type_6():
    # The array returned for the module-level `b` (validated as float64,
    # 2-D, shape (10, 10)) must be flagged C-contiguous.
    result = ensure_type(b, np.float64, 2, '', shape=(10,10))
    flags = result.flags
    assert flags.c_contiguous is True
def test_ensure_type_5():
    # Validate the module-level array `a` against length=11 while also
    # passing can_be_none=True.
    # NOTE(review): sibling tests validate `a` with length=10, so this call
    # is presumably expected to raise; no expected-exception marker is
    # visible in this view -- confirm.
    ensure_type(
        a, np.float32, 1, '',
        length=11,
        can_be_none=True,
    )
def test_ensure_type_4():
    # Pass None as the value together with can_be_none=True.  There is no
    # assertion: the test passes as long as the call does not raise.
    ensure_type(
        None, np.float64, 1, '',
        length=11,
        can_be_none=True,
    )
def test_ensure_type_8():
    # A (5, 10) array validated against shape=(None, 10) must come back
    # with shape (5, 10).
    zeros = np.zeros((5,10))
    checked = ensure_type(zeros, np.float32, ndim=2, name='', shape=(None, 10))
    assert checked.shape == (5, 10)
def _center(conformation):
    """Return the conformation shifted so that its centroid is at the origin.

    The input is first passed to ``ensure_type`` (dtype 'float', 2
    dimensions, shape ``(None, 3)``, ``warn_on_cast=False``); the value that
    call returns is not used.  The per-column mean is then subtracted,
    producing a new array.
    """
    ensure_type(conformation, 'float', 2, 'conformation', warn_on_cast=False, shape=(None, 3))
    # out-of-place subtraction: the argument itself is not modified here
    return conformation - np.mean(conformation, axis=0)
def test_ensure_type_12():
    # Validate a (2, 2) array while requesting ndim=3 (no `name` argument
    # is supplied).
    # NOTE(review): the rank does not match the request, so this call is
    # presumably expected to raise; no expected-exception marker is visible
    # in this view -- confirm.
    square = np.zeros((2,2))
    ensure_type(square, np.float32, ndim=3)
def test_ensure_type_13():
    # Validate a (2, 2) array with ndim=2 but a three-entry shape spec.
    # NOTE(review): the shape spec has more entries than the array has
    # axes, so this call is presumably expected to raise; no
    # expected-exception marker is visible in this view -- confirm.
    square = np.zeros((2,2))
    ensure_type(
        square, np.float32,
        ndim=2, name='',
        shape=(None, None, None),
    )
def test_ensure_type_11():
    # A bare scalar validated with ndim=1 and
    # add_newaxis_on_deficient_ndim=True must come back with shape (1,).
    promoted = ensure_type(
        0, np.float32, ndim=1, name='',
        add_newaxis_on_deficient_ndim=True)
    assert promoted.shape == (1,)
def test_ensure_type_10():
    # Validate the plain list [0, 1] while requesting ndim=2, without
    # add_newaxis_on_deficient_ndim.
    # NOTE(review): there is no assertion, and the requested rank differs
    # from the input's, so this call is presumably expected to raise; no
    # expected-exception marker is visible in this view -- confirm.
    # The return value is not inspected, so it is no longer bound to a name.
    ensure_type([0,1], np.float32, ndim=2, name='')
def test_ensure_type_9():
    # Validate a (5, 11) array against shape=(None, 10).
    # NOTE(review): there is no assertion, and the second dimension does
    # not match the spec, so this call is presumably expected to raise; no
    # expected-exception marker is visible in this view -- confirm.
    # The return value is not inspected, so it is no longer bound to a name.
    ensure_type(np.zeros((5,11)), np.float32, ndim=2, name='', shape=(None, 10))
def _center(conformation):
    """Return the conformation shifted so that its centroid is at the origin.

    The input is first passed to ``ensure_type`` (dtype 'float', 2
    dimensions, shape ``(None, 3)``); the value that call returns is not
    used.  The per-column mean is then subtracted, producing a new array.
    """
    ensure_type(conformation, 'float', 2, 'conformation', shape=(None, 3))
    # out-of-place subtraction: the argument itself is not modified here
    return conformation - np.mean(conformation, axis=0)
def test_ensure_type_3():
    # Validate the module-level array `a` against length=11.
    # NOTE(review): sibling tests validate `a` with length=10, so this call
    # is presumably expected to raise; no expected-exception marker is
    # visible in this view -- confirm.
    ensure_type(
        a, np.float32, 1, '',
        length=11,
    )
def rmsd_qcp(conformation1, conformation2):
    """Compute the RMSD with Theobald's quaternion-based characteristic polynomial

    Rapid calculation of RMSDs using a quaternion-based characteristic
    polynomial.  Acta Crystallogr A 61(4):478-480.

    Both conformations are centered, the 3x3 product ``B.T . A`` of the
    centered coordinates is formed, and the coefficients C2, C1, C0 of the
    quartic ``x**4 + C2*x**2 + C1*x + C0`` are assembled from its elements.
    A root of that quartic is located with Newton's method starting from
    ``E0 = (G_A + G_B) / 2`` and converted to an RMSD.

    Parameters
    ----------
    conformation1 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the first conformation
    conformation2 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the second conformation

    Returns
    -------
    rmsd : float
        The root-mean square deviation after alignment between the two
        pointsets

    Raises
    ------
    ValueError
        If the two centered conformations differ in their number of atoms.
    """
    # Validation only: the values returned by ensure_type are discarded.
    # NOTE(review): this asks for np.float32 while _center asks for 'float'
    # -- confirm the dtype difference is intended.
    ensure_type(conformation1, np.float32, 2, 'conformation1', shape=(None, 3))
    ensure_type(conformation2, np.float32, 2, 'conformation2', shape=(conformation1.shape[0], 3))

    A = _center(conformation1)
    B = _center(conformation2)
    if not A.shape[0] == B.shape[0]:
        raise ValueError('conformation1 and conformation2 must have same number of atoms')
    n_atoms = len(A)

    # inner product of each centered structure with itself, i.e. the sum of
    # its squared coordinates (equal to np.trace(np.dot(A.T, A)))
    G_A = np.einsum('ij,ij', A, A)
    G_B = np.einsum('ij,ij', B, B)

    # M is the 3x3 matrix product B.T . A of the centered structures
    M = np.dot(B.T, A)

    # unpack the nine elements of M, row by row
    Sxx, Sxy, Sxz = M[0, :]
    Syx, Syy, Syz = M[1, :]
    Szx, Szy, Szz = M[2, :]

    # do some intermediate computations to assemble the characteristic
    # polynomial
    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # two of the coefficients (quadratic and linear terms)
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (Sxx * Syz * Szy + Syy * Szx * Sxz + Szz * Sxy * Syx - Sxx * Syy * Szz - Syz * Szx * Sxy - Szy * Syx * Sxz)

    # sums ("p") and differences ("m") of element pairs, reused in C0
    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    # the other coefficient (constant term)
    C0 = Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 \
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) \
        + (-(SxzpSzx) * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz)) * (-(SxzmSzx) * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz)) \
        + (-(SxzpSzx) * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz)) * (-(SxzmSzx) * (SyzmSzy) - (SxypSyx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz)) * (-(SxymSyx) * (SyzmSzy) + (SxzpSzx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz)) * (-(SxymSyx) * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))

    # starting guess for the root search
    E0 = (G_A + G_B) / 2.0

    # the quartic and its derivative; Newton's method is started at E0 and
    # the root it returns is treated as the maximum eigenvalue
    f = lambda x: x ** 4.0 + C2 * x ** 2. + C1 * x + C0
    df = lambda x: 4 * x ** 3.0 + 2 * C2 * x + C1
    max_eigenvalue = scipy.optimize.newton(f, E0, df)

    # np.abs keeps the sqrt argument non-negative
    rmsd = np.sqrt(np.abs(2.0 * (E0 - max_eigenvalue) / n_atoms))
    return rmsd
def rmsd_qcp(conformation1, conformation2):
    """Compute the RMSD with Theobald's quaternion-based characteristic polynomial

    Rapid calculation of RMSDs using a quaternion-based characteristic
    polynomial.  Acta Crystallogr A 61(4):478-480.

    Both conformations are centered, the 3x3 product ``B.T . A`` of the
    centered coordinates is formed, and the coefficients C2, C1, C0 of the
    quartic ``x**4 + C2*x**2 + C1*x + C0`` are assembled from its elements.
    A root of that quartic is located with Newton's method starting from
    ``E0 = (G_A + G_B) / 2`` and converted to an RMSD.

    Parameters
    ----------
    conformation1 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the first conformation
    conformation2 : np.ndarray, shape=(n_atoms, 3)
        The cartesian coordinates of the second conformation

    Returns
    -------
    rmsd : float
        The root-mean square deviation after alignment between the two
        pointsets

    Raises
    ------
    ValueError
        If the two centered conformations differ in their number of atoms.
    """
    # Validation only (with warn_on_cast=False): the values returned by
    # ensure_type are discarded.
    # NOTE(review): this asks for np.float32 while _center asks for 'float'
    # -- confirm the dtype difference is intended.
    ensure_type(conformation1, np.float32, 2, 'conformation1', warn_on_cast=False, shape=(None, 3))
    ensure_type(conformation2, np.float32, 2, 'conformation2', warn_on_cast=False, shape=(conformation1.shape[0], 3))

    A = _center(conformation1)
    B = _center(conformation2)
    if not A.shape[0] == B.shape[0]:
        raise ValueError('conformation1 and conformation2 must have same number of atoms')
    n_atoms = len(A)

    # inner product of each centered structure with itself, i.e. the sum of
    # its squared coordinates (equal to np.trace(np.dot(A.T, A)))
    G_A = np.einsum('ij,ij', A, A)
    G_B = np.einsum('ij,ij', B, B)

    # M is the 3x3 matrix product B.T . A of the centered structures
    M = np.dot(B.T, A)

    # unpack the nine elements of M, row by row
    Sxx, Sxy, Sxz = M[0, :]
    Syx, Syy, Syz = M[1, :]
    Szx, Szy, Szz = M[2, :]

    # do some intermediate computations to assemble the characteristic
    # polynomial
    Sxx2 = Sxx * Sxx
    Syy2 = Syy * Syy
    Szz2 = Szz * Szz

    Sxy2 = Sxy * Sxy
    Syz2 = Syz * Syz
    Sxz2 = Sxz * Sxz

    Syx2 = Syx * Syx
    Szy2 = Szy * Szy
    Szx2 = Szx * Szx

    SyzSzymSyySzz2 = 2.0 * (Syz * Szy - Syy * Szz)
    Sxx2Syy2Szz2Syz2Szy2 = Syy2 + Szz2 - Sxx2 + Syz2 + Szy2

    # two of the coefficients (quadratic and linear terms)
    C2 = -2.0 * (Sxx2 + Syy2 + Szz2 + Sxy2 + Syx2 + Sxz2 + Szx2 + Syz2 + Szy2)
    C1 = 8.0 * (Sxx * Syz * Szy + Syy * Szx * Sxz + Szz * Sxy * Syx - Sxx * Syy * Szz - Syz * Szx * Sxy - Szy * Syx * Sxz)

    # sums ("p") and differences ("m") of element pairs, reused in C0
    SxzpSzx = Sxz + Szx
    SyzpSzy = Syz + Szy
    SxypSyx = Sxy + Syx
    SyzmSzy = Syz - Szy
    SxzmSzx = Sxz - Szx
    SxymSyx = Sxy - Syx
    SxxpSyy = Sxx + Syy
    SxxmSyy = Sxx - Syy
    Sxy2Sxz2Syx2Szx2 = Sxy2 + Sxz2 - Syx2 - Szx2

    # the other coefficient (constant term)
    C0 = Sxy2Sxz2Syx2Szx2 * Sxy2Sxz2Syx2Szx2 \
        + (Sxx2Syy2Szz2Syz2Szy2 + SyzSzymSyySzz2) * (Sxx2Syy2Szz2Syz2Szy2 - SyzSzymSyySzz2) \
        + (-(SxzpSzx) * (SyzmSzy) + (SxymSyx) * (SxxmSyy - Szz)) * (-(SxzmSzx) * (SyzpSzy) + (SxymSyx) * (SxxmSyy + Szz)) \
        + (-(SxzpSzx) * (SyzpSzy) - (SxypSyx) * (SxxpSyy - Szz)) * (-(SxzmSzx) * (SyzmSzy) - (SxypSyx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzpSzy) + (SxzpSzx) * (SxxmSyy + Szz)) * (-(SxymSyx) * (SyzmSzy) + (SxzpSzx) * (SxxpSyy + Szz)) \
        + (+(SxypSyx) * (SyzmSzy) + (SxzmSzx) * (SxxmSyy - Szz)) * (-(SxymSyx) * (SyzpSzy) + (SxzmSzx) * (SxxpSyy - Szz))

    # starting guess for the root search
    E0 = (G_A + G_B) / 2.0

    # the quartic and its derivative; Newton's method is started at E0 and
    # the root it returns is treated as the maximum eigenvalue
    f = lambda x: x ** 4.0 + C2 * x ** 2. + C1 * x + C0
    df = lambda x: 4 * x ** 3.0 + 2 * C2 * x + C1
    max_eigenvalue = scipy.optimize.newton(f, E0, df)

    # np.abs keeps the sqrt argument non-negative
    rmsd = np.sqrt(np.abs(2.0 * (E0 - max_eigenvalue) / n_atoms))
    return rmsd