def match(ref_geom, geom_to_match):
    """Bring the atoms of ``geom_to_match`` into the order of ``ref_geom``.

    The Kabsch RMSD between the two geometries is printed before and after
    the reordering.

    Parameters
    ----------
    ref_geom
        Geometry whose ``atoms``, ``atom_types`` and ``coords3d`` define the
        target atom order.
    geom_to_match
        Geometry handed to ``match_geom_atoms`` together with ``ref_geom``.

    Returns
    -------
    list
        One-element list containing the geometry returned by
        ``match_geom_atoms``, with its ``atoms`` and ``coords`` overwritten.
    """
    before = rmsd.kabsch_rmsd(ref_geom.coords3d, geom_to_match.coords3d)
    print(f"Kabsch RMSD before: {before:.4f}")

    matched_geom = match_geom_atoms(ref_geom, geom_to_match, hydrogen=True)

    # Per the original author's note, matching only sorted the individual
    # per-atom-type coordinate blocks, so the overall atom order is still
    # wrong. Keep one running row index per atom type and walk the reference
    # atom order, always taking the next unused row of the matching block.
    next_row = dict.fromkeys(ref_geom.atom_types, 0)
    coord_blocks, _ = matched_geom.coords_by_type
    reordered = []
    for atom in ref_geom.atoms:
        row = next_row[atom]
        reordered.append(coord_blocks[atom][row])
        next_row[atom] = row + 1

    # Adopt the reference atom order together with the reassembled coordinates.
    matched_geom.atoms = ref_geom.atoms
    matched_geom.coords = np.array(reordered).flatten()

    after = rmsd.kabsch_rmsd(ref_geom.coords3d, matched_geom.coords3d)
    print(f"Kabsch RMSD after: {after:.4f}")
    return [matched_geom]
def test_reorder_qml():
    """Check that similarity-based reordering recovers a scrambled molecule.

    The molecule is shuffled and rotated. Plain Hungarian reordering on
    distances is expected to restore the element order but leave a large
    RMSD, while ``rmsd.reorder_similarity`` is expected to restore the
    original atom order with an RMSD of (numerically) zero.
    """
    filename_1 = pathlib.PurePath(RESOURCE_PATH, "CHEMBL3039407.xyz")
    p_atoms, p_coord = rmsd.get_coordinates_xyz(filename_1)

    # Reorder atoms with a fixed seed so the test is reproducible.
    n_atoms = len(p_atoms)
    random_reorder = np.arange(n_atoms, dtype=int)
    np.random.seed(5)
    np.random.shuffle(random_reorder)

    # Indexing with an integer array already yields new arrays, so no
    # explicit copy of the reference data is needed.
    q_atoms = p_atoms[random_reorder]
    q_coord = p_coord[random_reorder]

    # Mess up the distance matrix by rotating the molecule.
    # NOTE: np.cos/np.sin take radians, so this rotates by 180 rad rather
    # than 180 degrees. It is still a proper rotation, which is all the test
    # needs; the value is kept so the thresholds below stay valid.
    theta = 180.0
    rotation_y = np.array(
        [
            [np.cos(theta), 0, np.sin(theta)],
            [0, 1, 0],
            [-np.sin(theta), 0, np.cos(theta)],
        ]
    )
    q_coord = np.dot(q_coord, rotation_y)

    # Reorder with standard hungarian, this will fail reorder and give large
    # RMSD
    view_dist = rmsd.reorder_hungarian(p_atoms, q_atoms, p_coord, q_coord)
    q_atoms_dist = q_atoms[view_dist]
    q_coord_dist = q_coord[view_dist]
    _rmsd_dist = rmsd.kabsch_rmsd(p_coord, q_coord_dist)
    assert q_atoms_dist.tolist() == p_atoms.tolist()
    assert _rmsd_dist > 3.0

    # Reorder based on chemical similarity
    view = rmsd.reorder_similarity(p_atoms, q_atoms, p_coord, q_coord)
    q_atoms = q_atoms[view]
    q_coord = q_coord[view]

    # Calculate new RMSD with correct atom order
    _rmsd = rmsd.kabsch_rmsd(p_coord, q_coord)

    # Assert correct atom order
    assert q_atoms.tolist() == p_atoms.tolist()

    # Assert this is the same molecule. The comparison previously lacked the
    # `assert` keyword and therefore never checked anything.
    assert _rmsd == pytest.approx(0.0, abs=1e-6)
def test_pdb(example_path="examples", threshold=0.001):
    """
    A simple test for the PDB functionality.

    Loads ``ci2_1.pdb`` and ``ci2_2.pdb`` from ``example_path``, computes the
    plain RMSD on the raw coordinates and the Kabsch and quaternion RMSDs on
    the centred coordinates, and compares them with hard-coded references.

    :param example_path: directory prefix of the two PDB files
    :param threshold: maximum absolute deviation from the reference values
    :return: True if all test passed, False (after printing the failing
        value) otherwise
    """
    _, P = rmsd.get_coordinates(example_path+'/ci2_1.pdb', 'pdb')
    _, Q = rmsd.get_coordinates(example_path+'/ci2_2.pdb', 'pdb')

    # The plain RMSD is taken before the structures are centred.
    n_rmsd = rmsd.rmsd(P, Q)

    P -= rmsd.centroid(P)
    Q -= rmsd.centroid(Q)

    k_rmsd = rmsd.kabsch_rmsd(P, Q)
    q_rmsd = rmsd.quaternion_rmsd(P, Q)

    # (computed value, reference value, failure message), checked in order.
    reference_checks = (
        (n_rmsd, 26.975, 'Failed to calculate normal RMSD, result: {0}'),
        (k_rmsd, 11.777, 'Failed to calculate Kabsch RMSD, result: {0}'),
        (q_rmsd, 11.777, 'Failed to calculate quaternion RMSD, result: {0}'),
    )
    for value, expected, message in reference_checks:
        if abs(value - expected) > threshold:
            print(message.format(value))
            return False

    # Both superposition methods must agree far more tightly than either
    # agrees with the rounded reference value.
    if abs(q_rmsd - k_rmsd) > threshold ** 2:
        print('Failed to yield similar Kabsch and quaternion RMSD, result: {0} vs {1}'.format(k_rmsd, q_rmsd))
        return False

    return True
def step(self, action):
    """Apply one action and report the resulting state.

    ``action`` is unpacked into a residue identifier and its torsion angles,
    which are forwarded to ``self._set_residue_torsion``. The new coordinates
    are then read from ``cmd.get_coords()``.

    Returns
    -------
    tuple
        ``(state, reward, done, info)`` where ``reward`` is the Kabsch RMSD
        (with ``translate=True``) between copies of the new state and
        ``self.atom_coords``, and ``done`` is ``reward < 1``.
    """
    residue, torsions = action
    self._set_residue_torsion(residue, torsions)
    self.state = cmd.get_coords()

    # Copies are handed over so the stored arrays are never touched by the
    # RMSD routine.
    current = self.state.copy()
    target = self.atom_coords.copy()
    reward = rmsd.kabsch_rmsd(current, target, translate=True)

    return self.state, reward, reward < 1, self.info
def compute_pairwise_rmsd(confs):
    """Return the mean Kabsch RMSD over all ordered conformer pairs.

    ``confs`` is indexed as ``confs[i, :, :]``, i.e. one 2-D coordinate block
    per conformer along the first axis. Every ordered pair ``(i, j)`` is
    included, so self-pairs and both orientations of each pair contribute to
    the mean.
    """
    count = confs.shape[0]
    values = [
        kabsch_rmsd(confs[a, :, :], confs[b, :, :])
        for a in range(count)
        for b in range(count)
    ]
    return np.mean(values)
def main():
    """Print a CSV table of Kabsch RMSDs relative to the first frame.

    The command-line arguments are forwarded to ``get_trajectory``, which
    returns a mapping from key to a sequence of frames. For each key the RMSD
    of every frame against that key's first frame is computed. Columns are
    ordered by descending average RMSD. The output is a header row, an
    "average" row and one row per command-line argument.
    """
    trajectory = get_trajectory(*sys.argv[1:])

    rmsds = {}
    for key in trajectory.keys():
        frames = trajectory[key]
        rmsds[key] = [rmsd.kabsch_rmsd(frames[0], frame) for frame in frames]

    # Averages per key; columns are sorted from largest to smallest average.
    avrgs = {key: np.average(values) for key, values in rmsds.items()}
    sorted_keys = sorted(avrgs, key=avrgs.get, reverse=True)

    print('"","' + '","'.join(str(key) for key in sorted_keys) + '"')
    print('"average",{}'.format(
        ','.join(str(avrgs[key]) for key in sorted_keys)))

    # One row per command-line argument (presumably one frame per input
    # file -- TODO confirm against get_trajectory).
    for i in range(len(sys.argv[1:])):
        row = [str(i + 1)]
        row.extend(str(rmsds[key][i]) for key in sorted_keys)
        print(','.join(row))
def rmsd_distance(atoms_list, coordinates_list, translation=False, rotation=False):
    """Build a symmetric matrix of pairwise RMSD values.

    Parameters
    ----------
    atoms_list
        Only its length is used; it fixes the number of structures ``N``.
    coordinates_list
        Indexable collection of coordinate arrays; entries ``0 .. N-1`` are
        compared with each other.
    translation : bool
        If true, every structure is shifted so that its centroid is at the
        origin before comparison.
    rotation : bool
        If true, ``rmsd.kabsch_rmsd`` is used for each pair, otherwise the
        plain ``rmsd.rmsd``.

    Returns
    -------
    numpy.ndarray
        ``(N, N)`` matrix with zeros on the diagonal and the pairwise RMSD
        mirrored into both triangles.
    """
    N = len(atoms_list)
    kernel = np.zeros((N, N))

    # Centre each structure once up front instead of once per pair.
    coords = [coordinates_list[i] for i in range(N)]
    if translation:
        coords = [c - rmsd.centroid(c) for c in coords]

    metric = rmsd.kabsch_rmsd if rotation else rmsd.rmsd

    # Only the unique pairs (j < i) are evaluated; the value is mirrored.
    for i in range(N):
        for j in range(i):
            value = metric(coords[i], coords[j])
            kernel[i, j] = value
            kernel[j, i] = value

    return kernel
def assert_geom(ref_fn, zmat_fn, atol=2.5e-5):
    """Assert that a Z-matrix file reproduces a reference geometry.

    The geometry built from ``zmat_fn`` is compared with the one loaded from
    ``ref_fn`` via the Kabsch RMSD (``translate=True``). The RMSD is printed
    and must be zero within the absolute tolerance ``atol``.
    """
    geom = geom_from_zmat(zmat_from_fn(zmat_fn))
    ref = geom_loader(ref_fn)

    deviation = kabsch_rmsd(geom.coords3d, ref.coords3d, translate=True)
    print(f"RMSD: {deviation:.6f}")
    assert deviation == pytest.approx(0., abs=atol)
def new_fitness(masses, network, target_coordinates, dim):
    '''
    Fitness based on the customized Laplacian variant.

    Builds the Laplacian with ``create_customized_laplacian(network, masses)``,
    derives ``dim``-dimensional spectral coordinates from it and returns their
    Kabsch RMSD to ``target_coordinates`` (both compared via ``.values``).
    '''
    custom_laplacian = create_customized_laplacian(network, masses)
    predicted = get_spectral_coordinates(custom_laplacian, dim=dim)
    return rmsd.kabsch_rmsd(predicted.values, target_coordinates.values)
def count_rmsd(t1, t2):
    """Return the Kabsch RMSD between two point sets, rounded to 4 decimals.

    ``t1.points`` and ``t2.points`` are iterated; each point contributes its
    ``x``, ``y`` and ``z`` attributes converted to ``float``. Both point
    clouds are centred on their centroids before superposition.
    """
    centred = []
    for shape in (t1, t2):
        xyz = np.array([[float(p.x), float(p.y), float(p.z)] for p in shape.points])
        centred.append(xyz - rmsd.centroid(xyz))

    # Original author's note: triangles_matrix rounds to 6 decimals instead.
    return round(rmsd.kabsch_rmsd(centred[0], centred[1]), 4)
def fitness(masses, network, target_coordinates, dim):
    '''
    Fitness score of a single individual.

    The dense graph Laplacian of ``network`` and the matrix returned by
    ``create_inverse_mod_matrix(masses)`` are passed to
    ``get_spectral_coordinates``; the score is the Kabsch RMSD between the
    resulting coordinates and ``target_coordinates`` (via ``.values``).
    '''
    inverse_mod = create_inverse_mod_matrix(masses)
    dense_laplacian = nx.laplacian_matrix(network).todense()
    predicted = get_spectral_coordinates(dense_laplacian, inverse_mod, dim)
    return rmsd.kabsch_rmsd(predicted.values, target_coordinates.values)
def CalRmsdFrame(coor_frame, ref_coor_frame):
    """
    Calculate the rmsd between two frames.

    Both frames are reshaped to ``(-1, 3)``, centred on their centroids and
    compared with ``rmsd.kabsch_rmsd``. The input arrays are left unchanged.

    :param coor_frame: array whose size is a multiple of 3
    :param ref_coor_frame: reference array, same requirements
    :return: Kabsch RMSD between the two centred frames
    """
    coor_tmp = coor_frame.reshape(-1, 3)
    ref_tmp = ref_coor_frame.reshape(-1, 3)

    # reshape() returns a view whenever it can, so an in-place ``-=`` would
    # re-centre the caller's arrays as a side effect (and fail outright for
    # integer input). Subtract out of place instead.
    coor_tmp = coor_tmp - rmsd.centroid(coor_tmp)
    ref_tmp = ref_tmp - rmsd.centroid(ref_tmp)

    return rmsd.kabsch_rmsd(coor_tmp, ref_tmp)
def compute_rmsd_matrix(confs):
    """Return the symmetric matrix of pairwise Kabsch RMSDs.

    ``confs`` is indexed as ``confs[i, :, :]``, one coordinate block per
    conformer along the first axis.

    Returns
    -------
    numpy.ndarray
        ``(n_conf, n_conf)`` matrix where entries ``[i, j]`` and ``[j, i]``
        both hold ``kabsch_rmsd(confs[i], confs[j])`` for ``i >= j``.
    """
    n_conf = confs.shape[0]
    rmsds = np.zeros((n_conf, n_conf))
    for i in range(n_conf):
        # Each value is mirrored, so only j <= i has to be evaluated. The
        # former full i x j loop computed every pair twice, and the later
        # (i > j) result overwrote the earlier one -- exactly the value that
        # is computed here.
        for j in range(i + 1):
            r = kabsch_rmsd(confs[i, :, :], confs[j, :, :])
            rmsds[i, j] = r
            rmsds[j, i] = r
    return rmsds
def RMSDmetric(structure1, structure2):
    """Kabsch RMSD between two flattened coordinate vectors.

    Both inputs are reshaped to ``(len(structure1) / 3, 3)``, centred on
    their centroids and compared with ``rmsd.kabsch_rmsd``.
    """
    n_rows = int(len(structure1) / 3)

    centred = []
    for flat in (structure1, structure2):
        xyz = flat.reshape(n_rows, 3)
        centred.append(xyz - rmsd.centroid(xyz))

    return rmsd.kabsch_rmsd(centred[0], centred[1])
def fitness_single(masses, fitness_parameters):
    """Fitness of one individual against a single target structure.

    ``fitness_parameters[0]`` is the protein network and
    ``fitness_parameters[1]`` the target coordinates. The network's edge
    weights are modified with ``masses``, spectral coordinates are computed
    in 3 dimensions using a diagonal matrix of inverse node degrees, and the
    Kabsch RMSD to the target (via ``.values``) is returned.
    """
    network = nt.modify_edges_weitghts(fitness_parameters[0], masses)

    # network.degree yields (node, degree) pairs.
    inverse_degrees = [float(1 / degree) for _, degree in network.degree]
    predicted = nt.get_spectral_coordinates(
        nx.laplacian_matrix(network).toarray(),
        mod_matrix=np.diag(inverse_degrees),
        dim=3)

    target = fitness_parameters[1]
    return rmsd.kabsch_rmsd(predicted.values, target.values)
def compute_similarity(site_a, site_b):
    """
    Compute the similarity between two given ActiveSite instances.

    The residue sequences of both sites are locally aligned, the coordinates
    of the matching positions are collected, centred and superimposed, and
    the resulting RMSD is returned. A larger value means less similar sites;
    0 means identical coordinates for the matched residues.

    Input: two ActiveSite instances
    Output: the similarity between them (a floating point number);
        ``inf`` when no alignment exists or fewer than two positions match
    """
    # Single-letter amino-acid strings for both sites.
    seq_a = output_aa_string(site_a.residues)
    seq_b = output_aa_string(site_b.residues)

    # Local alignment (dynamic programming) looks for the best-scoring
    # alignment between substrings of the two sequences. With ``localxx`` a
    # match scores +1 and mismatches and gaps score 0.
    alignments = pairwise2.align.localxx(seq_a, seq_b)
    if not alignments:
        return float("inf")  # no alignment found

    # Only the first reported alignment is used; its score is not needed.
    aligned_a, aligned_b, _score = alignments[0][:3]

    # Indices at which the residues of the two aligned strings match.
    inds_a, inds_b = match(aligned_a, aligned_b)
    if len(inds_a) < 2:
        return float("inf")

    # Coordinate matrices for the matched residues (per the original
    # comments: x, y, z of each CA atom, one row per matched residue).
    V = create_coord_matrix(site_a, inds_a)
    W = create_coord_matrix(site_b, inds_b)

    # Move both point sets to the origin so that only a rotation remains to
    # be optimised.
    V -= rmsd.centroid(V)
    W -= rmsd.centroid(W)

    # The Kabsch algorithm derives the optimal rotation from the singular
    # value decomposition of the covariance matrix; the RMSD after applying
    # that rotation is the similarity score.
    return rmsd.kabsch_rmsd(V, W)
def get_unique_geometries(self, geoms):
    """Drop geometries that duplicate an earlier one in ``geoms``.

    For every pair ``i < j`` the Kabsch RMSD of the ``(-1, 3)``-reshaped
    coordinates is computed. Whenever it is below ``self.rmsd_thresh`` the
    later geometry ``j`` is discarded.

    Returns
    -------
    list
        The geometries that were not flagged as duplicates, in input order.
    """
    geom_num = len(geoms)

    # Only the upper triangle is filled; every other entry stays at +inf and
    # can therefore never fall below the threshold.
    rmsds = np.full((geom_num, geom_num), np.inf)
    for i in range(geom_num):
        for j in range(i + 1, geom_num):
            first = geoms[i].coords.reshape(-1, 3)
            second = geoms[j].coords.reshape(-1, 3)
            rmsds[i, j] = rmsd.kabsch_rmsd(first, second)

    # Column indices of the hits are the later members of similar pairs.
    _, later = np.where(rmsds < self.rmsd_thresh)
    duplicates = set(later.tolist())

    return [geom for idx, geom in enumerate(geoms) if idx not in duplicates]
def test_reorder_inertia_hungarian():
    """Reordering via inertia axes + Hungarian must recover scrambled butane.

    ``q_coord`` holds a scrambled and rotated copy of the butane in
    ``p_coord``. After centring both and reordering ``q_coord``, the Kabsch
    RMSD has to vanish to two decimals.
    """
    # Four carbons followed by ten hydrogens.
    atoms = np.array(["C"] * 4 + ["H"] * 10)

    p_coord = np.array(
        [
            [2.142, 1.395, -8.932],
            [3.631, 1.416, -8.537],
            [4.203, -0.012, -8.612],
            [5.691, 0.009, -8.218],
            [1.604, 0.760, -8.260],
            [1.745, 2.388, -8.880],
            [2.043, 1.024, -9.930],
            [4.169, 2.051, -9.210],
            [3.731, 1.788, -7.539],
            [3.665, -0.647, -7.940],
            [4.104, -0.384, -9.610],
            [6.088, -0.983, -8.270],
            [5.791, 0.381, -7.220],
            [6.230, 0.644, -8.890],
        ]
    )

    q_coord = np.array(
        [
            [6.71454, -5.53848, -3.50851],
            [6.95865, -6.22697, -2.15264],
            [8.16747, -5.57632, -1.45606],
            [5.50518, -6.19016, -4.20589],
            [5.33617, -5.71137, -5.14853],
            [7.58263, -5.64795, -4.12498],
            [6.51851, -4.49883, -3.35011],
            [6.09092, -6.11832, -1.53660],
            [5.70232, -7.22908, -4.36475],
            [7.15558, -7.26640, -2.31068],
            [8.33668, -6.05459, -0.51425],
            [7.97144, -4.53667, -1.29765],
            [4.63745, -6.08152, -3.58986],
            [9.03610, -5.68475, -2.07173],
        ]
    )

    # Centre both structures before reordering.
    p_coord -= rmsd.centroid(p_coord)
    q_coord -= rmsd.centroid(q_coord)

    order = rmsd.reorder_inertia_hungarian(atoms, atoms, p_coord, q_coord)
    result_rmsd = rmsd.kabsch_rmsd(p_coord, q_coord[order])

    np.testing.assert_almost_equal(0, result_rmsd, decimal=2)
def test_geom_from_zmat(this_dir):
    """A hand-written Z-matrix must reproduce ``glycine_noh.xyz``.

    The geometry built from the five ``ZLine`` entries is compared with the
    reference file in ``this_dir`` via the Kabsch RMSD (``translate=True``),
    which has to be zero within ``1e-6``.
    """
    # Bond lengths are scaled by the module-level constant ``AB``.
    zlines = [
        ZLine("C"),
        ZLine("C", 0, 1.510486 * AB),
        ZLine("N", 0, 1.459785 * AB, 1, 112.257683),
        ZLine("O", 1, 1.220389 * AB, 0, 118.653885, 2, -179.541229),
        ZLine("O", 1, 1.353023 * AB, 0, 122.591621, 2, 0.825231),
    ]
    built = geom_from_zmat(zlines)
    expected = geom_loader(this_dir / "glycine_noh.xyz")

    deviation = kabsch_rmsd(built.coords3d, expected.coords3d, translate=True)
    assert deviation == pytest.approx(0., abs=1e-6)
def rmsd_with(self, structure):
    """Calculates the Root Mean Square Deviation between this structure and
    another.

    Atoms are paired via ``self.pairing_with(structure)``. The locations of
    each side are shifted by the respective structure's ``center_of_mass``
    before the Kabsch RMSD is computed and rounded to 12 decimals.

    :param AtomStructure structure: the structure to check against.
    :raises ValueError: if the other structure has a different number of
        atoms (presumably raised by ``pairing_with`` -- not visible here).
    :rtype: ``float``"""

    def shifted(atoms, center):
        # Express every atom location relative to the given centre.
        cx, cy, cz = center[0], center[1], center[2]
        return [
            [x - cx, y - cy, z - cz]
            for x, y, z in (atom.location for atom in atoms)
        ]

    pairing = self.pairing_with(structure)
    # Keys of the pairing belong to this structure, values to the other one.
    own_atoms, other_atoms = zip(*pairing.items())

    own_coords = shifted(own_atoms, self.center_of_mass)
    other_coords = shifted(other_atoms, structure.center_of_mass)
    return round(rmsd.kabsch_rmsd(own_coords, other_coords), 12)
def get_rmsd(coords, reference):
    """
    Calculate the rmsd of the given coordinates compared to a reference.

    Both coordinate sets are centred on their centroids and compared with
    ``rmsd.kabsch_rmsd``. The inputs themselves are not modified.

    Input:
    > coords        coordinates to be compared vs. the reference
                    coordinates
    > reference     reference to compare coordinates to

    NOTE(review): the original docstring described both arguments as
    ``list[np.array, ...]``; the subtraction below needs array-like objects
    that support element-wise arithmetic (e.g. NumPy arrays) -- confirm
    against the callers.

    Sources: https://pypi.org/project/rmsd/
    """
    # Translate the centre of geometry to the origin for the reference and
    # the current frame. This is done out of place: the former in-place
    # ``-=`` shifted the caller's arrays as a hidden side effect.
    coords = coords - rmsd.centroid(coords)
    reference = reference - rmsd.centroid(reference)

    return rmsd.kabsch_rmsd(coords, reference)
def myRMSDmetric(arr1, arr2):
    """
    Distance callable for sklearn's ``radius_neighbors_graph``.

    This function is built under the assumption that the space dimension
    is 3!!!

    Requirement from sklearn radius_neighbors_graph: the callable should take
    two arrays as input and return one value indicating the distance between
    them.

    Input: two rows of the reshaped XYZ trajectory (number of steps times
        nDOF), each a flat vector of length ``3 * nParticles``
    Inside: reshape to XYZ format, centre both and apply the Kabsch RMSD
    Output: rmsd distance
    """
    n_particles, leftover = divmod(len(arr1), 3)
    # The flat vector must hold complete (x, y, z) triples.
    assert leftover == 0

    centred = []
    for flat in (arr1, arr2):
        xyz = flat.reshape(n_particles, 3)
        centred.append(xyz - rmsd.centroid(xyz))

    return rmsd.kabsch_rmsd(centred[0], centred[1])
def compare_positions(pos, pos_list, threshold=0.005):
    """Tell whether ``pos`` differs from every entry of ``pos_list``.

    Returns ``False`` as soon as one entry has a Kabsch RMSD to ``pos`` below
    ``threshold``; returns ``True`` otherwise (also for an empty list).

    Original author's notes (apparently planned extensions, not implemented
    here): views_list / threshold_list; iterate over views -- hydrogen views
    on carbons (based on bonds), heavy atoms.
    """
    return not any(
        rmsd.kabsch_rmsd(pos, known) < threshold for known in pos_list
    )
def fitness_all(masses, fitness_parameters):
    """Summed fitness of one individual over a list of proteins.

    ``fitness_parameters`` is indexed as follows:
    ``[0]`` protein network list, ``[1]`` target coordinates list,
    ``[2]`` dataset list, ``[3]`` edge amino-acid list.

    An amino-acid contact map is derived from ``masses`` and used to refresh
    the network weights, yielding one Laplacian per protein. For each
    Laplacian, 3-D spectral coordinates are computed with the inverse of its
    diagonal as modification matrix. The Kabsch RMSDs to the corresponding
    targets (via ``.values``) are added up and returned.
    """
    contact_map = create_AA_contact_map(masses)
    laplacians = refresh_network_weights(
        fitness_parameters[2],
        fitness_parameters[0],
        contact_map,
        fitness_parameters[3])
    targets = fitness_parameters[1]

    total = 0.0
    for idx, laplacian in enumerate(laplacians):
        predicted = nt.get_spectral_coordinates(
            laplacian,
            mod_matrix=np.diag(1 / np.diag(laplacian)),
            dim=3)
        total += rmsd.kabsch_rmsd(predicted.values, targets[idx].values)
    return total
def calculate_transformation_kabsch(
        src_points: np.ndarray,
        dst_points: np.ndarray) -> Tuple[np.ndarray, float]:
    """
    Calculates the optimal rigid transformation from src_points to
    dst_points (regarding the least squares error)

    Parameters:
    -----------
    src_points: array
        (3,N) matrix
    dst_points: array
        (3,N) matrix

    Returns:
    -----------
    transformation: array
        result of ``create_homogenous`` called with the transposed Kabsch
        rotation matrix and the translation vector
    rmsd_value: float
        Kabsch RMSD between the centred point sets

    Raises:
    -----------
    AssertionError
        if the two inputs differ in shape
    ValueError
        if the first dimension of the inputs is not 3
    """
    assert src_points.shape == dst_points.shape
    if src_points.shape[0] != 3:
        # ValueError is still caught by callers handling ``Exception``.
        raise ValueError(
            "The input data matrix had to be transposed in order to compute transformation."
        )

    # The rmsd routines are used with one point per row, i.e. (N,3).
    src_points = src_points.transpose()
    dst_points = dst_points.transpose()

    src_centroid = rmsd.centroid(src_points)
    dst_centroid = rmsd.centroid(dst_points)
    src_points_centered = src_points - src_centroid
    dst_points_centered = dst_points - dst_centroid

    rotation_matrix = rmsd.kabsch(src_points_centered, dst_points_centered)
    rmsd_value = rmsd.kabsch_rmsd(src_points_centered, dst_points_centered)

    # The rotation acts on row vectors from the right, hence the translation
    # is the destination centroid minus the rotated source centroid.
    translation_vector = dst_centroid - np.matmul(src_centroid, rotation_matrix)

    return create_homogenous(rotation_matrix.transpose(),
                             translation_vector.transpose()), rmsd_value
def find_rotation_matrix(ax, ay, az, imu_ax, imu_ay, imu_az):
    """Print the Kabsch rotation between two acceleration series and plot
    the rotated result.

    The first ``min(len(ax), len(imu_ax))`` samples are used. ``ax``, ``ay``
    and ``az`` are divided by 9.81 (presumably converting m/s^2 to g -- TODO
    confirm) while the IMU components are taken as they are. The rotation
    matrix and the Kabsch RMSD are printed, and the rotated first series is
    plotted per axis on matplotlib figure 5.
    """
    print("Finding rotation matrix...")

    # Kabsch algorithm setup: one (x, y, z) row per sample.
    n_samples = np.minimum(len(ax), len(imu_ax))
    diff_a = np.array(
        [[ax[i] / 9.81, ay[i] / 9.81, az[i] / 9.81] for i in range(n_samples)])
    imu_a = np.array(
        [[imu_ax[i], imu_ay[i], imu_az[i]] for i in range(n_samples)])

    print("Rotation matrix: ", rmsd.kabsch(diff_a, imu_a))
    print("RMSD: ", rmsd.kabsch_rmsd(diff_a, imu_a))

    rotated_a = rmsd.kabsch_rotate(diff_a, imu_a)

    # Split the rotated samples back into one series per axis.
    r_ax = [sample[0] for sample in rotated_a]
    r_ay = [sample[1] for sample in rotated_a]
    r_az = [sample[2] for sample in rotated_a]

    x_axis = np.array(range(len(r_ax)))
    plt.figure(5)
    plt.title("Rotated Differentiated Acceleration")
    for series, label, color in (
            (r_ax, 'x', 'red'),
            (r_ay, 'y', 'green'),
            (r_az, 'z', 'blue')):
        plt.plot(x_axis, series, label=label, color=color)
    plt.legend()
if __name__ == "__main__":
    # Self-check: the TensorFlow Kabsch RMSD must agree with the NumPy
    # implementation of the rmsd package for both argument orders.

    # Graph: centre both placeholders, then compute the Kabsch RMSD.
    P = tf.placeholder(tf.float32, [None, 3])
    Q = tf.placeholder(tf.float32, [None, 3])
    P_cent = P - tf_centroid(P)
    Q_cent = Q - tf_centroid(Q)
    pq_rmsd = tf_kabsch_rmsd(P_cent, Q_cent)

    # Test data: the two example PDB structures.
    _, P_np = rmsd.get_coordinates_pdb('ci2_1.pdb')
    _, Q_np = rmsd.get_coordinates_pdb('ci2_2.pdb')

    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # RMSD(Q, P): the structures are fed in swapped order.
    qp_rmsd_tf = sess.run(pq_rmsd, feed_dict={P: Q_np, Q: P_np})
    qp_rmsd_np = rmsd.kabsch_rmsd(
        Q_np - rmsd.centroid(Q_np),
        P_np - rmsd.centroid(P_np))

    # RMSD(P, Q): natural order.
    pq_rmsd_tf = sess.run(pq_rmsd, feed_dict={P: P_np, Q: Q_np})
    pq_rmsd_np = rmsd.kabsch_rmsd(
        P_np - rmsd.centroid(P_np),
        Q_np - rmsd.centroid(Q_np))

    print("Kabsch RMSD(Q, P): Numpy implementation {}, TF implementation {}".format(
        qp_rmsd_np, qp_rmsd_tf))
    assert isclose(qp_rmsd_tf, qp_rmsd_np, abs_tol=1e-5)

    print("Kabsch RMSD(P, Q): Numpy implementation {}, TF implementation {}".format(
        pq_rmsd_np, pq_rmsd_tf))
    assert isclose(pq_rmsd_tf, pq_rmsd_np, abs_tol=1e-5)
# Fragment: the enclosing definition is not visible here. It relies on
# commonData, numOfStructures and numOfLoci being defined earlier.

# Centre every structure on its centroid and remember the centroid.
centroidData = []
for i in range(numOfStructures):
    commonData[i] = np.array(commonData[i])
    centroidPos = rmsd.centroid(commonData[i])
    commonData[i] -= centroidPos
    centroidData.append(centroidPos)

# calculate pairwise deviation and rotate
# One column of per-locus deviations is appended for every pair with j > i.
deviations = np.empty((numOfLoci, 0), float)
for i in range(numOfStructures):
    for j in range(numOfStructures):
        if j == i:
            continue
        # mirror image if needed: keep whichever of j and its point
        # reflection (-1 * coordinates) superimposes better onto i
        mirrorFactor = 1.0
        if rmsd.kabsch_rmsd(commonData[i], commonData[j]) > rmsd.kabsch_rmsd(
                commonData[i], -1.0 * commonData[j]):
            mirrorFactor = -1.0
        # calculate deviation
        rotationMatrix = rmsd.kabsch(mirrorFactor * commonData[j],
                                     commonData[i])
        if j > i:
            # Per-locus Euclidean distance after superimposing j onto i.
            deviation = np.linalg.norm(
                np.dot(mirrorFactor * commonData[j], rotationMatrix) -
                commonData[i],
                axis=1).T
            deviations = np.c_[deviations, deviation]
            sys.stderr.write('median deviation between ' + str(i) + ' and ' +
                             str(j) + ': ' + str(np.median(deviation)) + '\n')

        # rotate j to align with i
        # NOTE(review): only the log message is visible in this fragment; the
        # nesting level of this statement is inferred -- confirm.
        sys.stderr.write('aligning ' + str(j) + ' to ' + str(i) + '\n')
def align(argv):
    """Align 3DG structures to each other and report per-locus RMSD.

    Command-line style entry point. ``argv[0]`` is skipped; ``-o PREFIX`` is
    the only option and all remaining arguments are 3DG file names (at least
    two are required).

    Each input line is split on whitespace into a name, an integer position
    and three float coordinates; ``(name, position)`` identifies a locus.
    Only loci present in every file are used for the alignment. For every
    ordered pair of structures the better of the direct and the
    point-reflected Kabsch superposition is chosen. Per-locus deviations are
    collected once per unordered pair and summarised as an RMSD per locus.

    Output
    ------
    stdout : tab-delimited ``name, position, RMSD`` for every common locus.
    stderr : progress messages, median deviations and summary statistics.
    files  : with ``-o PREFIX``, ``PREFIX<j>_to_<i>.3dg`` holding all loci of
             file ``j`` aligned onto file ``i``.

    Returns
    -------
    int
        0 on success, 1 on a usage error (unknown option, no input files or
        fewer than two structures).
    """
    # default parameters
    output_prefix = None

    # read arguments
    try:
        opts, args = getopt.getopt(argv[1:], "o:")
    except getopt.GetoptError:
        sys.stderr.write("[E::" + __name__ + "] unknown command\n")
        return 1
    if len(args) == 0:
        sys.stderr.write(
            "Usage: dip-c align [options] <in1.3dg> <in2.3dg> ...\n")
        sys.stderr.write("Options:\n")
        sys.stderr.write("  -o STR    output prefix [no output]\n")
        sys.stderr.write("Output:\n")
        sys.stderr.write("  tab-delimited: homolog, locus, RMSD\n")
        sys.stderr.write(
            "  additionally with \"-o\": 3DG files aligned to each other\n")
        return 1
    for o, a in opts:
        if o == "-o":
            output_prefix = a

    num_structures = len(args)
    if num_structures < 2:
        sys.stderr.write("[E::" + __name__ +
                         "] at least 2 structures are required\n")
        return 1

    # load 3dg files
    input_data = []
    for counter, input_filename in enumerate(args):
        sys.stderr.write("[M::" + __name__ + "] reading 3dg file " +
                         str(counter) + ": " + input_filename + "\n")
        structure = {}
        # Text mode keeps the locus names as ``str`` so they can be joined
        # and written back out; the context manager closes the handle.
        with open(input_filename, "r") as input_file:
            for input_file_line in input_file:
                fields = input_file_line.strip().split()
                if not fields:
                    continue  # tolerate blank lines
                structure[(fields[0], int(fields[1]))] = [
                    float(fields[2]),
                    float(fields[3]),
                    float(fields[4])
                ]
        input_data.append(structure)

    # find common particles
    common_loci = set(input_data[0])
    for input_structure in input_data[1:]:
        common_loci = common_loci.intersection(set(input_structure))
    num_loci = len(common_loci)
    common_loci = list(common_loci)
    common_data = np.array(
        [[input_structure[common_locus] for common_locus in common_loci]
         for input_structure in input_data])
    sys.stderr.write("[M::" + __name__ + "] found " + str(num_loci) +
                     " common particles\n")

    # subtract centroid
    centroid_data = []
    for i in range(num_structures):
        centroid_pos = rmsd.centroid(common_data[i])
        common_data[i] -= centroid_pos
        centroid_data.append(centroid_pos)
    sys.stderr.write("[M::" + __name__ + "] found centroids for " +
                     str(num_structures) + " structures\n")

    # calculate pairwise deviation and rotate
    deviations = np.empty((num_loci, 0), float)
    for i in range(num_structures):
        for j in range(num_structures):
            if j == i:
                continue

            # mirror image if needed: use whichever of j and its point
            # reflection superimposes better onto i
            mirror_factor = 1.0
            if rmsd.kabsch_rmsd(common_data[i],
                                common_data[j]) > rmsd.kabsch_rmsd(
                                    common_data[i], -1.0 * common_data[j]):
                mirror_factor = -1.0

            # calculate deviation
            rotation_matrix = rmsd.kabsch(mirror_factor * common_data[j],
                                          common_data[i])
            if j > i:
                # Deviations are recorded once per unordered pair.
                deviation = np.linalg.norm(
                    np.dot(mirror_factor * common_data[j], rotation_matrix) -
                    common_data[i],
                    axis=1)
                deviations = np.c_[deviations, deviation]
                sys.stderr.write("[M::" + __name__ +
                                 "] median deviation between file " + str(i) +
                                 " and file " + str(j) + ": " +
                                 str(np.median(deviation)) + "\n")

            # rotate
            if output_prefix is not None:
                # rotate j to align with i
                sys.stderr.write("[M::" + __name__ + "] aligning file " +
                                 str(j) + " to file " + str(i) + "\n")
                aligned_filename = output_prefix + str(j) + "_to_" + str(
                    i) + ".3dg"
                with open(aligned_filename, "w") as aligned_file:
                    # All loci of file j are written, not only the common
                    # ones used to determine the transformation.
                    for input_locus in input_data[j]:
                        aligned_pos = np.dot(
                            (np.array(input_data[j][input_locus]) -
                             centroid_data[j]) * mirror_factor,
                            rotation_matrix) + centroid_data[i]
                        aligned_file.write("\t".join([
                            input_locus[0],
                            str(input_locus[1]),
                            str(aligned_pos[0]),
                            str(aligned_pos[1]),
                            str(aligned_pos[2])
                        ]) + "\n")

    # summarize rmsd and print
    rmsds = np.sqrt((deviations**2).mean(axis=1))
    totalrmsd = np.sqrt((rmsds**2).mean(axis=0))
    sys.stderr.write("[M::" + __name__ + "] RMS RMSD: " + str(totalrmsd) +
                     "\n")
    sys.stderr.write("[M::" + __name__ + "] median RMSD: " +
                     str(np.median(rmsds, axis=0)) + "\n")
    sys.stderr.write("[M::" + __name__ + "] writing output\n")
    for i in range(num_loci):
        sys.stdout.write("\t".join(
            map(str, [common_loci[i][0], common_loci[i][1], rmsds[i]])) +
                         "\n")

    return 0
def calc_exact_rmsd(ref_atoms, mob_atoms):
    """Return the Kabsch RMSD between two centred coordinate sets.

    Parameters
    ----------
    ref_atoms, mob_atoms
        Array-likes of coordinates, one point per row. Neither input is
        modified.

    Returns
    -------
    float
        RMSD after optimal superposition of the centred coordinates.
    """
    ra = np.asarray(ref_atoms)
    ma = np.asarray(mob_atoms)

    # Out-of-place centring creates fresh arrays, so the inputs stay intact
    # without the former deepcopy + np.array double copy. It also works for
    # integer-typed input, where an in-place ``-=`` of a float centroid
    # would raise.
    ra = ra - rmsd.centroid(ra)
    ma = ma - rmsd.centroid(ma)

    return rmsd.kabsch_rmsd(ra, ma)
def matched_rmsd(geom1, geom2, thresh=5e-2):
    """RMSD for optimally aligned and matched geometries.

    Both geometries are copied and brought into standard orientation. The
    second one is then subjected to axis swaps and reflections; after each
    transformation its atoms are matched to the first geometry with
    ``match_geom_atoms`` and the Kabsch RMSD is recorded. The search stops
    early once an RMSD at or below ``thresh`` is found.

    Parameters
    ----------
    geom1, geom2 : Geometry
        Geometries to compare; they are not modified.
    thresh : float
        RMSD at or below which two geometries count as similar.

    Returns
    -------
    matched_rmsd : float
        RMSD of optimally aligned and matched geometries.
    matched_geoms : tuple(Geometry, Geometry)
        Tuple of the optimally aligned and matched geometries.
    """
    # standard_orientation() moves the coordinates, so work on copies.
    geom1_copy = geom1.copy()
    geom1_copy.standard_orientation()
    coords3d_1 = geom1_copy.coords3d

    geom2_copy = geom2.copy()
    geom2_copy.standard_orientation()
    coords3d_2 = geom2_copy.coords3d.copy()

    # Standard orientation alone may not suffice for the Hungarian matching;
    # additional axis swaps and reflections have to be considered.
    # Six axis swaps: all permutations of (x, y, z).
    swaps = it.permutations((0, 1, 2))
    # Eight reflections: every sign combination of (x, y, z).
    reflections = (
        (1, 1, 1),     # no reflection
        (-1, 1, 1),    # reflect on yz plane
        (1, -1, 1),    # reflect on xz plane
        (1, 1, -1),    # reflect on xy plane
        (-1, -1, 1),   # reflect on yz and xz planes
        (-1, 1, -1),   # reflect on yz and xy planes
        (1, -1, -1),   # reflect on xz and xy planes
        (-1, -1, -1),  # reflect on yz, xz, xy planes
    )

    matched_rmsds = []
    matched_coords = []
    # 48 combinations of six axis swaps and eight reflections.
    for swap, reflection in it.product(swaps, reflections):
        # Apply swap and reflection to a fresh copy of the oriented coords.
        transformed = apply_transform(coords3d_2.copy(), swap, reflection)
        candidate = geom2.copy()
        candidate.coords3d = transformed

        # Apply Hungarian method to the transformed Geometry.
        matched = match_geom_atoms(geom1_copy, candidate)
        mrmsd = rmsd.kabsch_rmsd(coords3d_1, matched.coords3d)
        matched_rmsds.append(mrmsd)
        matched_coords.append(matched.coords)

        # Similar enough: the remaining transformations can be skipped.
        if mrmsd <= thresh:
            break

    all_rmsds = np.array(matched_rmsds)
    best = all_rmsds.argmin()
    geom2_copy.coords = matched_coords[best]
    return all_rmsds[best], (geom1_copy, geom2_copy)