def get_atom_correspondence(pdb, gro):
    """Compute the atom correspondence between a PDB and a GRO file.

    The first molecule of each file is read with pybel, reduced to the
    ``(atoms, coordinates)`` pair produced by ``get_mol_info`` and matched
    with ``rmsd.reorder_hungarian`` in both directions.

    Args:
        pdb: Path of the PDB file.
        gro: Path of the GRO file.

    Returns:
        A 2-tuple ``(pdb_vs_gro, gro_vs_pdb)``: the result of
        ``reorder_hungarian`` with the PDB molecule as the first (reference)
        argument, followed by the result with the GRO molecule as reference.
    """
    # Only the first molecule found in each file is considered.
    pdb_molecule = next(pybel.readfile("pdb", pdb))
    pdb_atoms, pdb_coords = get_mol_info(pdb_molecule)

    gro_molecule = next(pybel.readfile("gro", gro))
    gro_atoms, gro_coords = get_mol_info(gro_molecule)

    pdb_vs_gro = rmsd.reorder_hungarian(
        pdb_atoms, gro_atoms, pdb_coords, gro_coords)
    gro_vs_pdb = rmsd.reorder_hungarian(
        gro_atoms, pdb_atoms, gro_coords, pdb_coords)
    return pdb_vs_gro, gro_vs_pdb
def per_residue_rms(self, other, rot=None, tran=None, transform=False):
    """Calculate the RMSD of each residue pair of two sites.

    Residues of ``self`` and ``other`` are paired positionally. For each pair
    the functional atoms are fetched, mismatching atom labels are replaced by
    the placeholder ``'MUT'`` in both residues, the atoms of the ``other``
    residue are reordered with the Hungarian algorithm and the RMSD is
    computed.

    Args:
        other: Site whose residues are compared against those of ``self``.
        rot: Rotation matrix for the ``other`` site. If either ``rot`` or
            ``tran`` is None, both are obtained from a weighted
            ``self.fit(other, ...)`` call.
        tran: Translation vector for the ``other`` site (see ``rot``).
        transform: If True, the coordinates of each ``other`` residue are
            transformed with ``rot`` and ``tran`` before reordering. If
            False, the coordinates are used as they are and ``rot``/``tran``
            are not applied.

    Returns:
        numpy array with one value per residue pair: the RMSD rounded to
        three decimals, or NaN if either residue of the pair is a gap.
    """
    # The fit runs even when `transform` is False; its result is then unused.
    if rot is None or tran is None:
        rot, tran, _, _ = self.fit(other, weighted=True, transform=False)

    per_residue = []
    for fixed_res, mobile_res in zip(self, other):
        # Gap positions cannot be compared
        if fixed_res.is_gap or mobile_res.is_gap:
            per_residue.append(np.nan)
            continue

        fixed_names, fixed_xyz = fixed_res.get_func_atoms()
        mobile_names, mobile_xyz = mobile_res.get_func_atoms()

        # Give mismatching atoms a common label so they can still be paired
        for idx, (fixed_name, mobile_name) in enumerate(
                zip(fixed_names, mobile_names)):
            if fixed_name != mobile_name:
                fixed_names[idx] = 'MUT'
                mobile_names[idx] = 'MUT'

        if transform:
            mobile_xyz = PdbSite._transform(mobile_xyz, rot, tran)

        # Optimal atom order of the mobile residue relative to the fixed one
        order = reorder_hungarian(
            fixed_names, mobile_names, fixed_xyz, mobile_xyz)
        mobile_xyz = mobile_xyz[order]

        residue_rms = PdbSite._rmsd(fixed_xyz, mobile_xyz)
        per_residue.append(np.round(residue_rms, 3))

    return np.array(per_residue)
def test_reorder_qml():
    """Check that similarity-based reordering recovers a shuffled, rotated molecule.

    The reference molecule is shuffled and rotated. Distance-based Hungarian
    reordering is expected to restore the element order but leave a large
    RMSD, while ``rmsd.reorder_similarity`` is expected to restore the exact
    atom order so that the Kabsch RMSD is zero.
    """
    filename_1 = pathlib.PurePath(RESOURCE_PATH, "CHEMBL3039407.xyz")
    p_atoms, p_coord = rmsd.get_coordinates_xyz(filename_1)

    # Reorder atoms
    n_atoms = len(p_atoms)
    random_reorder = np.arange(n_atoms, dtype=int)
    np.random.seed(5)
    np.random.shuffle(random_reorder)

    # Indexing with an index array already yields new arrays, so the
    # reference atoms and coordinates are left untouched.
    q_atoms = p_atoms[random_reorder]
    q_coord = p_coord[random_reorder]

    # Mess up the distance matrix by rotating the molecule.
    # NOTE: np.cos/np.sin interpret theta as radians, so this is a rotation
    # by 180 rad rather than 180 degrees. It is still a proper rotation and
    # is kept unchanged because the RMSD threshold asserted below depends on
    # the exact rotation applied.
    theta = 180.0
    rotation_y = np.array(
        [
            [np.cos(theta), 0, np.sin(theta)],
            [0, 1, 0],
            [-np.sin(theta), 0, np.cos(theta)],
        ]
    )
    q_coord = np.dot(q_coord, rotation_y)

    # Reorder with standard hungarian, this will fail reorder and give large
    # RMSD
    view_dist = rmsd.reorder_hungarian(p_atoms, q_atoms, p_coord, q_coord)
    q_atoms_dist = q_atoms[view_dist]
    q_coord_dist = q_coord[view_dist]
    _rmsd_dist = rmsd.kabsch_rmsd(p_coord, q_coord_dist)
    assert q_atoms_dist.tolist() == p_atoms.tolist()
    assert _rmsd_dist > 3.0

    # Reorder based on chemical similarity
    view = rmsd.reorder_similarity(p_atoms, q_atoms, p_coord, q_coord)
    q_atoms = q_atoms[view]
    q_coord = q_coord[view]

    # Calculate new RMSD with correct atom order
    _rmsd = rmsd.kabsch_rmsd(p_coord, q_coord)

    # Assert correct atom order
    assert q_atoms.tolist() == p_atoms.tolist()

    # Assert this is the same molecule. The comparison used to be a bare
    # expression without `assert`, so it was never actually checked. The
    # explicit tolerance absorbs floating-point noise from the rotation fit.
    assert _rmsd == pytest.approx(0.0, abs=1e-6)
def test_reorder_distance(self):
    """Check that Hungarian reordering undoes a row shuffle of the coordinates.

    Five identical "H" atoms get distinct integer coordinates. The rows of a
    copy are shuffled with a fixed seed, and indexing the shuffled copy with
    the result of ``reorder_hungarian`` must reproduce the reference.
    """
    n_atoms = 5
    labels = np.array(["H"] * n_atoms)

    # Reference coordinates: rows [0, 1, 2], [3, 4, 5], ...
    reference = np.arange(n_atoms * 3).reshape((n_atoms, 3))

    # Shuffle the rows of an independent copy, deterministically
    shuffled = copy.deepcopy(reference)
    np.random.seed(6)
    np.random.shuffle(shuffled)

    order = reorder_hungarian(labels, labels, reference, shuffled)
    self.assertEqual(reference.tolist(), shuffled[order].tolist())
def fit(self, other, weighted=False, cycles=1, cutoff=999, scaling_factor=None,
        transform=False, mutate=True, reorder=True, allow_symmetrics=True,
        exclude=None, get_array=False):
    """Iteratively fit two catalytic sites (self: fixed site, other: mobile site).

    Uses the Kabsch algorithm from the rmsd module
    (https://github.com/charnley/rmsd). Can also find the optimal atom
    alignment in each residue, considering symmetrical atoms and functionally
    similar residues, using the Hungarian algorithm.

    Args:
        other: Mobile active site to fit.
        weighted: Perform weighted superposition in the last iteration.
        cycles: Number of fitting iterations to exclude outlying atoms.
        cutoff: Forwarded to ``PdbSite._super`` for the iterative
            superposition (presumably the outlier cutoff; confirm there).
        scaling_factor: Forwarded to ``PdbSite._super`` for the iterative
            superposition.
        transform: Also transform the mobile site's structure with the
            resulting rotation and translation.
        mutate: If the two active sites do not have the same atoms at a
            position, relabel both atoms as ``<prefix>.MUT`` (pseudo-mutation)
            to facilitate atom correspondence.
        reorder: Find the optimal atom correspondence (within a residue)
            between the two sites, taking into account conservative
            mutations and symmetrical atoms. See the definitions in the
            residue_definitions.py module.
        allow_symmetrics: Allow flipping of side chains if atoms are
            equivalent or symmetrical.
        exclude: Position, or list/tuple/set/frozenset of positions, to leave
            out of the fit in addition to the gap positions of both sites.
        get_array: Also return the coordinate arrays used for the fit.

    Returns:
        ``(rot, tran, rms, rms_all)``, or
        ``(rot, tran, rms, rms_all, p_coords, q_trans)`` if ``get_array`` is
        True.

        rot: Rotation matrix to transform mobile site into the fixed site
        tran: Translation vector to transform mobile site into the fixed site
        rms: RMSD after fitting, excluding outliers
        rms_all: RMSD over all atoms, including outliers
        p_coords: Coordinates of the fixed site's functional atoms
        q_trans: Reordered mobile coordinates after applying rot and tran

        If either site yields no functional atoms, the 4-tuple
        ``(None, None, None, None)`` is returned regardless of ``get_array``.

    Raises:
        Exception: If the number of functional atoms in the two sites is not
            the same (e.g. if there are missing atoms from the parent
            structure).
    """
    # In case gaps are present, exclude those positions
    gaps = set(self.get_gaps() + other.get_gaps())

    # If we want to exclude residues from fitting. isinstance() is used so
    # that frozensets and subclasses of the containers are treated as
    # collections of positions rather than as one single position.
    if exclude is not None:
        if not isinstance(exclude, (list, tuple, set, frozenset)):
            exclude = [exclude]
        gaps.update(exclude)

    # Get atom identifier strings and coords as numpy arrays
    p_atoms, p_coords = self._get_func_atoms(allow_symmetrics, omit=gaps)
    q_atoms, q_coords = other._get_func_atoms(allow_symmetrics, omit=gaps)
    if p_atoms is None or q_atoms is None:
        return None, None, None, None
    if len(p_atoms) != len(q_atoms):
        raise Exception('Atom number mismatch in sites {} and {}'.format(
            self.id, other.id))

    # Initial crude superposition; only needed to bring the mobile atoms
    # close enough for the distance-based reordering below
    rot, tran, _, _ = PdbSite._super(p_coords, q_coords, cycles=1)
    q_trans = PdbSite._transform(q_coords, rot, tran)

    # In case of non-conservative mutations, make pseudo-mutations to
    # facilitate superposition
    if mutate:
        for i, (p_atom, q_atom) in enumerate(zip(p_atoms, q_atoms)):
            if p_atom != q_atom:
                q_atoms[i] = '{}.MUT'.format(q_atoms[i].split('.')[0])
                p_atoms[i] = '{}.MUT'.format(p_atoms[i].split('.')[0])

    # Reorder atoms using the Hungarian algorithm from rmsd package. The
    # order is determined on the crudely superposed coordinates but applied
    # to the untransformed ones.
    if reorder:
        q_review = reorder_hungarian(p_atoms, q_atoms, p_coords, q_trans)
        q_coords = q_coords[q_review]

    # Iterative superposition. Get rotation matrix, translation vector and RMSD
    rot, tran, rms, rms_all = PdbSite._super(
        p_coords, q_coords, cycles, cutoff, weighted, scaling_factor)

    if transform:
        other.structure.transform(rot, tran)

    if get_array:
        q_trans = np.dot(q_coords, rot) + tran
        return rot, tran, rms, rms_all, p_coords, q_trans
    return rot, tran, rms, rms_all