def align(self, conf, reflect=False):
    """
    Rotate a conformer into its canonical frame and return centred coordinates.

    The conformer is transformed in place. The 3x3 block of the canonical
    transform is negated when its determinant is negative (the default
    canonical transform may include an inversion), and negated once more
    when `reflect` is true.

    Args:
        conf: RDKit conformer; it is modified by the transform.
        reflect: when true, request the mirrored orientation.

    Returns:
        The conformer positions after the transform, minus the centroid
        reported by ComputeCentroid.
    """
    from rdkit.Chem import rdMolTransforms as rdmt

    # Rotation: decide once whether the 3x3 block must change sign.
    # A negative determinant asks for one negation and `reflect` asks for
    # another; two negations cancel exactly, so only the parity matters.
    transform = rdmt.ComputeCanonicalTransform(conf)
    negate = np.linalg.det(transform[:3, :3]) < 0
    if reflect:
        negate = not negate
    if negate:
        transform[:3, :3] *= -1
    rdmt.TransformConformer(conf, transform)

    # Translation: express the coordinates relative to the centroid.
    centroid = rdmt.ComputeCentroid(conf)
    origin = np.array([centroid.x, centroid.y, centroid.z])
    return conf.GetPositions() - origin
def canonicalize_conf_rdkit(mol, conf_id=-1):
    """
    Return a copy of `mol` with one conformer moved to its canonical frame.

    The input molecule is copied first, so only the returned molecule is
    transformed.

    Args:
        mol: RDKit molecule holding at least the requested conformer.
        conf_id: id of the conformer to canonicalize (default -1).

    Returns:
        The copied molecule with the selected conformer transformed.
    """
    mol_copy = Chem.Mol(mol)
    conformer = mol_copy.GetConformer(conf_id)
    # The canonical transform is computed about the conformer's centroid.
    centroid = rdmt.ComputeCentroid(conformer)
    transform = rdmt.ComputeCanonicalTransform(conformer, centroid)
    rdmt.TransformConformer(conformer, transform)
    return mol_copy
def centerMol(mol):
    """
    Shift the default conformer of `mol` so its centroid sits at the origin.

    The molecule is modified in place and also returned.

    Args:
        mol: RDKit molecule with a conformer.

    Returns:
        The same molecule object, after re-centring.
    """
    from rdkit.Chem import rdMolTransforms

    conformer = mol.GetConformer()
    centroid = rdMolTransforms.ComputeCentroid(conformer)
    # Compute every shifted position first, then write them back; each
    # atom's new position depends only on its own old position.
    shifted = [
        conformer.GetAtomPosition(idx) - centroid
        for idx in range(conformer.GetNumAtoms())
    ]
    for idx, position in enumerate(shifted):
        conformer.SetAtomPosition(idx, position)
    return mol
def get_USRlike_atoms(self):
    """Returns 4 rdkit Point3D objects similar to those used in USR:
    - centroid (ctd)
    - closest to ctd (cst)
    - farthest from cst (fct); cst is used as the reference instead of
      ctd so the atom-atom distance matrix can be reused
    - farthest from fct (ftf)"""
    matrix = rdmolops.Get3DDistanceMatrix(self.mol)
    conf = self.mol.GetConformer()
    coords = conf.GetPositions()

    # centroid
    # NOTE(review): ComputeCentroid is called with its defaults here;
    # confirm whether hydrogens should contribute to the centroid.
    ctd = rdMolTransforms.ComputeCentroid(conf)

    # closest to centroid
    # Start from +inf rather than a fixed cutoff, so the first atom always
    # seeds the search even when every atom is far from the centroid.
    min_dist = float("inf")
    for atom in self.mol.GetAtoms():
        idx = atom.GetIdx()
        point = rdGeometry.Point3D(*coords[idx])
        dist = ctd.Distance(point)
        if dist < min_dist:  # strict: ties keep the earliest atom
            min_dist = dist
            cst = point
            cst_idx = idx

    # farthest from cst
    fct_idx = argmax(matrix[cst_idx])
    fct = rdGeometry.Point3D(*coords[fct_idx])

    # farthest from fct
    ftf_idx = argmax(matrix[fct_idx])
    ftf = rdGeometry.Point3D(*coords[ftf_idx])

    return ctd, cst, fct, ftf
def translate(mol: Chem.rdchem.Mol, new_centroid: Union[np.ndarray, List[int]], conf_id: int = -1):
    """Translate one conformer of a molecule so its centroid lands on a new point.

    The conformer is modified in place; nothing is returned.

    Args:
        mol: the molecule.
        new_centroid: target position, of shape [x, y, z].
        conf_id: id of the conformer to move.
    """
    conformer = mol.GetConformer(conf_id)

    # Current centroid as a plain array so it can be subtracted directly.
    current = rdMolTransforms.ComputeCentroid(conformer)
    shift = new_centroid - np.array([current.x, current.y, current.z])

    # Homogeneous 4x4 matrix: identity rotation, translation in the last
    # column.
    transform = np.eye(4)
    transform[:3, 3] = shift

    rdMolTransforms.TransformConformer(conformer, transform)
def find_ligand_site_event(nx, ny, nz, ex, ey, ez, lig_strings, pandda_model_path):
    """
    Pick the ligand in a model file that best matches an event site.

    For each entry of `lig_strings`, the lines of `pandda_model_path` that
    contain that string are parsed as a PDB block and the centroid of the
    resulting molecule is computed. Each ligand is scored by the absolute
    difference between its norm value and the native/event norm value; the
    lowest score wins, and the last ligand wins a tie.

    Args:
        nx, ny, nz: native centroid coordinates.
        ex, ey, ez: event centroid coordinates.
        lig_strings: sequence of substrings identifying ligand lines.
        pandda_model_path: path to the model PDB file.

    Returns:
        Tuple (ligand, lig_centroid, min_dist, event_displacement).

    Raises:
        ValueError: if `lig_strings` is empty (no minimum exists).
    """
    event_centroid = [ex, ey, ez]
    native_centroid = [nx, ny, nz]
    # NOTE(review): np.linalg.norm of two stacked points is the norm of all
    # six numbers together, not the distance between the points. Left as is
    # because callers may depend on the returned values - confirm intent.
    event_displacement = np.linalg.norm([native_centroid, event_centroid])

    # Read the model once and close the handle; previously the file was
    # re-opened for every ligand and never explicitly closed.
    with open(pandda_model_path) as model_file:
        model_lines = model_file.readlines()

    lig_distances = []
    lig_centres = []
    for lig in lig_strings:
        lig_pdb = ''.join(line for line in model_lines if lig in line)
        mol = Chem.MolFromPDBBlock(lig_pdb)
        centre = rdMolTransforms.ComputeCentroid(mol.GetConformer())
        lig_centre = [centre.x, centre.y, centre.z]
        lig_centres.append(lig_centre)

        dist = np.linalg.norm([lig_centre, event_centroid])
        lig_distances.append(abs(event_displacement - dist))

    min_dist = min(lig_distances)
    # Keep the LAST index that attains the minimum, as the original scan did.
    ind = max(j for j, value in enumerate(lig_distances) if value == min_dist)

    ligand = lig_strings[ind]
    lig_centroid = lig_centres[ind]
    return ligand, lig_centroid, min_dist, event_displacement
def __init__(self, mol):
    """Build the ligand wrapper around an existing rdkit mol object.

    Stores the molecule, the positions of its default conformer and the
    centroid of that conformer.
    """
    self.mol = mol

    # Both derived attributes come from the same default conformer.
    conformer = self.mol.GetConformer()
    self.coordinates = conformer.GetPositions()
    self.centroid = rdMolTransforms.ComputeCentroid(conformer)
def __init__(self, mol):
    """Build the residue wrapper around an RDKit molecule.

    The molecule must carry a 'resname' property, which is used as the
    residue's identifier.
    """
    self.mol = mol  # RDKit molecule
    self.resname = self.mol.GetProp('resname')  # identifier of the residue

    # Coordinates and centroid are taken from the default conformer.
    conformer = self.mol.GetConformer()
    self.coordinates = conformer.GetPositions()  # atomic coordinates
    self.centroid = rdMolTransforms.ComputeCentroid(conformer)  # centroid
def run(self):
    """
    Write and submit a vina job whose search box is centred on the ligand.

    The ligand mol2 file (named after `self.ligand_pdbqt`) is read with
    RDKit. If RDKit cannot parse it, the file is converted to a .mol file
    with openbabel and read again. The centroid of the ligand conformer
    becomes the box centre, `self.box_size` supplies the box dimensions,
    and the job is written and submitted through `write_job` and
    `submit_job`.
    """
    job_dir = os.path.join(self.root_dir, self.docking_dir)

    # open ligand mol2 file (generated during PrepLigand)
    ligand = os.path.join(
        job_dir, self.ligand_pdbqt.replace('_prepared.pdbqt', '.mol2'))

    # create an rdkit mol from ligand
    mol = Chem.MolFromMol2File(ligand)
    if mol is None:
        # RDKit could not parse the mol2: convert mol2 -> mol with
        # openbabel and read the converted file instead.
        obConv = openbabel.OBConversion()
        obConv.SetInAndOutFormats('mol2', 'mol')
        ob_mol = openbabel.OBMol()
        obConv.ReadFile(ob_mol, ligand)
        ligand = ligand.replace('.mol2', '.mol')
        obConv.WriteFile(ob_mol, ligand)
        mol = Chem.MolFromMolFile(ligand)

    # get the ligand conformer and find its centroid
    conf = mol.GetConformer()
    centre = rdMolTransforms.ComputeCentroid(conf)

    # box size allowed for vina
    # SECURITY NOTE(review): eval() executes arbitrary code. If box_size can
    # come from untrusted input, switch to ast.literal_eval after confirming
    # no caller passes an expression.
    box_size = eval(self.box_size)

    # name of output file from vina
    out_name = str(self.ligand_pdbqt).replace('.pdbqt', '_vinaout.pdbqt')

    params = [
        '--receptor', os.path.join(job_dir, self.receptor_pdbqt),
        '--ligand', os.path.join(job_dir, self.ligand_pdbqt),
        '--center_x', centre.x,
        '--center_y', centre.y,
        '--center_z', centre.z,
        '--size_x', str(box_size[0]),
        '--size_y', str(box_size[1]),
        '--size_z', str(box_size[2]),
        '--out', out_name,
    ]
    # The centre values are floats; stringify everything when joining.
    parameters = ' '.join(str(v) for v in params)

    write_job(job_directory=job_dir,
              job_filename=self.job_filename,
              job_name=self.job_name,
              job_executable=self.vina_exe,
              job_options=parameters)
    submit_job(job_directory=job_dir, job_script=self.job_filename)
def process_molecule(self, pdb_file, use_esp=False):
    """
    Read a molecule from a PDB file and return its centred geometry.

    :param pdb_file: path to the PDB file to process the molecule from.
    :param use_esp: when true, hydrogens are added, Gasteiger charges are
        computed and the charges are included in the result.
    :return: a dict with "coords" (atom positions minus the centroid) and
        "vdwradii", plus "charges" when use_esp is true; None when the
        file cannot be read, yields no molecule, or charge evaluation
        fails.
    """
    # NOTE: Gasteiger is an inappropriate algorithm for ESP calculation of
    # proteins!
    try:
        mol = Chem.MolFromPDBFile(molFileName=pdb_file,
                                  removeHs=False,
                                  sanitize=True)
    except IOError:
        log.warning("Could not read PDB file.")
        return None

    if mol is None:
        log.warning("Bad pdb file found.")
        return None

    if use_esp:
        try:
            # Hydrogens are added before the partial charges are computed.
            mol = rdMO.AddHs(mol, addCoords=True)
            rdPC.ComputeGasteigerCharges(mol, throwOnParamFailure=True)
        except ValueError:
            log.warning("Bad Gasteiger charge evaluation.")
            return None

    conformer = mol.GetConformer()
    # Centre of the molecule, hydrogens included.
    center = rdMT.ComputeCentroid(conformer, ignoreHs=False)
    origin = np.asarray([center.x, center.y, center.z])

    atoms_count = mol.GetNumAtoms()
    atoms = mol.GetAtoms()

    positions = (conformer.GetAtomPosition(idx) for idx in range(atoms_count))
    coords = np.asarray([np.asarray([p.x, p.y, p.z]) for p in positions])

    # Coordinates are expressed relative to the centre computed above.
    res = {
        "coords": coords - origin,
        "vdwradii": np.asarray(
            [self.periodic_table.GetRvdw(atom.GetAtomicNum())
             for atom in atoms]),
    }
    if use_esp:
        res['charges'] = np.asarray(
            [float(atom.GetProp("_GasteigerCharge")) for atom in atoms])
    return res
def test1Canonicalization(self):
    """
    Check centroid computation and that the canonicalization routes agree.

    A one-atom molecule must report its only atom as the centroid. For the
    1oir test molecule, the canonical transform computed with and without
    an explicit centroid must match, and applying it must give the same
    coordinates as CanonicalizeConformer and CanonicalizeMol.
    """

    def assert_same_positions(conf_a, conf_b, n_atoms):
        # Compare x, y and z of every atom using the module's feq helper.
        for i in range(n_atoms):
            p1 = list(conf_a.GetAtomPosition(i))
            p2 = list(conf_b.GetAtomPosition(i))
            for axis in range(3):
                self.assertTrue(feq(p1[axis], p2[axis]))

    mol = Chem.MolFromSmiles("C")
    conf = Chem.Conformer(1)
    conf.SetAtomPosition(0, (4.0, 5.0, 6.0))
    mol.AddConformer(conf, 1)

    conf = mol.GetConformer()
    pt = rdmt.ComputeCentroid(conf)
    self.assertTrue(ptEq(pt, geom.Point3D(4.0, 5.0, 6.0)))

    fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                         'MolTransforms', 'test_data', '1oir.mol')
    m = Chem.MolFromMolFile(fileN)
    cpt = rdmt.ComputeCentroid(m.GetConformer())
    trans = rdmt.ComputeCanonicalTransform(m.GetConformer(), cpt)
    trans2 = rdmt.ComputeCanonicalTransform(m.GetConformer())
    # Passing the centroid explicitly must not change the transform.
    for i in range(4):
        for j in range(4):
            self.assertTrue(feq(trans[i, j], trans2[i, j]))
    rdmt.TransformConformer(m.GetConformer(), trans2)

    nats = m.GetNumAtoms()

    # Route 2: canonicalize the conformer directly.
    m2 = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeConformer(m2.GetConformer())
    assert_same_positions(m.GetConformer(), m2.GetConformer(), nats)

    # Route 3: canonicalize the whole molecule.
    m3 = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeMol(m3)
    assert_same_positions(m.GetConformer(), m3.GetConformer(), nats)
def __init__(self, inputFile):
    """Initialize the ligand from a file.

    Only .mol2 files are accepted (the extension check is
    case-insensitive). The molecule is read with sanitization on and
    hydrogens kept, then the conformer positions and centroid are stored.

    Raises:
        ValueError: if the file extension is not .mol2, or if the file
            could not be parsed into a molecule.
    """
    self.inputFile = inputFile
    fileExtension = os.path.splitext(inputFile)[1]
    if fileExtension.lower() != '.mol2':
        raise ValueError('{} files are not supported for the ligand.'.format(fileExtension[1:].upper()))

    logger.debug('Reading {}'.format(self.inputFile))
    self.mol = Chem.MolFromMol2File(inputFile, sanitize=True, removeHs=False)
    if self.mol is None:
        # The reader signals a parse failure by returning None; fail here
        # with a clear message instead of an AttributeError further down.
        raise ValueError('Could not read a molecule from {}.'.format(inputFile))

    # Set Centroid
    conformer = self.mol.GetConformer()
    self.coordinates = conformer.GetPositions()
    self.centroid = rdMolTransforms.ComputeCentroid(conformer)
    logger.debug('Set ligand centroid to {:.3f} {:.3f} {:.3f}'.format(*self.centroid))
def get_center(self, xyz):
    """
    Return the molecular center for a set of transformed coordinates.

    Without a SMILES string (`self.smile is None`) the center is the plain
    mean of `xyz` over its first axis. Otherwise the coordinates are
    copied into an RDKit conformer built from the SMILES and the center is
    the centroid RDKit reports for it.

    Args:
        xyz: coordinates, one (x, y, z) row per atom.

    Returns:
        numpy array holding the center.
    """
    if self.smile is None:
        return np.mean(xyz, axis=0)

    # from rdkit
    from rdkit.Geometry import Point3D
    from rdkit.Chem import rdMolTransforms as rdmt

    conformer = self.rdkit_mol(self.smile).GetConformer(0)
    # Overwrite the conformer's positions with the supplied coordinates.
    for idx in range(conformer.GetNumAtoms()):
        x, y, z = xyz[idx]
        conformer.SetAtomPosition(idx, Point3D(x, y, z))

    centroid = rdmt.ComputeCentroid(conformer)
    return np.array([centroid.x, centroid.y, centroid.z])
def prepare_vina_job(docking_dir, prepared_receptor, prepared_ligand, vina_exe, box_size, job_fname, job_name):
    """
    Write a vina job file whose search box is centred on the ligand.

    The ligand's .mol2 counterpart is read with RDKit to find the centroid
    of its conformer; that centroid and `box_size` define the search box.
    The process working directory is changed to `docking_dir` as a side
    effect.

    Args:
        docking_dir: directory holding the prepared receptor and ligand.
        prepared_receptor: receptor pdbqt file name inside `docking_dir`.
        prepared_ligand: ligand pdbqt file name inside `docking_dir`; a
            .mol2 file with the same stem is read for the coordinates.
        vina_exe: vina executable passed through to `write_job`.
        box_size: indexable with the x, y and z box sizes.
        job_fname: job file name passed through to `write_job`.
        job_name: job name passed through to `write_job`.
    """
    # Resolve the directory before chdir: with a relative docking_dir the
    # paths built from it would otherwise be resolved against the NEW
    # working directory and point at the wrong place.
    docking_dir = os.path.abspath(docking_dir)

    ligand = os.path.join(docking_dir, prepared_ligand.replace('.pdbqt', '.mol2'))
    os.chdir(docking_dir)

    # create an rdkit mol from ligand
    mol = Chem.MolFromMol2File(ligand)

    # get the ligand conformer and find its centroid
    conf = mol.GetConformer()
    centre = rdMolTransforms.ComputeCentroid(conf)

    # pdbqt name for results of vina
    # NOTE(review): joining the split parts with '' also removes dots from
    # inside the stem ("a.b.pdbqt" -> "ab_vinaout.pdbqt"); kept as is
    # because existing outputs may rely on this naming.
    vina_out = str(''.join(prepared_ligand.split('.')[:-1]) + '_vinaout.pdbqt')

    # vina options
    params = [
        '--receptor', os.path.join(docking_dir, prepared_receptor),
        '--ligand', os.path.join(docking_dir, prepared_ligand),
        '--center_x', centre.x,
        '--center_y', centre.y,
        '--center_z', centre.z,
        '--size_x', str(box_size[0]),
        '--size_y', str(box_size[1]),
        '--size_z', str(box_size[2]),
        '--out', vina_out,
    ]

    # parse options into string for vina
    parameters = ' '.join(str(v) for v in params)

    # write job file to run wherever
    write_job(directory=docking_dir, name=job_name, fname=job_fname, exe=vina_exe, options=parameters)
def test1Shape(self):
    """
    Exercise shape encoding, shape distances and bounding boxes on 1oir.

    Covers: encoding a canonicalized molecule versus encoding with an
    explicit canonical transform, the Tanimoto shape distance after
    alignment, grid construction from computed dimensions, and conformer
    and union bounding boxes. A grid file '1oir_shape.grd' is written to
    the working directory.
    """
    fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                         'ShapeHelpers', 'test_data', '1oir.mol')

    # Reference: canonicalize the molecule itself, then encode it.
    m = Chem.MolFromMolFile(fileN)
    rdmt.CanonicalizeMol(m)
    dims1, offset1 = rdshp.ComputeConfDimsAndOffset(m.GetConformer())
    grd = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd, 0)
    ovect = grd.GetOccupancyVect()
    self.assertTrue(ovect.GetTotalVal() == 9250)

    # Same result expected when the transform is passed in instead.
    m = Chem.MolFromMolFile(fileN)
    trans = rdmt.ComputeCanonicalTransform(m.GetConformer())
    dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer(),
                                                  trans=trans)
    dims -= dims1
    offset -= offset1
    self.assertTrue(feq(dims.Length(), 0.0))
    self.assertTrue(feq(offset.Length(), 0.0))

    grd1 = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd1, 0, trans)
    ovect = grd1.GetOccupancyVect()
    self.assertTrue(ovect.GetTotalVal() == 9250)

    # Encoding without a transform is only exercised, not asserted on.
    grd2 = geom.UniformGrid3D(30.0, 16.0, 10.0)
    rdshp.EncodeShape(m, grd2, 0)

    fileN2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                          'ShapeHelpers', 'test_data', '1oir_conf.mol')
    m2 = Chem.MolFromMolFile(fileN2)
    # The alignment runs before the distances below; its return value is
    # not checked.
    rdMolAlign.AlignMol(m, m2)
    self.assertTrue(feq(rdshp.ShapeTanimotoDist(m, m2), 0.2813))

    dist = rdshp.ShapeTanimotoDist(mol1=m, mol2=m2, confId1=0, confId2=0,
                                   gridSpacing=0.25, stepSize=0.125)
    self.assertTrue(feq(dist, 0.3021))

    # Grid built from the computed dimensions and offset.
    m = Chem.MolFromMolFile(fileN)
    rdmt.ComputeCentroid(m.GetConformer())
    dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer())
    grd = geom.UniformGrid3D(dims.x, dims.y, dims.z, 0.5,
                             DataStructs.DiscreteValueType.TWOBITVALUE,
                             offset)
    dims -= geom.Point3D(13.927, 16.97, 9.775)
    offset -= geom.Point3D(-4.353, 16.829, 2.782)
    self.assertTrue(feq(dims.Length(), 0.0))
    self.assertTrue(feq(offset.Length(), 0.0))
    rdshp.EncodeShape(m, grd, 0)
    ovect = grd.GetOccupancyVect()
    self.assertTrue(ovect.GetTotalVal() == 9275)
    geom.WriteGridToFile(grd, '1oir_shape.grd')

    # Bounding boxes before and after canonicalization, and their union.
    m = Chem.MolFromMolFile(fileN)
    lc, uc = rdshp.ComputeConfBox(m.GetConformer())
    rdmt.CanonicalizeMol(m)
    lc1, uc1 = rdshp.ComputeConfBox(m.GetConformer())

    lc2, uc2 = rdshp.ComputeUnionBox((lc, uc), (lc1, uc1))
    lc -= geom.Point3D(-4.353, 16.829, 2.782)
    uc -= geom.Point3D(9.574, 33.799, 12.557)
    self.assertTrue(feq(lc.Length(), 0.0))
    self.assertTrue(feq(uc.Length(), 0.0))

    lc1 -= geom.Point3D(-10.7519, -6.0778, -3.0123)
    uc1 -= geom.Point3D(8.7163, 5.3279, 3.1621)
    self.assertTrue(feq(lc1.Length(), 0.0))
    self.assertTrue(feq(uc1.Length(), 0.0))

    lc2 -= geom.Point3D(-10.7519, -6.0778, -3.01226)
    uc2 -= geom.Point3D(9.574, 33.799, 12.557)
    self.assertTrue(feq(lc2.Length(), 0.0))
    self.assertTrue(feq(uc2.Length(), 0.0))
def process_molecule(self, pdb_file):
    """
    Splits the molecules into separate channels.

    The PDB file is read with RDKit, hydrogens are added, and the result is
    written next to the input as "<name>_hydrogenized.<ext>". That file is
    then parsed again with `pd.parsePDB` and split into per-residue,
    backbone, heavy-atom and hydrogen channels. All coordinates are
    relative to the centroid of the hydrogenized RDKit molecule.

    :param pdb_file: the pdb file to be processed
    :return: a dictionary with "coords" and "vdwradii" for the whole
        molecule and "coords_<channel>" / "vdwradii_<channel>" for each of
        the 20 standard amino acids, "backbone", "heavy" and "hydro";
        None if the file cannot be read or processed
    """

    def _indices(selection):
        # A selection that matched nothing is treated as an empty mask.
        # NOTE(review): assumes the selection API returns None for "no
        # atoms", as the per-residue lookup here already did - confirm.
        if selection is None:
            return np.array([], dtype=np.int32)
        return selection.getIndices()

    def canonical_notation(symbol):
        # Element symbols are normalised to "Xx" capitalisation before the
        # periodic-table lookup; single characters pass through unchanged.
        if len(symbol) > 1:
            return symbol[0].upper() + symbol[1:].lower()
        return symbol

    hydro_file_name = '_hydrogenized.'.join(
        os.path.basename(pdb_file).split('.'))
    hydrogenized_pdb_file = os.path.join(os.path.dirname(pdb_file),
                                         hydro_file_name)

    try:
        mol_rdkit = Chem.MolFromPDBFile(molFileName=pdb_file,
                                        removeHs=False,
                                        sanitize=True)
        if mol_rdkit is None:
            # Routed to the shared "Bad PDB file." handler below.
            raise ValueError
        mol_rdkit = rdMO.AddHs(mol_rdkit, addCoords=True)

        # get the conformation of the molecule
        conformer = mol_rdkit.GetConformer()
        # calculate the center of the molecule (hydrogens included)
        center = rdMT.ComputeCentroid(conformer, ignoreHs=False)
        mol_center = np.asarray([center.x, center.y, center.z])

        pdbw = Chem.rdmolfiles.PDBWriter(fileName=hydrogenized_pdb_file)
        pdbw.write(mol_rdkit)
        pdbw.flush()
        pdbw.close()
        del mol_rdkit, pdbw
    except (IOError, ValueError):
        log.warning("Bad PDB file.")
        return None

    try:
        mol = pd.parsePDB(hydrogenized_pdb_file)
    except IOError:
        log.warning("Could not read PDB file.")
        return None
    if mol is None:
        log.warning("Bad pdb file found.")
        return None

    std_amino_acids = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU',
                       'GLY', 'HIS', 'ILE', 'LEU', 'LYS', 'MET', 'PHE',
                       'PRO', 'SER', 'THR', 'TRP', 'TYR', 'VAL']

    res = {'coords': mol.getCoords() - mol_center,
           'vdwradii': np.asarray([self.periodic_table.GetRvdw(
               self.periodic_table.GetAtomicNumber(
                   canonical_notation(atom)))
               for atom in mol.getElements()])}

    # find the data for all the 20 amino acids
    for aa in std_amino_acids:
        mask = _indices(mol.select('resname ' + aa))
        res['coords_' + aa] = res['coords'][mask, :]
        res['vdwradii_' + aa] = res['vdwradii'][mask]

    # find the data for the backbones
    backbone_mask = _indices(mol.backbone)
    res['coords_backbone'] = res['coords'][backbone_mask, :]
    res['vdwradii_backbone'] = res['vdwradii'][backbone_mask]

    # find the data for the heavy atoms (i.e. no H atoms)
    heavy_mask = _indices(mol.heavy)
    res['coords_heavy'] = res['coords'][heavy_mask, :]
    res['vdwradii_heavy'] = res['vdwradii'][heavy_mask]

    # find the data for the hydrogen atoms
    hydro_mask = _indices(mol.hydrogen)
    res['coords_hydro'] = res['coords'][hydro_mask, :]
    res['vdwradii_hydro'] = res['vdwradii'][hydro_mask]

    return res