Esempio n. 1
0
    def align(self, conf, reflect=False):
        """
        Move a conformer into RDKit's canonical frame and return centered xyz.

        The conformer is transformed in place. The 3x3 block of the canonical
        transform is negated when its determinant is negative, and negated
        (again) when ``reflect`` is true; two negations cancel out.

        Args:
            conf: RDKit conformer, modified in place.
            reflect: if true, additionally negate the 3x3 block.

        Returns:
            The transformed positions minus the centroid reported by
            ``ComputeCentroid`` for the transformed conformer.
        """
        from rdkit.Chem import rdMolTransforms as rdmt

        # Rotation part.
        matrix = rdmt.ComputeCanonicalTransform(conf)
        negative_det = np.linalg.det(matrix[:3, :3]) < 0
        # One negation is needed only when exactly one of the two
        # conditions holds; when both hold they undo each other.
        if bool(negative_det) != bool(reflect):
            matrix[:3, :3] *= -1
        rdmt.TransformConformer(conf, matrix)

        # Translation part.
        centroid = rdmt.ComputeCentroid(conf)
        origin = np.array([centroid.x, centroid.y, centroid.z])
        return conf.GetPositions() - origin
Esempio n. 2
0
 def canonicalize_conf_rdkit(mol, conf_id=-1):
     """Return a new ``Chem.Mol`` built from ``mol`` with one conformer canonicalized.

     The conformer ``conf_id`` of the new molecule is transformed with the
     canonical transform computed about its centroid.
     """
     result = Chem.Mol(mol)
     conformer = result.GetConformer(conf_id)
     centroid = rdmt.ComputeCentroid(conformer)
     rdmt.TransformConformer(
         conformer, rdmt.ComputeCanonicalTransform(conformer, centroid))
     return result
Esempio n. 3
0
def centerMol(mol):
    """Subtract the conformer centroid from every atom position, in place.

    Uses the molecule's default conformer and the centroid returned by
    ``rdMolTransforms.ComputeCentroid``. Returns the same ``mol`` object.
    """
    from rdkit.Chem import rdMolTransforms

    conformer = mol.GetConformer()
    centroid = rdMolTransforms.ComputeCentroid(conformer)
    n_atoms = conformer.GetNumAtoms()
    for atom_idx in range(n_atoms):
        shifted = conformer.GetAtomPosition(atom_idx) - centroid
        conformer.SetAtomPosition(atom_idx, shifted)
    return mol
Esempio n. 4
0
    def get_USRlike_atoms(self):
        """Returns 4 rdkit Point3D objects similar to those used in USR:
        - centroid (ctd)
        - closest to ctd (cst)
        - farthest from cst (fct) (usually ctd but let's avoid computing too many dist matrices)
        - farthest from fct (ftf)

        Raises:
            ValueError: if no atom closest to the centroid can be found
                (e.g. the conformer has no atoms).
        """
        matrix = rdmolops.Get3DDistanceMatrix(self.mol)
        conf = self.mol.GetConformer()
        coords = conf.GetPositions()

        # centroid
        ctd = rdMolTransforms.ComputeCentroid(conf)

        # closest to centroid; start from +inf rather than a fixed cutoff so
        # that an atom is always selected regardless of the molecule's size
        min_dist = float("inf")
        cst = None
        cst_idx = None
        for idx, xyz in enumerate(coords):
            point = rdGeometry.Point3D(*xyz)
            dist = ctd.Distance(point)
            # strict "<" keeps the first atom on ties
            if dist < min_dist:
                min_dist = dist
                cst = point
                cst_idx = idx
        if cst_idx is None:
            raise ValueError(
                "Cannot determine the atom closest to the centroid")

        # farthest from cst
        fct_idx = argmax(matrix[cst_idx])
        fct = rdGeometry.Point3D(*coords[fct_idx])

        # farthest from fct
        ftf_idx = argmax(matrix[fct_idx])
        ftf = rdGeometry.Point3D(*coords[ftf_idx])

        return ctd, cst, fct, ftf
Esempio n. 5
0
def translate(mol: Chem.rdchem.Mol,
              new_centroid: Union[np.ndarray, List[int]],
              conf_id: int = -1):
    """Shift one conformer of a molecule so that its centroid lands on a new point.

    The conformer is modified in place; nothing is returned.

    Args:
        mol: the molecule.
        new_centroid: target position, given as [x, y, z].
        conf_id: id of the conformer to move.
    """
    conformer = mol.GetConformer(conf_id)

    # Current centroid as a plain array so it can be subtracted.
    centroid = rdMolTransforms.ComputeCentroid(conformer)
    offset = new_centroid - np.array([centroid.x, centroid.y, centroid.z])

    # 4x4 homogeneous matrix: identity rotation, translation in the last column.
    transform = np.eye(4)
    transform[:3, 3] = offset

    rdMolTransforms.TransformConformer(conformer, transform)
Esempio n. 6
0
def find_ligand_site_event(nx, ny, nz, ex, ey, ez, lig_strings, pandda_model_path):
    """Pick the ligand in a model file that best matches an event.

    For each identifier in ``lig_strings`` every line of the model file that
    contains the identifier is collected, parsed as a PDB block, and the
    centroid of the resulting conformer is computed. The ligand whose score
    (see the review note below) is smallest is returned; on ties the last
    such ligand wins.

    Args:
        nx, ny, nz: native centroid coordinates.
        ex, ey, ez: event centroid coordinates.
        lig_strings: sequence of substrings identifying ligand lines.
        pandda_model_path: path to the model file to scan.

    Returns:
        Tuple ``(ligand, lig_centroid, min_dist, event_displacement)``.

    Raises:
        ValueError: if ``lig_strings`` is empty or a ligand cannot be parsed.
    """
    # nn = native_centroid n, en = event_centroid n
    event_centroid = [ex, ey, ez]
    native_centroid = [nx, ny, nz]
    # NOTE(review): np.linalg.norm of two stacked points is the norm of the
    # 2x3 matrix, not the distance between the points. The same applies to
    # the per-ligand value below. Left unchanged because fixing it would
    # alter the returned numbers -- confirm the intended metric.
    event_displacement = np.linalg.norm([native_centroid, event_centroid])

    if len(lig_strings) == 0:
        raise ValueError("lig_strings is empty: no ligand to match against the event")

    # Read the model once and close the handle, instead of re-opening
    # (and never closing) the file for every ligand.
    with open(pandda_model_path) as handle:
        model_lines = handle.readlines()

    lig_distances = []
    lig_centres = []
    for lig in lig_strings:
        lig_pdb = ''.join(line for line in model_lines if lig in line)
        mol = Chem.MolFromPDBBlock(lig_pdb)
        if mol is None:
            raise ValueError(
                "Could not parse ligand {!r} from {}".format(lig, pandda_model_path))
        centre = rdMolTransforms.ComputeCentroid(mol.GetConformer())
        lig_centre = [centre.x, centre.y, centre.z]
        lig_centres.append(lig_centre)

        dist = np.linalg.norm([lig_centre, event_centroid])
        lig_distances.append(abs(event_displacement - dist))

    min_dist = min(lig_distances)
    # Last index holding the minimum, matching the original tie-breaking.
    ind = len(lig_distances) - 1 - lig_distances[::-1].index(min_dist)

    ligand = lig_strings[ind]
    lig_centroid = lig_centres[ind]

    return ligand, lig_centroid, min_dist, event_displacement
Esempio n. 7
0
 def __init__(self, mol):
     """Build the ligand wrapper around an rdkit mol object.

     Stores the molecule, the positions of its default conformer, and the
     centroid computed from that conformer.
     """
     self.mol = mol
     conformer = self.mol.GetConformer()
     self.coordinates = conformer.GetPositions()
     self.centroid = rdMolTransforms.ComputeCentroid(conformer)
Esempio n. 8
0
 def __init__(self, mol):
     """Wrap an RDKit molecule representing a residue.

     Reads the ``'resname'`` property of the molecule and caches the
     positions and centroid of its default conformer.
     """
     # RDKit molecule
     self.mol = mol
     # unique identifier for the residue
     self.resname = self.mol.GetProp('resname')
     conformer = self.mol.GetConformer()
     # atomic coordinates of the residue
     self.coordinates = conformer.GetPositions()
     # centroid of the residue
     self.centroid = rdMolTransforms.ComputeCentroid(conformer)
Esempio n. 9
0
    def run(self):
        """Write and submit a vina job centred on the ligand's centroid.

        The ligand mol2 file is read with RDKit; if that fails it is converted
        to a mol file with Open Babel and read again. The centroid of the
        ligand conformer becomes the box centre, ``self.box_size`` provides
        the box dimensions, and the assembled option string is handed to
        ``write_job`` / ``submit_job``.

        Raises:
            ValueError: if the ligand cannot be read even after conversion.
        """
        job_directory = os.path.join(self.root_dir, self.docking_dir)

        # open ligand mol2 file (generated during PrepLigand)
        ligand = os.path.join(
            job_directory,
            self.ligand_pdbqt.replace('_prepared.pdbqt', '.mol2'))

        # create an rdkit mol from ligand
        mol = Chem.MolFromMol2File(ligand)

        if mol is None:
            # RDKit could not read the mol2 file: convert it to mol with obabel
            obConv = openbabel.OBConversion()
            obConv.SetInAndOutFormats('mol2', 'mol')

            ob_mol = openbabel.OBMol()

            # read the mol2 file and write it back out as a mol file
            obConv.ReadFile(ob_mol, ligand)
            obConv.WriteFile(ob_mol, ligand.replace('.mol2', '.mol'))

            ligand = ligand.replace('.mol2', '.mol')
            mol = Chem.MolFromMolFile(ligand)

            if mol is None:
                raise ValueError(
                    'Could not read ligand file {}'.format(ligand))

        # get the ligand conformer and find its centroid
        conf = mol.GetConformer()
        centre = rdMolTransforms.ComputeCentroid(conf)  # out = centre.x, centre.y and centre.z for coords

        # box size allowed for vina
        # SECURITY NOTE(review): eval() executes arbitrary code; this is only
        # acceptable if self.box_size always comes from a trusted source.
        # Consider ast.literal_eval if the value is a plain literal.
        box_size = eval(self.box_size)

        # name of output file from vina
        out_name = str(self.ligand_pdbqt).replace('.pdbqt', '_vinaout.pdbqt')

        params = [
            '--receptor',
            os.path.join(job_directory, self.receptor_pdbqt),
            '--ligand',
            os.path.join(job_directory, self.ligand_pdbqt),
            '--center_x',
            centre.x,
            '--center_y',
            centre.y,
            '--center_z',
            centre.z,
            '--size_x',
            str(box_size[0]),
            '--size_y',
            str(box_size[1]),
            '--size_z',
            str(box_size[2]),
            '--out',
            out_name,
        ]

        parameters = ' '.join(str(v) for v in params)

        write_job(job_directory=job_directory, job_filename=self.job_filename,
                  job_name=self.job_name, job_executable=self.vina_exe, job_options=parameters)

        submit_job(job_directory=job_directory, job_script=self.job_filename)
Esempio n. 10
0
    def process_molecule(self, pdb_file, use_esp=False):
        """
        Read a molecule from a PDB file and return its centered coordinates
        and van der Waals radii.

        :param pdb_file: path to the PDB file to process the molecule from.
        :param use_esp: when True, hydrogens are added, Gasteiger charges are
            computed and the result also carries a "charges" entry.
        :return: a dict with "coords" (atom positions minus the centroid),
            "vdwradii" and optionally "charges"; None if the file cannot be
            read or parsed, or if the charge step raises ValueError.
        """
        # NOTE: Gasteiger is an inappropriate algorithm for ESP calculation of proteins!
        try:
            mol = Chem.MolFromPDBFile(molFileName=pdb_file, removeHs=False,
                                      sanitize=True)
        except IOError:
            log.warning("Could not read PDB file.")
            return None

        if mol is None:
            log.warning("Bad pdb file found.")
            return None

        if use_esp:
            try:
                # Missing hydrogens first, then partial charges.
                mol = rdMO.AddHs(mol, addCoords=True)
                rdPC.ComputeGasteigerCharges(mol, throwOnParamFailure=True)
            except ValueError:
                log.warning("Bad Gasteiger charge evaluation.")
                return None

        conformer = mol.GetConformer()

        # Centroid over all atoms, hydrogens included.
        centroid = rdMT.ComputeCentroid(conformer, ignoreHs=False)
        origin = np.asarray([centroid.x, centroid.y, centroid.z])

        positions = []
        for idx in range(0, mol.GetNumAtoms()):
            pos = conformer.GetAtomPosition(idx)
            positions.append(np.asarray([pos.x, pos.y, pos.z]))

        atoms = mol.GetAtoms()
        radii = [self.periodic_table.GetRvdw(atom.GetAtomicNum())
                 for atom in atoms]

        res = {
            "coords": np.asarray(positions) - origin,
            "vdwradii": np.asarray(radii),
        }
        if use_esp:
            charges = [float(atom.GetProp("_GasteigerCharge")) for atom in atoms]
            res['charges'] = np.asarray(charges)
        return res
Esempio n. 11
0
    def test1Canonicalization(self):
        """Check centroid computation and that the canonicalization routes agree.

        Applying ``ComputeCanonicalTransform`` + ``TransformConformer`` must
        give the same atom positions as ``CanonicalizeConformer`` and as
        ``CanonicalizeMol`` on fresh copies of the same molecule.
        """
        mol = Chem.MolFromSmiles("C")
        conf = Chem.Conformer(1)
        conf.SetAtomPosition(0, (4.0, 5.0, 6.0))
        mol.AddConformer(conf, 1)

        conf = mol.GetConformer()
        pt = rdmt.ComputeCentroid(conf)
        # assertTrue replaces the failUnless alias, which was removed from
        # unittest.TestCase in Python 3.12.
        self.assertTrue(ptEq(pt, geom.Point3D(4.0, 5.0, 6.0)))

        fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                             'MolTransforms', 'test_data', '1oir.mol')
        m = Chem.MolFromMolFile(fileN)
        cpt = rdmt.ComputeCentroid(m.GetConformer())
        # Passing the centroid explicitly must match the default behaviour.
        trans = rdmt.ComputeCanonicalTransform(m.GetConformer(), cpt)
        trans2 = rdmt.ComputeCanonicalTransform(m.GetConformer())
        for i in range(4):
            for j in range(4):
                self.assertTrue(feq(trans[i, j], trans2[i, j]))
        rdmt.TransformConformer(m.GetConformer(), trans2)
        nats = m.GetNumAtoms()

        def check_same_positions(other):
            # Compare every atom position of `m` against `other`.
            cnf1 = m.GetConformer()
            cnf2 = other.GetConformer()
            for i in range(nats):
                p1 = list(cnf1.GetAtomPosition(i))
                p2 = list(cnf2.GetAtomPosition(i))
                self.assertTrue(feq(p1[0], p2[0]))
                self.assertTrue(feq(p1[1], p2[1]))
                self.assertTrue(feq(p1[2], p2[2]))

        m2 = Chem.MolFromMolFile(fileN)
        rdmt.CanonicalizeConformer(m2.GetConformer())
        check_same_positions(m2)

        m3 = Chem.MolFromMolFile(fileN)
        rdmt.CanonicalizeMol(m3)
        check_same_positions(m3)
Esempio n. 12
0
 def __init__(self, inputFile):
     """Initialize the ligand from a file.

     Only ``.mol2`` files (case-insensitive extension) are accepted. The
     molecule, the positions of its default conformer and the conformer
     centroid are stored on the instance.

     Raises:
         ValueError: if the extension is not ``.mol2`` or the file could
             not be parsed into a molecule.
     """
     self.inputFile = inputFile
     fileExtension = os.path.splitext(inputFile)[1]
     if fileExtension.lower() != '.mol2':
         raise ValueError('{} files are not supported for the ligand.'.format(fileExtension[1:].upper()))

     logger.debug('Reading {}'.format(self.inputFile))
     self.mol = Chem.MolFromMol2File(inputFile, sanitize=True, removeHs=False)
     if self.mol is None:
         # Fail with a clear message instead of an AttributeError on None below.
         raise ValueError('Could not read a molecule from {}'.format(self.inputFile))

     # Set Centroid
     conformer = self.mol.GetConformer()
     self.coordinates = conformer.GetPositions()
     self.centroid = rdMolTransforms.ComputeCentroid(conformer)
     logger.debug('Set ligand centroid to {:.3f} {:.3f} {:.3f}'.format(*self.centroid))
Esempio n. 13
0
    def get_center(self, xyz):
        """
        Return the molecular center for a set of transformed coordinates.

        Without a smile string the center is the plain mean of ``xyz``.
        Otherwise the coordinates are written into conformer 0 of the RDKit
        molecule built from ``self.smile`` and RDKit's centroid is returned.
        """
        if self.smile is not None:
            from rdkit.Geometry import Point3D
            from rdkit.Chem import rdMolTransforms as rdmt

            conformer = self.rdkit_mol(self.smile).GetConformer(0)
            for atom_idx in range(conformer.GetNumAtoms()):
                px, py, pz = xyz[atom_idx]
                conformer.SetAtomPosition(atom_idx, Point3D(px, py, pz))
            centroid = rdmt.ComputeCentroid(conformer)
            return np.array([centroid.x, centroid.y, centroid.z])

        return np.mean(xyz, axis=0)
Esempio n. 14
0
def prepare_vina_job(docking_dir, prepared_receptor, prepared_ligand, vina_exe, box_size, job_fname, job_name):
    """Write a vina job file whose search box is centred on the ligand centroid.

    The ligand's mol2 file (same name as ``prepared_ligand`` with ``.pdbqt``
    replaced by ``.mol2``) is read with RDKit to obtain the centroid, the
    vina options are assembled into one string, and ``write_job`` is called.

    Args:
        docking_dir: directory holding the receptor/ligand files.
        prepared_receptor: receptor pdbqt file name.
        prepared_ligand: ligand pdbqt file name.
        vina_exe: vina executable passed through to ``write_job``.
        box_size: indexable with three entries (x, y, z sizes).
        job_fname: job file name passed through to ``write_job``.
        job_name: job name passed through to ``write_job``.

    Raises:
        ValueError: if the ligand mol2 file cannot be read by RDKit.
    """
    ligand = os.path.join(docking_dir, prepared_ligand.replace('.pdbqt', '.mol2'))

    # NOTE(review): this changes the process-wide working directory and is
    # kept because callers may rely on it. With a relative docking_dir the
    # `ligand` path above is then resolved from inside docking_dir -- confirm
    # callers always pass an absolute path.
    os.chdir(docking_dir)

    # create an rdkit mol from ligand
    mol = Chem.MolFromMol2File(ligand)
    if mol is None:
        raise ValueError('Could not read ligand file {}'.format(ligand))

    # get the ligand conformer and find its centroid
    conf = mol.GetConformer()
    centre = rdMolTransforms.ComputeCentroid(conf)  # out = centre.x, centre.y and centre.z for coords

    # pdbqt name for results of vina: strip only the final extension, so
    # interior dots survive and a name without any dot keeps its stem
    vina_out = os.path.splitext(prepared_ligand)[0] + '_vinaout.pdbqt'

    # vina options
    params = [
        '--receptor',
        os.path.join(docking_dir, prepared_receptor),
        '--ligand',
        os.path.join(docking_dir, prepared_ligand),
        '--center_x',
        centre.x,
        '--center_y',
        centre.y,
        '--center_z',
        centre.z,
        '--size_x',
        str(box_size[0]),
        '--size_y',
        str(box_size[1]),
        '--size_z',
        str(box_size[2]),
        '--out',
        vina_out,
    ]

    # parse options into string for vina
    parameters = ' '.join(str(v) for v in params)

    # write job file to run wherever
    write_job(directory=docking_dir, name=job_name, fname=job_fname, exe=vina_exe, options=parameters)
Esempio n. 15
0
    def test1Shape(self):
        """Exercise shape encoding, Tanimoto distance and bounding-box helpers.

        All expected numbers are regression values for the 1oir test
        molecule. Assertions use ``assertTrue`` because the ``failUnless``
        alias was removed from ``unittest.TestCase`` in Python 3.12.
        """
        fileN = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                             'ShapeHelpers', 'test_data', '1oir.mol')
        m = Chem.MolFromMolFile(fileN)
        rdmt.CanonicalizeMol(m)
        dims1, offset1 = rdshp.ComputeConfDimsAndOffset(m.GetConformer())
        grd = geom.UniformGrid3D(30.0, 16.0, 10.0)
        rdshp.EncodeShape(m, grd, 0)
        ovect = grd.GetOccupancyVect()
        self.assertTrue(ovect.GetTotalVal() == 9250)

        # Same result expected when the transform is passed in instead of
        # being applied to the molecule first.
        m = Chem.MolFromMolFile(fileN)
        trans = rdmt.ComputeCanonicalTransform(m.GetConformer())
        dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer(),
                                                      trans=trans)
        dims -= dims1
        offset -= offset1
        self.assertTrue(feq(dims.Length(), 0.0))
        self.assertTrue(feq(offset.Length(), 0.0))

        grd1 = geom.UniformGrid3D(30.0, 16.0, 10.0)
        rdshp.EncodeShape(m, grd1, 0, trans)
        ovect = grd1.GetOccupancyVect()

        self.assertTrue(ovect.GetTotalVal() == 9250)

        grd2 = geom.UniformGrid3D(30.0, 16.0, 10.0)
        rdshp.EncodeShape(m, grd2, 0)

        fileN2 = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                              'ShapeHelpers', 'test_data', '1oir_conf.mol')
        m2 = Chem.MolFromMolFile(fileN2)

        # Called for its effect on m; the returned value is not checked.
        rdMolAlign.AlignMol(m, m2)
        self.assertTrue(feq(rdshp.ShapeTanimotoDist(m, m2), 0.2813))

        dist = rdshp.ShapeTanimotoDist(mol1=m,
                                       mol2=m2,
                                       confId1=0,
                                       confId2=0,
                                       gridSpacing=0.25,
                                       stepSize=0.125)
        self.assertTrue(feq(dist, 0.3021))

        m = Chem.MolFromMolFile(fileN)
        # Result unused; the call is kept so the same code path is exercised.
        rdmt.ComputeCentroid(m.GetConformer())
        dims, offset = rdshp.ComputeConfDimsAndOffset(m.GetConformer())

        grd = geom.UniformGrid3D(dims.x, dims.y, dims.z, 0.5,
                                 DataStructs.DiscreteValueType.TWOBITVALUE,
                                 offset)
        dims -= geom.Point3D(13.927, 16.97, 9.775)
        offset -= geom.Point3D(-4.353, 16.829, 2.782)
        self.assertTrue(feq(dims.Length(), 0.0))
        self.assertTrue(feq(offset.Length(), 0.0))
        rdshp.EncodeShape(m, grd, 0)

        ovect = grd.GetOccupancyVect()

        self.assertTrue(ovect.GetTotalVal() == 9275)
        geom.WriteGridToFile(grd, '1oir_shape.grd')

        m = Chem.MolFromMolFile(fileN)
        lc, uc = rdshp.ComputeConfBox(m.GetConformer())
        rdmt.CanonicalizeMol(m)
        lc1, uc1 = rdshp.ComputeConfBox(m.GetConformer())

        lc2, uc2 = rdshp.ComputeUnionBox((lc, uc), (lc1, uc1))
        lc -= geom.Point3D(-4.353, 16.829, 2.782)
        uc -= geom.Point3D(9.574, 33.799, 12.557)
        self.assertTrue(feq(lc.Length(), 0.0))
        self.assertTrue(feq(uc.Length(), 0.0))

        lc1 -= geom.Point3D(-10.7519, -6.0778, -3.0123)
        uc1 -= geom.Point3D(8.7163, 5.3279, 3.1621)
        self.assertTrue(feq(lc1.Length(), 0.0))
        self.assertTrue(feq(uc1.Length(), 0.0))

        lc2 -= geom.Point3D(-10.7519, -6.0778, -3.01226)
        uc2 -= geom.Point3D(9.574, 33.799, 12.557)
        self.assertTrue(feq(lc2.Length(), 0.0))
        self.assertTrue(feq(uc2.Length(), 0.0))
Esempio n. 16
0
    def process_molecule(self, pdb_file):
        """
        Splits the molecule into separate channels.

        The PDB file is read with RDKit, hydrogens are added, and the result
        is written next to the input as ``<name>_hydrogenized.<ext>``. That
        file is then parsed again to build per-channel coordinate and radius
        arrays, all centered on the RDKit centroid (hydrogens included).

        :param pdb_file: the pdb file to be processed
        :return: a dictionary of the coordinates and vdwradii for each channel
            (all atoms, each standard amino acid, backbone, heavy atoms and
            hydrogens), or None if the file cannot be read or parsed
        """
        hydro_file_name = '_hydrogenized.'.join(
            os.path.basename(pdb_file).split('.'))
        hydrogenized_pdb_file = os.path.join(os.path.dirname(pdb_file),
                                             hydro_file_name)
        try:
            mol_rdkit = Chem.MolFromPDBFile(molFileName=pdb_file,
                                            removeHs=False, sanitize=True)
            if mol_rdkit is None:
                # handled by the except clause below
                raise ValueError
            mol_rdkit = rdMO.AddHs(mol_rdkit, addCoords=True)
            # get the conformation of the molecule
            conformer = mol_rdkit.GetConformer()
            # calculate the center of the molecule
            center = rdMT.ComputeCentroid(conformer, ignoreHs=False)
            mol_center = np.asarray([center.x, center.y, center.z])

            pdbw = Chem.rdmolfiles.PDBWriter(fileName=hydrogenized_pdb_file)
            try:
                pdbw.write(mol_rdkit)
                pdbw.flush()
            finally:
                # close the writer even if writing fails
                pdbw.close()
            del mol_rdkit, pdbw
        except (IOError, ValueError):
            log.warning("Bad PDB file.")
            return None

        try:
            mol = pd.parsePDB(hydrogenized_pdb_file)
        except IOError:
            log.warning("Could not read PDB file.")
            return None

        if mol is None:
            log.warning("Bad pdb file found.")
            return None

        std_amino_acids = ['ALA', 'ARG', 'ASN', 'ASP', 'CYS',
                           'GLN', 'GLU', 'GLY', 'HIS', 'ILE',
                           'LEU', 'LYS', 'MET', 'PHE', 'PRO',
                           'SER', 'THR', 'TRP', 'TYR', 'VAL']

        def canonical_notation(x):
            # 'CL' -> 'Cl'; single-character symbols are passed through as is
            return x[0].upper() + x[1:].lower() if len(x) > 1 else x

        def indices_of(selection):
            # An empty selection comes back as None (the original already
            # guarded mol.select this way); map it to an empty index array so
            # every channel is handled uniformly.
            # NOTE(review): assumes mol.backbone / mol.heavy / mol.hydrogen
            # can also be None when nothing matches -- confirm.
            if selection is None:
                return np.array([], dtype=np.int32)
            return selection.getIndices()

        res = {'coords': mol.getCoords() - mol_center,
               'vdwradii': np.asarray([self.periodic_table.GetRvdw(
                   self.periodic_table.GetAtomicNumber(
                       canonical_notation(atom)))
                                       for atom in mol.getElements()])}

        # find the data for all the 20 amino acids
        for aa in std_amino_acids:
            mask = indices_of(mol.select('resname ' + aa))
            res['coords_' + aa] = res['coords'][mask, :]
            res['vdwradii_' + aa] = res['vdwradii'][mask]

        # find the data for the backbones
        backbone_mask = indices_of(mol.backbone)
        res['coords_backbone'] = res['coords'][backbone_mask, :]
        res['vdwradii_backbone'] = res['vdwradii'][backbone_mask]

        # find the data for the heavy atoms (i.e. no H atoms)
        heavy_mask = indices_of(mol.heavy)
        res['coords_heavy'] = res['coords'][heavy_mask, :]
        res['vdwradii_heavy'] = res['vdwradii'][heavy_mask]

        # find the data for the hydrogen atoms
        hydro_mask = indices_of(mol.hydrogen)
        res['coords_hydro'] = res['coords'][hydro_mask, :]
        res['vdwradii_hydro'] = res['vdwradii'][hydro_mask]

        return res