Ejemplo n.º 1
0
    def test13CrippenO3A(self):
        """Test CrippenO3A with variable weight constraints followed by local-only optimization.

        For each constraint weight the probe is first aligned onto the
        reference with the probe's first 'O' match constrained to the
        reference's first 'S' match.  It is then re-aligned without
        constraints using ``options=4``.  The resulting O-S distance must
        agree, to one decimal place, with the value expected for that weight.
        """
        sdf_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                                'MolAlign', 'test_data', 'ref_e2.sdf')
        supplier = Chem.SDMolSupplier(sdf_path, True, False)
        reference = supplier[23]
        probe = supplier[32]

        # The MMFF property objects are not consumed by the Crippen alignment
        # below; the calls are kept so the test still exercises them.
        ChemicalForceFields.MMFFGetMoleculeProperties(reference)
        ChemicalForceFields.MMFFGetMoleculeProperties(probe)

        sulfur_idx = reference.GetSubstructMatch(Chem.MolFromSmarts('S'))[0]
        oxygen_idx = probe.GetSubstructMatch(Chem.MolFromSmarts('O'))[0]

        # (constraint weight, expected O-S distance after both alignments)
        cases = zip((0.1, 100.0), (2.7, 0.4))
        for weight, expected_distance in cases:
            constrained = rdMolAlign.GetCrippenO3A(
                probe,
                reference,
                constraintMap=[[oxygen_idx, sulfur_idx]],
                constraintWeights=[weight])
            constrained.Align()

            # Second pass: no constraints, options=4 (the local-only
            # optimization this test is named for).
            unconstrained = rdMolAlign.GetCrippenO3A(probe, reference,
                                                     options=4)
            unconstrained.Align()

            oxygen_pos = probe.GetConformer().GetAtomPosition(oxygen_idx)
            sulfur_pos = reference.GetConformer().GetAtomPosition(sulfur_idx)
            self.assertAlmostEqual(oxygen_pos.Distance(sulfur_pos),
                                   expected_distance, 1)
Ejemplo n.º 2
0
    def test12CrippenO3A(self):
        """Test CrippenO3A with constraints.

        Two identical coplanar 4-phenylpyridines are superimposed twice:

        1. the usual way, where the para carbon should land on itself;
        2. forcing the para carbon of the phenyl ring onto the pyridine
           nitrogen, which should leave the para carbons about 7 apart.
        """
        base = Chem.MolFromSmiles('n1ccc(cc1)-c1ccccc1')
        reference = Chem.AddHs(base)
        rdDistGeom.EmbedMolecule(reference)
        mmff_props = ChemicalForceFields.MMFFGetMoleculeProperties(reference)
        force_field = ChemicalForceFields.MMFFGetMoleculeForceField(
            reference, mmff_props)
        force_field.Minimize()

        match = reference.GetSubstructMatch(Chem.MolFromSmarts('nccc-cccc'))
        nIdx = match[0]
        cIdx = match[-1]
        # Atoms 2..5 of the match span the inter-ring bond; setting this
        # dihedral to 0 degrees makes the two rings coplanar.
        d0, d1, d2, d3 = match[2:6]
        rdMolTransforms.SetDihedralDeg(reference.GetConformer(), d0, d1, d2,
                                       d3, 0)

        free_probe = copy.copy(reference)
        rdMolAlign.RandomTransform(free_probe)
        # Copied after the random transform, so both probes start from the
        # same displaced pose.
        constrained_probe = copy.copy(free_probe)

        def para_carbon_offset(moved):
            """Distance between the para carbon of ``moved`` and of the reference."""
            moved_pos = moved.GetConformer().GetAtomPosition(cIdx)
            fixed_pos = reference.GetConformer().GetAtomPosition(cIdx)
            return moved_pos.Distance(fixed_pos)

        rdMolAlign.GetCrippenO3A(free_probe, reference).Align()
        self.assertAlmostEqual(para_carbon_offset(free_probe), 0, 0)

        rdMolAlign.GetCrippenO3A(constrained_probe, reference,
                                 constraintMap=[[cIdx, nIdx]]).Align()
        self.assertAlmostEqual(para_carbon_offset(constrained_probe), 7, 0)
Ejemplo n.º 3
0
    def test15MultiConfs(self):
        """Test multi-conformer alignment.

        All probe structures from ``probe_mol.sdf`` are merged into one
        molecule as conformers.  The per-conformer results returned by the
        ``...ForProbeConfs`` functions are then checked against individual
        alignments run with ``prbCid=i``, for both the MMFF-based and the
        Crippen-based O3A variants.
        """
        sdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign',
                           'test_data', 'ref_e2.sdf')
        suppl = Chem.SDMolSupplier(sdf, removeHs=False)
        refMol = suppl[13]
        sdf = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol', 'MolAlign',
                           'test_data', 'probe_mol.sdf')
        prbSuppl = Chem.SDMolSupplier(sdf, removeHs=False)
        tms = list(prbSuppl)
        prbMol = tms[0]
        # Fold every further record into prbMol as an extra conformer
        # (second argument True, as in the original call).
        for tm in tms[1:]:
            prbMol.AddConformer(tm.GetConformer(), True)
        # assert* replaces the failUnless* aliases, which were removed from
        # unittest in Python 3.12.
        self.assertEqual(prbMol.GetNumConformers(), 50)

        refParams = ChemicalForceFields.MMFFGetMoleculeProperties(refMol)
        prbParams = ChemicalForceFields.MMFFGetMoleculeProperties(prbMol)

        # MMFF-based O3A: batch result vs. one alignment per conformer.
        cp = Chem.Mol(prbMol)
        o3s = rdMolAlign.GetO3AForProbeConfs(cp, refMol, 1, prbParams,
                                             refParams)
        for i in range(prbMol.GetNumConformers()):
            # Fresh copy each time so earlier alignments cannot influence
            # the reference result for conformer i.
            cp2 = Chem.Mol(prbMol)
            o3 = rdMolAlign.GetO3A(cp2, refMol, prbParams, refParams, prbCid=i)
            self.assertAlmostEqual(o3s[i].Align(), o3.Align(), 6)
            self.assertAlmostEqual(o3s[i].Score(), o3.Score(), 6)

        # Crippen-based O3A: same comparison.
        cp = Chem.Mol(prbMol)
        o3s = rdMolAlign.GetCrippenO3AForProbeConfs(cp, refMol)
        for i in range(prbMol.GetNumConformers()):
            cp2 = Chem.Mol(prbMol)
            o3 = rdMolAlign.GetCrippenO3A(cp2, refMol, prbCid=i)
            self.assertAlmostEqual(o3s[i].Align(), o3.Align(), 6)
            self.assertAlmostEqual(o3s[i].Score(), o3.Score(), 6)
Ejemplo n.º 4
0
def PerformAlignmentAndWrieOutput(RefMol, ProbeMol, RefMolName, ProbeMolName,
                                  Writer):
    """Align the probe molecule using the configured method and write it out.

    The method is chosen from the OptionsInfo flags, checked in this order:
    "UseRMSD", "UseBestRMSD", "UseOpen3A", "UseCrippenOpen3A".  If none is
    set, an error is reported through MiscUtil.PrintError.

    The probe molecule is written to Writer whether or not the alignment
    succeeded.

    Returns:
        True when no RuntimeError/ValueError was raised during alignment,
        False otherwise.
    """

    try:
        # The values returned by the alignment calls are not used here.
        if OptionsInfo["UseRMSD"]:
            rdMolAlign.AlignMol(ProbeMol,
                                RefMol,
                                maxIters=OptionsInfo["MaxIters"])
        elif OptionsInfo["UseBestRMSD"]:
            AllChem.GetBestRMS(RefMol, ProbeMol)
        elif OptionsInfo["UseOpen3A"]:
            rdMolAlign.GetO3A(ProbeMol, RefMol).Align()
        elif OptionsInfo["UseCrippenOpen3A"]:
            rdMolAlign.GetCrippenO3A(ProbeMol, RefMol).Align()
        else:
            MiscUtil.PrintError(
                "Alignment couldn't be performed: Specified alignment value, %s, is not supported"
                % OptionsInfo["Alignment"])
    except (RuntimeError, ValueError):
        MiscUtil.PrintWarning(
            "Alignment failed between reference molecule, %s, and probe molecule, %s.\nWriting unaligned probe molecule...\n"
            % (RefMolName, ProbeMolName))
        Succeeded = False
    else:
        Succeeded = True

    # The probe is written in either case (aligned or as it was on failure).
    Writer.write(ProbeMol)

    return Succeeded
Ejemplo n.º 5
0
def PerformShapeAlignment(RefMol, ProbeMol):
    """Align ProbeMol onto RefMol with Open3DAlign and return the Align() result.

    The Crippen-based variant is used when OptionsInfo["UseCrippenOpen3A"]
    is truthy; otherwise the default (MMFF-based) O3A is used.

    NOTE(review): RDKit documents O3A.Align() as returning the RMSD of the
    alignment, whereas the alignment score comes from O3A.Score() -- confirm
    which of the two callers of this function expect.
    """

    if OptionsInfo["UseCrippenOpen3A"]:
        BuildAlignment = rdMolAlign.GetCrippenO3A
    else:
        BuildAlignment = rdMolAlign.GetO3A

    return BuildAlignment(ProbeMol, RefMol).Align()
Ejemplo n.º 6
0
def align_set_of_ligands(ligands: Sequence, num_conformers: int = 100) -> Tuple[List[Chem.Mol], List[float]]:
    """ Align a set of ligands to each other

        Conformers are generated for a deep copy of every ligand.  Each
        conformer is then aligned with Crippen O3A onto conformer 0 of the
        first ligand, and the best-scoring conformer of each ligand is kept.

        Parameters
        ----------
        ligands : list of rdkit.Chem.rdchem.Mol or rdkit.Chem.SmilesMolSupplier or rdkit.Chem.SDMolSupplier
            List of ligands. The input molecules are not modified.

        num_conformers : int, optional
            Value passed to ``generate_conformers`` and the number of
            conformer ids (0 .. num_conformers - 1) that are aligned per
            ligand. Defaults to 100.

        Returns
        -------
        aligned_molecules : list of rdkit.Chem.rdchem.Mol
            One molecule per ligand, rebuilt from the mol block of its
            best-scoring conformer. The first ligand is included, since it
            is aligned against itself like every other one.

        crippen_score : list of float
            Best Crippen O3A score found for each ligand.

    """

    if not isinstance(ligands, list):
        ligands = list(ligands)

    # Nothing to align; without this guard the reference lookup below
    # would raise IndexError.
    if not ligands:
        return [], []

    molecules = copy.deepcopy(ligands)
    molecules = [generate_conformers(mol, num_conformers) for mol in molecules]

    crippen_contribs = [rdMolDescriptors._CalcCrippenContribs(mol) for mol in molecules]

    # The first ligand acts as the reference for every alignment.
    ref_mol = molecules[0]
    crippen_ref_contrib = crippen_contribs[0]

    crippen_score = []
    aligned_molecules = []
    for mol, mol_contribs in zip(molecules, crippen_contribs):
        conformer_scores = []

        # NOTE(review): assumes generate_conformers leaves conformer ids
        # 0 .. num_conformers - 1 on every molecule -- confirm.
        for cid in range(num_conformers):
            # Positional arguments 5 and 6 are the probe conformer id and
            # the reference conformer id (always 0).
            crippenO3A = rdMolAlign.GetCrippenO3A(mol, ref_mol, mol_contribs, crippen_ref_contrib, cid, 0)
            crippenO3A.Align()
            conformer_scores.append(crippenO3A.Score())

        best = int(np.argmax(conformer_scores))
        # Round-trip through a mol block to extract the single best
        # conformer as a stand-alone molecule, keeping hydrogens.
        mol_string = Chem.MolToMolBlock(mol, confId=best)
        temp_mol = Chem.MolFromMolBlock(mol_string, removeHs=False)

        crippen_score.append(conformer_scores[best])
        aligned_molecules.append(temp_mol)

    return aligned_molecules, crippen_score
        
    
Ejemplo n.º 7
0
 def test11CrippenO3A(self):
     """Test CrippenO3A where the Crippen parameters are generated on call.

     Every molecule in ``ref_e2.sdf`` is used as a probe against molecule
     48 of the same file.  The summed score and the root-mean-square of
     the per-probe RMSD values are checked against fixed expected values.
     """
     sdf_path = os.path.join(RDConfig.RDBaseDir, 'Code', 'GraphMol',
                             'MolAlign', 'test_data', 'ref_e2.sdf')
     supplier = Chem.SDMolSupplier(sdf_path, True, False)
     reference = supplier[48]

     total_score = 0.0
     total_squared_rmsd = 0.0
     for probe in supplier:
         alignment = rdMolAlign.GetCrippenO3A(probe, reference)
         total_score += alignment.Score()
         # First element of the Trans() result is used as the RMSD; the
         # probe itself is not moved by Align() in this test.
         rmsd = alignment.Trans()[0]
         total_squared_rmsd += rmsd * rmsd

     mean_squared_rmsd = total_squared_rmsd / len(supplier)
     self.assertAlmostEqual(total_score, 4918, 0)
     self.assertAlmostEqual(math.sqrt(mean_squared_rmsd), .304, 3)
Ejemplo n.º 8
0
 def test8CrippenO3A(self):
   """Test CrippenO3A with explicitly precomputed Crippen contributions.

   Every molecule in ``ref_e2.sdf`` is aligned onto molecule 48 of the
   same file, passing the Crippen contribution lists of probe and
   reference explicitly.  The summed score and the root-mean-square of
   the values returned by Align() are checked against fixed expected
   values.
   """
   sdf = os.path.join(RDConfig.RDBaseDir,'Code','GraphMol',
                      'MolAlign', 'test_data', 'ref_e2.sdf')
   molS = Chem.SDMolSupplier(sdf, True, False)
   refNum = 48
   refMol = molS[refNum]
   cumScore = 0.0
   cumMsd = 0.0
   refList = rdMolDescriptors._CalcCrippenContribs(refMol, True)
   for prbMol in molS:
     prbList = rdMolDescriptors._CalcCrippenContribs(prbMol, True)
     pyO3A = rdMolAlign.GetCrippenO3A(prbMol, refMol, prbList, refList)
     # Score() is read before Align() is called, as in the original test.
     cumScore += pyO3A.Score()
     rmsd = pyO3A.Align()
     cumMsd += rmsd * rmsd
   cumMsd /= len(molS)
   # assertAlmostEqual replaces the failUnlessAlmostEqual alias, which was
   # removed from unittest in Python 3.12.
   self.assertAlmostEqual(cumScore,4918,0)
   self.assertAlmostEqual(math.sqrt(cumMsd),.304,3)
Ejemplo n.º 9
0
def retrieve_features(df, Complex, molecules_3D, fragments, core, resname, positions_to_grow, bonded_atom, core_smile_pattern, labels=None, save=True):
   """Build one fragment + receptor feature row per (molecule, fragment) pair.

   For each pair the function collects fragment descriptors (growth
   position one-hot, C/N/O counts, pharmacophore feature counts, atom
   count, MACCS bits, several RDKit descriptors), aligns the molecule onto
   ``core`` with Crippen O3A, computes the centroid of the fragment atoms
   in the molecule, and summarises the protein residues within 10 of that
   centroid (min/mean distance, count, per-residue-name counts).  The row
   is appended to both ``df`` and a fresh frame.

   Parameters:
       df: frame that rows are appended to (indexed by COLUMNS).
       Complex: path/handle passed to ``pd.parsePDB``.
       molecules_3D: molecules iterated in lockstep with ``fragments``.
       fragments: fragment SMILES strings.
       core: molecule the others are aligned onto.
       resname: not used in this function.
       positions_to_grow: length of the growth-position one-hot vector.
       bonded_atom: per-molecule index marked in the one-hot vector.
       core_smile_pattern: string whose highest digit seeds ring renumbering.
       labels: per-molecule labels, iterated in lockstep.
       save: when truthy, write the new rows plus labels to "descriptors.csv".

   Returns:
       (df, features_test, true_labels): the extended input frame, the frame
       holding only the rows built by this call, and the labels of the
       molecules that were not skipped.

   NOTE(review): ``labels`` defaults to None, but ``len(labels)`` and
   ``zip(..., labels)`` below both fail for None -- callers apparently must
   always pass it; confirm.
   """

   # Preprocess fragment
   features_test = pds.DataFrame(columns=COLUMNS) 
   true_labels = []
   print("nmolecules: {}\n  nfragments: {},\n nlabels: {}\n".format(len(molecules_3D), len(fragments), len(labels)))
   for i, (mol, fragment, bonded, label) in tqdm(enumerate(zip(molecules_3D, fragments, bonded_atom, labels)), total=len(fragments)):
       #Adapt ring to the right index
       # Start from one above the highest digit character in the core pattern.
       next_ring = str(max([int(c) for c in core_smile_pattern if c.isdigit()]) + 1)
       # Replace each digit 1..(initial next_ring - 1) in the fragment string
       # with a fresh, increasing number. The loop variable reuses the name
       # `i` from the enclosing for-loop.
       # NOTE(review): str.replace touches every occurrence of the digit
       # character, not only ring-closure labels -- confirm this is intended.
       for i in range(1, int(next_ring)):
           new_fragment = fragment.replace(str(i), next_ring)
           next_ring = str(int(next_ring) +  1)
           if new_fragment:
               fragment = new_fragment
           
       #mol = Chem.AddHs(mol)
       rec_info = []
       # One-hot vector over growth positions, set at index `bonded`.
       frag_info = [ 1 if i==bonded else 0 for i in range(positions_to_grow) ]
       # NOTE(review): atoms1 is never read; the complex is parsed again below.
       atoms1 = pd.parsePDB(Complex)
       # From here on `fragment` is an RDKit molecule, no longer a string.
       fragment = Chem.MolFromSmiles(fragment.strip("!"))

       # Count Heteroatoms
       n_carbon, n_nitrogen, n_oxigen = 0, 0, 0
       for atom in fragment.GetAtoms():
           if atom.GetAtomicNum() == 6: n_carbon += 1
           if atom.GetAtomicNum() == 7: n_nitrogen += 1
           if atom.GetAtomicNum() == 8: n_oxigen += 1
       frag_info.extend([n_carbon, n_nitrogen, n_oxigen])       

       # Fragment Properties
       # Only these four feature families are counted; others are ignored.
       fragment_features = collections.OrderedDict({ "Donor": 0, "Acceptor":0, "Aromatic": 0,  "Hydrophobe":0})
       for feature in factory.GetFeaturesForMol(fragment):
           feature_family  = feature.GetFamily()

       #Embed fragment properties
           if feature_family in fragment_features:
               fragment_features[feature_family] += 1
       # Calculate fragment volume
       #fragment_h = Chem.AddHs(fragment)
       #AllChem.EmbedMolecule(fragment_h)
       #fragment_volume = Chem.AllChem.ComputeMolVolume(fragment_h)
       #Embed information on a vector
       #frag_info = fragment_volume + fragment__features.values() 
       frag_info.extend(fragment_features.values())
       frag_info.append(len(fragment.GetAtoms()))
       # NOTE(review): the MACCS keys are computed on the whole `mol`, not on
       # `fragment`, despite the variable name -- confirm which is intended.
       frag_MACCS_fp = MACCSkeys.GenMACCSKeys(mol).ToBitString()
       # Bits 1..166 of the bit string are appended; bit 0 is skipped.
       for i in range(1, 167):
           frag_info.append(int(frag_MACCS_fp[i]))
       frag_info.append(Chem.rdMolDescriptors.CalcNumRotatableBonds(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcNumRings(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcNumHBD(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcNumHBA(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcNumHeteroatoms(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcNumAmideBonds(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcFractionCSP3(fragment))
       frag_info.append(Chem.rdMolDescriptors.CalcExactMolWt(fragment))
     
       # Align molecules
       # `mol` is the probe and `core` the reference of the Crippen O3A
       # alignment; the coordinates read below come from `mol` afterwards.
       molec = rdMolAlign.GetCrippenO3A(mol, core)
       molec.Align() 

       #Centroid of fragment
       fragment_atom_indxs = mol.GetSubstructMatch(fragment)
       molec_atoms = mol.GetAtoms()
       coords = [mol.GetConformer().GetAtomPosition(i) for i, atom in enumerate(molec_atoms) if i in fragment_atom_indxs ]
       # An IndexError from centeroidnp skips this molecule entirely: no row
       # and no label are recorded for it.
       # NOTE(review): presumably raised when the fragment has no match and
       # `coords` is empty -- confirm against centeroidnp.
       try:
           fragment_centeroid = centeroidnp(np.array(coords))
       except IndexError:
           print("Molecule {} skipped".format(mol.GetProp("_Name")))
           continue
       frag_info = np.hstack([frag_info, fragment_centeroid])
       #Extract receptor info
       # The complex is re-parsed on every iteration.
       closer_residues = pd.parsePDB(Complex).select("protein within 10 of center", center=fragment_centeroid)
       # One distance per coordinate returned by the selection.
       distances = [ np.linalg.norm(fragment_centeroid-coords) for coords in closer_residues.getCoords() ]
       min_dist, average_dist, n_close_residues = np.min(distances), np.mean(distances), len(distances)
       # Histogram of residue names over VOC_AMINOACIDS; a name missing from
       # VOC_AMINOACIDS makes .index() raise ValueError.
       residues = [0] * len(VOC_AMINOACIDS)
       for res in closer_residues.getResnames():
           idx = VOC_AMINOACIDS.index(res)
           residues[idx] += 1
       rec_info = np.hstack([np.array([min_dist, average_dist, n_close_residues]), np.array(residues)])
       fragment_receptor_info = np.hstack([frag_info, rec_info])
       
       # Save to general df
       # NOTE(review): if `pds` is pandas, DataFrame.append was removed in
       # pandas 2.0 -- confirm the pinned version or migrate to concat.
       df = df.append(pds.Series(fragment_receptor_info, index=COLUMNS), ignore_index=True)
       features_test = features_test.append(pds.Series(fragment_receptor_info, index=COLUMNS), ignore_index=True)
       true_labels.append(label)
   if save:
       # Only the rows built in this call are saved, with a "labels" column.
       df_save = features_test.copy()
       df_save["labels"] = true_labels
       df_save.to_csv("descriptors.csv")
   return df, features_test, true_labels
Ejemplo n.º 10
0
    m1 = Chem.MolFromMolBlock(m, removeHs=False)
    mol_list.append(m1)

# Shallow copy of the parsed molecules, then a deep copy to work on
# (presumably so the alignment below does not move the originals).
cdk2mol = list(mol_list)

cdk2mol2 = copy.deepcopy(cdk2mol)
crippen_contribs = list(
    map(rdMolDescriptors._CalcCrippenContribs, cdk2mol2))

# Reference molecule and its Crippen contributions.
ref = cdk2mol_reference
ref_contrib = rdMolDescriptors._CalcCrippenContribs(ref)

# Every copied molecule is a probe.
targets = list(cdk2mol2)
targets_contrib = list(crippen_contribs)

# Crippen O3A alignment of each probe onto the reference.
for i, target in enumerate(targets):
    crippenO3A = rdMolAlign.GetCrippenO3A(target, ref, targets_contrib[i],
                                          ref_contrib)
    crippenO3A.Align()

# Display the reference together with all probes in the viewer.
v.DeleteAll()
v.ShowMol(ref, name='ref', showOnly=False)
for i, target in enumerate(targets):
    name = 'probe_{}'.format(i)
    v.ShowMol(target, name=name, showOnly=False)

ref_mol_block = Chem.MolToMolBlock(ref_mol, removeHs=False)
hmols_1 = mol_list

# Contributions recomputed on the original molecules; the first one is
# taken as the reference contribution.
crippen_contribs = list(
    map(rdMolDescriptors._CalcCrippenContribs, hmols_1))
crippen_ref_contrib = crippen_contribs[0]
Ejemplo n.º 11
0
def main():
    """Command-line entry point: Open3DAlign input molecules onto a query.

    Steps:
      1. Parse the command-line options (the I/O options such as
         ``args.input``, ``args.output`` and ``args.meta`` are presumably
         registered by ``parameter_utils.add_default_io_args``).
      2. Read the query molecule, strip hydrogens, and align a copy of it
         onto itself to obtain the "perfect" score that is passed on to
         ``doO3Dalign``.
      3. For every input molecule, optionally generate conformers
         (``--num`` > 0), then call ``doO3Dalign``.  A ValueError for one
         molecule is logged and counted, and processing continues.
      4. Close the streams and, when ``args.meta`` is set, write metrics.
    """

    parser = argparse.ArgumentParser(description='Open3DAlign with RDKit')
    parser.add_argument('query', help='query molfile')
    parser.add_argument(
        '--qmolidx',
        help="Query molecule index in SD file if not the first",
        type=int,
        default=1)
    parser.add_argument('--crippen',
                        action='store_true',
                        help='Use Crippen (logP) contributions')
    parser.add_argument(
        '-t',
        '--threshold',
        type=float,
        help='score cuttoff relative to alignment of query to itself')
    parser.add_argument(
        '-n',
        '--num',
        default=0,
        type=int,
        help=
        'number of conformers to generate, if None then input structures are assumed to already be 3D'
    )
    parser.add_argument('-a',
                        '--attempts',
                        default=0,
                        type=int,
                        help='number of attempts to generate conformers')
    parser.add_argument('-r',
                        '--rmsd',
                        type=float,
                        default=1.0,
                        help='prune RMSD threshold for excluding conformers')
    parser.add_argument(
        '-e',
        '--emin',
        type=int,
        default=0,
        help=
        'energy minimisation iterations for generated conformers (default of 0 means none)'
    )
    parameter_utils.add_default_io_args(parser)

    args = parser.parse_args()
    utils.log("o3dAlign Args: ", args)

    # TODO - handle molecules with multiple fragments
    # TODO - allow to specify threshold as fraction of perfect score?

    qmol = rdkit_utils.read_single_molecule(args.query, index=args.qmolidx)
    qmol = Chem.RemoveHs(qmol)
    # Independent copy used as the probe for the self-alignment below.
    qmol2 = Chem.Mol(qmol)

    source = "conformers.py"
    datasetMetaProps = {
        "source": source,
        "description": "Open3DAlign using RDKit " + rdBase.rdkitVersion
    }
    clsMappings = {"O3DAScore": "java.lang.Float"}
    fieldMetaProps = [{
        "fieldName": "O3DAScore",
        "values": {
            "source": source,
            "description": "Open3DAlign alignment score"
        }
    }]
    if args.num > 0:
        # we generate the conformers so will add energy info
        clsMappings["EnergyDelta"] = "java.lang.Float"
        clsMappings["EnergyAbs"] = "java.lang.Float"
        fieldMetaProps.append({
            "fieldName": "EnergyDelta",
            "values": {
                "source": source,
                "description": "Energy difference to lowest energy conformer"
            }
        })
        fieldMetaProps.append({
            "fieldName": "EnergyAbs",
            "values": {
                "source": source,
                "description": "Absolute energy"
            }
        })

    # `inp` rather than `input`, so the builtin is not shadowed.
    inp, output, suppl, writer, output_base = rdkit_utils.\
        default_open_input_output(args.input, args.informat, args.output,
                                  'o3dAlign', args.outformat,
                                  valueClassMappings=clsMappings,
                                  datasetMetaProps=datasetMetaProps,
                                  fieldMetaProps=fieldMetaProps)

    # Align the query onto itself to get the reference ("perfect") values.
    if args.crippen:
        pyO3A = rdMolAlign.GetCrippenO3A(qmol2, qmol)
    else:
        pyO3A = rdMolAlign.GetO3A(qmol2, qmol)

    perfect_align = pyO3A.Align()
    perfect_score = pyO3A.Score()
    utils.log('Perfect score:', perfect_align, perfect_score,
              Chem.MolToSmiles(qmol, isomericSmiles=True), qmol.GetNumAtoms())

    i = 0  # index of the current input record (unreadable ones included)
    count = 0  # accumulated return values of doO3Dalign
    total = 0  # accumulated number of conformers processed
    errors = 0  # molecules that raised ValueError
    for mol in suppl:
        if mol is None:
            i += 1
            continue
        try:
            if args.num > 0:
                # Discard any input conformers and generate fresh ones.
                mol.RemoveAllConformers()
                conformerProps, minEnergy = conformers.process_mol_conformers(
                    mol, i, args.num, args.attempts, args.rmsd, None, None, 0)
                mol = Chem.RemoveHs(mol)
                count += doO3Dalign(i,
                                    mol,
                                    qmol,
                                    args.crippen,
                                    args.threshold,
                                    perfect_score,
                                    writer,
                                    conformerProps=conformerProps,
                                    minEnergy=minEnergy)
            else:
                mol = Chem.RemoveHs(mol)
                count += doO3Dalign(i, mol, qmol, args.crippen, args.threshold,
                                    perfect_score, writer)
            total += mol.GetNumConformers()
        except ValueError as e:
            errors += 1
            # Exceptions have no `.message` attribute on Python 3; str(e)
            # works on both Python 2 and 3.
            utils.log("Molecule", i, "failed to align:", str(e))
        i += 1

    inp.close()
    writer.flush()
    writer.close()
    output.close()

    if args.meta:
        utils.write_metrics(
            output_base, {
                '__InputCount__': i,
                '__OutputCount__': count,
                '__ErrorCount__': errors,
                'RDKitO3DAlign': total
            })