Ejemplo n.º 1
0
def run_transforms(rxn_file, data_file, smarts):
    """
    Apply a reaction to every molecule in a data file and collect the reactant/product pairs for which the
    product is itself one of the input molecules.

    Rows whose SMILES is missing or cannot be parsed are skipped (a warning is written to stderr) instead of
    aborting the run. If ``smarts`` is given but cannot be parsed, a message is printed and the process exits
    with status 1.

    :param rxn_file: reaction file name
    :param data_file: name of a CSV file; the "SMILES", "Name" and "Value" columns are read
    :param smarts: SMARTS that must be matched by input molecules; a falsy value disables this filter
    :return: flat list of [mol, name, value] entries, each reactant entry immediately followed by the entry
             for its matching product
    """
    # Bind the name unconditionally so the loop below never depends on short-circuiting to avoid a NameError
    smarts_mol = None
    if smarts:
        smarts_mol = Chem.MolFromSmarts(smarts)
        if smarts_mol is None:
            print(f"Could not parse SMARTS {smarts}")
            sys.exit(1)
    rxn = AllChem.ReactionFromRxnFile(rxn_file)
    reactant_template = Chem.Mol(rxn.GetReactantTemplate(0))
    product_template = Chem.Mol(rxn.GetProductTemplate(0))
    # In order to make the output easier to understand, we will align the molecules to the reaction
    # The aligned structures will look funny unless we scale the reaction and product templates
    scale_molecule(reactant_template)
    scale_molecule(product_template)
    # Clear the atom maps from the templates, may not be necessary, but can't hurt
    clear_atom_maps(reactant_template)
    clear_atom_maps(product_template)
    # Read in the data
    df = pd.read_csv(data_file)
    # MolFromSmiles returns None for SMILES it cannot parse, and pandas represents empty cells as NaN (a float),
    # so anything that is not a string is treated as unparsable as well
    parsed_mols = [Chem.MolFromSmiles(x) if isinstance(x, str) else None for x in df.SMILES]
    keep_row = [m is not None for m in parsed_mols]
    num_bad = keep_row.count(False)
    if num_bad:
        print(f"Skipping {num_bad} row(s) with missing or unparsable SMILES", file=sys.stderr)
        # Drop the bad rows so every downstream step only ever sees valid molecules
        df = df.loc[keep_row].reset_index(drop=True)
    df["Mol"] = [remove_salts(m) for m in parsed_mols if m is not None]
    # Build a dictionary of the input SMILES
    smiles_dict = build_smiles_dictionary(df)
    output_list = []
    used = set()
    # Loop over the molecules, apply the reaction and check if the product SMILES is in smiles_dict
    # If it is, we have a pair
    for mol, name, val in df[["Mol", "Name", "Value"]].values:
        if smarts_mol is not None and not mol.HasSubstructMatch(smarts_mol):
            continue
        for prod in rxn.RunReactants([mol]):
            prod_mol = prod[0]
            prod_smiles = Chem.MolToSmiles(prod_mol)
            prod_lookup = smiles_dict.get(prod_smiles)
            # Only keep products that are input molecules, and skip duplicate products
            if prod_lookup is None or prod_smiles in used:
                continue
            # Generate a depiction aligned to the reactant template
            rdDepictor.GenerateDepictionMatching2DStructure(mol, reactant_template)
            output_list.append([mol, name, val])
            prod_name, prod_val = prod_lookup
            prod_mol.UpdatePropertyCache()
            # Generate a depiction aligned to the product template
            rdDepictor.GenerateDepictionMatching2DStructure(prod_mol, product_template)
            output_list.append([prod_mol, prod_name, prod_val])
            used.add(prod_smiles)
    return output_list
Ejemplo n.º 2
0
    def testConstrainedCoords(self):
        """Exercise the constrained-depiction entry points with and without a reference pattern.

        The test makes no assertions; it passes as long as none of the calls raises.
        """
        # 2D case: a template with computed coordinates constrains two larger molecules
        template = Chem.MolFromSmiles('c1nccc2n1ccc2')
        rdDepictor.Compute2DCoords(template)

        plainMol = Chem.MolFromSmiles('c1cccc2ncn3cccc3c21')
        rdDepictor.GenerateDepictionMatching2DStructure(plainMol, template)

        chloroMol = Chem.MolFromSmiles('c1cc(Cl)cc2ncn3cccc3c21')
        rdDepictor.Compute2DCoords(chloroMol)
        # Same call again, this time passing a conformer id of -1 and an explicit reference pattern
        genericPatt = Chem.MolFromSmarts('*1****2*1***2')
        rdDepictor.GenerateDepictionMatching2DStructure(chloroMol, template, -1, genericPatt)

        # 3D case: the ligand read from the SDF file is the reference for a copy of itself
        relParts = ('Code', 'GraphMol', 'Depictor', 'test_data', '1XP0_ligand.sdf')
        ligandPath = os.path.join(RDConfig.RDBaseDir, *relParts)

        ligand3d = Chem.MolFromMolFile(ligandPath)
        ligand2d = Chem.Mol(ligand3d)
        rdDepictor.GenerateDepictionMatching3DStructure(ligand2d, ligand3d)
        corePatt = Chem.MolFromSmarts(
            '[#6]1~[#7][#6]~[#6]2[#6](=[#8])[#7]~[#6](c3ccccc3)[#7][#7]12')
        rdDepictor.GenerateDepictionMatching3DStructure(ligand2d, ligand3d, -1, corePatt)
Ejemplo n.º 3
0
 def _reorient_molecule(self):
     """Regenerate the depiction of ``self.draw_mol`` so that it matches the 2D structure of ``self.core``."""
     target = self.draw_mol
     reference = self.core
     rdDepictor.GenerateDepictionMatching2DStructure(target, reference)
Ejemplo n.º 4
0
    def testGenerate2DDepictionAllowRGroups(self):
        templateMolblock = """
     RDKit          2D

  9  9  0  0  0  0  0  0  0  0999 V2000
   -0.8929    1.0942    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1919    0.3442    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1919   -1.1558    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.8929   -1.9059    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.4060   -1.1558    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.4060    0.3442    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4910    1.0942    0.0000 R1  0  0  0  0  0  0  0  0  0  0  0  0
    1.7051    1.0942    0.0000 R2  0  0  0  0  0  0  0  0  0  0  0  0
   -3.4910   -1.9059    0.0000 R3  0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  1  1  0
  6  8  1  0
  3  9  1  0
  2  7  1  0
M  RGP  3   7   1   8   2   9   3
M  END"""
        templateRef = Chem.MolFromMolBlock(templateMolblock)
        orthoMeta = Chem.MolFromSmiles(
            "c1ccc(-c2ccc(-c3ccccc3)c(-c3ccccc3)c2)cc1")
        ortho = Chem.MolFromSmiles("c1ccc(-c2ccccc2-c2ccccc2)cc1")
        meta = Chem.MolFromSmiles("c1ccc(-c2cccc(-c3ccccc3)c2)cc1")
        biphenyl = Chem.MolFromSmiles("c1ccccc1-c1ccccc1")
        phenyl = Chem.MolFromSmiles("c1ccccc1")

        atomMap = rdDepictor.GenerateDepictionMatching2DStructure(
            orthoMeta, templateRef)
        self.assertEqual(orthoMeta.GetNumConformers(), 1)

        for mol in (ortho, meta, biphenyl, phenyl):
            # fails as does not match template
            with self.assertRaises(ValueError):
                rdDepictor.GenerateDepictionMatching2DStructure(
                    mol, templateRef)

            # succeeds with allowRGroups=true
            atomMap = rdDepictor.GenerateDepictionMatching2DStructure(
                mol, templateRef, allowRGroups=True)
            self.assertEqual(mol.GetNumConformers(), 1)
            msd = 0.0
            for refIdx, molIdx in atomMap:
                msd += (templateRef.GetConformer().GetAtomPosition(refIdx) -
                        mol.GetConformer().GetAtomPosition(molIdx)).LengthSq()
            msd /= len(atomMap)
            self.assertAlmostEqual(msd, 0.0)

        # test that using a refPattern with R groups and a reference without works
        pyridineRef = Chem.MolFromMolBlock("""
     RDKit          2D

  6  6  0  0  0  0  0  0  0  0999 V2000
   -0.8929    1.0942    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1919    0.3442    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1919   -1.1558    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -0.8929   -1.9059    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.4060   -1.1558    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
    0.4060    0.3442    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  1  1  0
M  END""")
        genericRefPatternWithRGroups = Chem.MolFromSmarts(
            "[*:3]a1a([*:1])aa([*:2])aa1")
        pyridineRefHs = Chem.AddHs(pyridineRef)

        for mol in (ortho, meta, biphenyl, phenyl):
            atomMap = rdDepictor.GenerateDepictionMatching2DStructure(
                mol,
                pyridineRef,
                refPatt=genericRefPatternWithRGroups,
                allowRGroups=True)
            self.assertEqual(mol.GetNumConformers(), 1)
            msd = 0.0
            for refIdx, molIdx in atomMap:
                msd += (pyridineRef.GetConformer().GetAtomPosition(refIdx) -
                        mol.GetConformer().GetAtomPosition(molIdx)).LengthSq()
            msd /= len(atomMap)
            self.assertAlmostEqual(msd, 0.0)
Ejemplo n.º 5
0
    def testGenerate2DDepictionRefPatternAtomMap(self):
        indazoleMolblock = """
     RDKit          2D

  9 10  0  0  0  0  0  0  0  0999 V2000
   -6.0878    2.4335    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -7.3867    1.6835    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -7.3867    0.1833    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -6.0878   -0.5666    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.7887    0.1833    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -4.7887    1.6835    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -3.4897   -0.5664    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1906    1.6833    0.0000 C   0  0  0  0  0  0  0  0  0  0  0  0
   -2.1906    0.1835    0.0000 N   0  0  0  0  0  0  0  0  0  0  0  0
  1  2  2  0
  2  3  1  0
  3  4  2  0
  4  5  1  0
  5  6  2  0
  6  1  1  0
  8  9  2  0
  6  8  1  0
  7  9  1  0
  7  5  1  0
M  END"""
        indazoleRef = Chem.MolFromMolBlock(indazoleMolblock)
        cycloheptylPyrazole = Chem.MolFromSmiles("c1cc(C2CCCCCC2)[nH]n1")

        # test using refPattern
        refPatt = Chem.MolFromSmarts("a1aan[nH]1")
        rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole,
                                                        indazoleRef,
                                                        refPatt=refPatt)
        self.assertEqual(cycloheptylPyrazole.GetNumConformers(), 1)
        molMatchVect = cycloheptylPyrazole.GetSubstructMatch(refPatt)
        self.assertEqual(len(molMatchVect), refPatt.GetNumAtoms())
        refMatchVect = indazoleRef.GetSubstructMatch(refPatt)
        self.assertEqual(len(refMatchVect), refPatt.GetNumAtoms())
        atomMap = tuple(zip(refMatchVect, molMatchVect))
        msd = 0.0
        for refIdx, molIdx in atomMap:
            msd += (indazoleRef.GetConformer().GetAtomPosition(refIdx) -
                    cycloheptylPyrazole.GetConformer().GetAtomPosition(molIdx)
                    ).LengthSq()
        msd /= len(molMatchVect)
        self.assertAlmostEqual(msd, 0.0)
        # try with a pattern larger than the reference molecule
        hugePatt = Chem.MolFromSmarts("CCCCCCCCCCCCCCCCCCCCCCCCCCC")
        with self.assertRaises(ValueError):
            rdDepictor.GenerateDepictionMatching2DStructure(
                cycloheptylPyrazole, indazoleRef, refPatt=hugePatt)

        # try with an out of range confId
        with self.assertRaises(ValueError):
            rdDepictor.GenerateDepictionMatching2DStructure(
                cycloheptylPyrazole, indazoleRef, confId=1, refPatt=refPatt)

        # test using atomMap directly
        cycloheptylPyrazole.RemoveAllConformers()
        rdDepictor.GenerateDepictionMatching2DStructure(cycloheptylPyrazole,
                                                        indazoleRef,
                                                        atomMap=atomMap)
        self.assertEqual(cycloheptylPyrazole.GetNumConformers(), 1)
        msd = 0.0
        for refIdx, molIdx in atomMap:
            msd += (indazoleRef.GetConformer().GetAtomPosition(refIdx) -
                    cycloheptylPyrazole.GetConformer().GetAtomPosition(molIdx)
                    ).LengthSq()
        msd /= len(atomMap)
        self.assertAlmostEqual(msd, 0.0)

        # try with an atomMap larger than the reference molecule
        atomMapHuge = list(atomMap) + [
            (0, 0) for i in range(indazoleRef.GetNumAtoms())
        ]
        with self.assertRaises(ValueError):
            rdDepictor.GenerateDepictionMatching2DStructure(
                cycloheptylPyrazole, indazoleRef, atomMap=atomMapHuge)

        # try with an atomMap with out of range indices
        atomMapOutOfRange = list(atomMap) + [(100, 100)]
        with self.assertRaises(ValueError):
            rdDepictor.GenerateDepictionMatching2DStructure(
                cycloheptylPyrazole, indazoleRef, atomMap=atomMapOutOfRange)

        # try with an out of range confId
        with self.assertRaises(ValueError):
            rdDepictor.GenerateDepictionMatching2DStructure(
                cycloheptylPyrazole, indazoleRef, atomMap=atomMap, confId=1)