Ejemplo n.º 1
0
 def test_stereoisomers_produce_equal_fingerprints_nonstereo(self):
     """Check that both enantiomer files give equal non-stereo fingerprints.

     The molecules loaded from ``ENANT1_SDF_FILE`` and ``ENANT2_SDF_FILE``
     are fingerprinted on conformer 0 by one shared ``Fingerprinter`` built
     with ``stereo=False``; the two resulting fingerprints must be equal.
     """
     from e3fp.fingerprint import fprinter
     from e3fp.conformer.util import mol_from_sdf
     enantiomers = [mol_from_sdf(ENANT1_SDF_FILE),
                    mol_from_sdf(ENANT2_SDF_FILE)]
     max_level = 5
     fingerprinter = fprinter.Fingerprinter(
         level=max_level,
         stereo=False,
         radius_multiplier=1.718,
         remove_duplicate_substructs=True,
     )
     fingerprints = []
     for enantiomer in enantiomers:
         # The same fingerprinter is re-run for each molecule in turn.
         fingerprinter.run(conf=0, mol=enantiomer)
         fingerprints.append(
             fingerprinter.get_fingerprint_at_level(max_level))
     first_fp, second_fp = fingerprints
     self.assertEqual(first_fp, second_fp)
Ejemplo n.º 2
0
    def test_reordering_conformers_produces_same_fprints(self):
        """Check that conformer processing order does not change fingerprints.

        Every conformer of the first SDF file found in ``RAND_SDF_DIR`` is
        fingerprinted once in its stored order and once in a shuffled order;
        the two ``{conformer id: fingerprint}`` mappings must be equal.
        """
        from e3fp.fingerprint import fprinter
        from e3fp.conformer.util import mol_from_sdf
        import random

        sdf_pattern = os.path.join(RAND_SDF_DIR, "*.sdf*")
        mol = mol_from_sdf(glob.glob(sdf_pattern)[0])
        depth = 5
        fingerprinter = fprinter.Fingerprinter(
            level=depth,
            stereo=False,
            radius_multiplier=1.718,
            remove_duplicate_substructs=True,
        )

        def fingerprint_in_order(ordered_ids):
            # Run the shared fingerprinter on each conformer id in sequence.
            by_conf = {}
            for cid in ordered_ids:
                fingerprinter.run(cid, mol)
                by_conf[cid] = fingerprinter.get_fingerprint_at_level(depth)
            return by_conf

        original_order = [c.GetId() for c in mol.GetConformers()]
        fprints_original = fingerprint_in_order(original_order)

        shuffled_order = original_order[:]
        random.shuffle(shuffled_order)
        fprints_shuffled = fingerprint_in_order(shuffled_order)

        self.assertEqual(fprints_original, fprints_shuffled)
Ejemplo n.º 3
0
 def test_generates_correct_disconnected_shells_level2(self):
     """Check the third dict yielded by a disconnected ``ShellsGenerator``.

     Atoms 0-2 are placed on the z axis 0.45 apart, then the generator is
     advanced three times and its last output is compared with the
     hand-built expected shells.
     """
     from e3fp.fingerprint.fprinter import ShellsGenerator
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     conf = mol.GetConformers()[0]
     atom_ids = list(range(3))
     for idx in atom_ids:
         conf.SetAtomPosition(idx, [0, 0, .45 * idx])

     # Expected shells one step before the output under test.
     inner = {}
     inner[0] = Shell(0, {1})
     inner[1] = Shell(1, {0, 2})
     inner[2] = Shell(2, {1})
     # Expected shells for the output under test, built from the ones above.
     outer = {}
     outer[0] = Shell(0, {inner[1], inner[2]})
     outer[1] = Shell(1, {inner[0], inner[2]})
     outer[2] = Shell(2, {inner[0], inner[1]})

     generator = ShellsGenerator(conf, atom_ids, radius_multiplier=0.5,
                                 include_disconnected=True)
     # Discard the first two outputs; the third is the one being checked.
     next(generator)
     next(generator)
     produced = next(generator)
     self.assertDictEqual(produced, outer)
Ejemplo n.º 4
0
    def test_connected_substructs_converge(self):
        """Check that connected-shell substructs stop changing.

        With ``bound_atoms_from_mol`` patched to return a fixed bonds dict,
        the substructs taken from the fourth generator output must equal
        those taken from the fifth.
        """
        from e3fp.fingerprint import fprinter
        from e3fp.conformer.util import mol_from_sdf

        def substructs_of(shells):
            # Map each key to the substruct of its shell.
            return {key: shell.substruct for key, shell in shells.items()}

        mol = mol_from_sdf(PLANAR_SDF_FILE)
        conf = mol.GetConformers()[0]
        atom_ids = list(range(3))
        fake_bonds = {0: {1, 2}, 1: {0}, 2: {0}}
        for idx in atom_ids:
            conf.SetAtomPosition(idx, [0, 0, .45 * idx])

        patch_target = 'e3fp.fingerprint.fprinter.bound_atoms_from_mol'
        with mock.patch(patch_target, return_value=fake_bonds):
            generator = fprinter.ShellsGenerator(
                conf, atom_ids,
                radius_multiplier=0.5,
                include_disconnected=False)
            # Substructs are extracted on every step; only the last is kept.
            for _ in range(4):
                current = substructs_of(next(generator))

            following = substructs_of(next(generator))

            self.assertDictEqual(current, following)
Ejemplo n.º 5
0
 def test_generates_correct_connected_shells_level2(self):
     """Check the third dict yielded by a connected ``ShellsGenerator``.

     ``bound_atoms_from_mol`` is patched to return a fixed bonds dict, atoms
     0-2 are placed on the z axis 0.45 apart, and the third generator output
     is compared with the hand-built expected shells.
     """
     from e3fp.fingerprint import fprinter
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     conf = mol.GetConformers()[0]
     atom_ids = list(range(3))
     fake_bonds = {0: {1, 2}, 1: {0}, 2: {0}}
     for idx in atom_ids:
         conf.SetAtomPosition(idx, [0, 0, .45 * idx])

     # Expected shells one step before the output under test.
     inner = {}
     inner[0] = Shell(0, {1})
     inner[1] = Shell(1, {0})
     inner[2] = Shell(2, {})
     # Expected shells for the output under test, built from the ones above.
     outer = {}
     outer[0] = Shell(0, {inner[1], inner[2]})
     outer[1] = Shell(1, {inner[0]})
     outer[2] = Shell(2, {inner[0]})

     patch_target = 'e3fp.fingerprint.fprinter.bound_atoms_from_mol'
     with mock.patch(patch_target, return_value=fake_bonds):
         generator = fprinter.ShellsGenerator(
             conf, atom_ids,
             radius_multiplier=0.5,
             include_disconnected=False)
         # Discard the first two outputs; the third is the one being checked.
         next(generator)
         next(generator)
         produced = next(generator)
         self.assertDictEqual(produced, outer)
Ejemplo n.º 6
0
 def test_create_shell_no_shell(self):
     """Check that a ``Shell`` can be built from a center atom alone."""
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     # No assertion: the test passes if construction does not raise.
     Shell(all_atoms[0])
Ejemplo n.º 7
0
    def test_remove_dupe_substructs_makes_same_substruts_diff_shells(self):
        """Check the effect of ``remove_duplicate_substructs`` on shells.

        Fingerprinting the same conformer with the option on and off must
        give equal sets of substructs but unequal sets of shells at the
        fingerprinter's current level.
        """
        from e3fp.fingerprint import fprinter
        from e3fp.conformer.util import mol_from_sdf
        mol = mol_from_sdf(PLANAR_SDF_FILE)
        level = 2
        conf = mol.GetConformers()[0]

        def shells_and_substructs(remove_dupes):
            # Fingerprint with a fresh Fingerprinter and collect the shells
            # (and their substructs) stored for its current level.
            fingerprinter = fprinter.Fingerprinter(
                level=level,
                bits=1024,
                stereo=True,
                radius_multiplier=1.718,
                remove_duplicate_substructs=remove_dupes,
            )
            fingerprinter.run(conf, mol)
            shells = set(
                fingerprinter.level_shells[fingerprinter.current_level])
            return shells, {shell.substruct for shell in shells}

        deduped_shells, deduped_substructs = shells_and_substructs(True)
        raw_shells, raw_substructs = shells_and_substructs(False)

        self.assertEqual(deduped_substructs, raw_substructs)
        self.assertNotEqual(deduped_shells, raw_shells)
Ejemplo n.º 8
0
 def test_rdkit_invariants(self):
     """Check the invariants computed for atom index 2 of the planar mol."""
     from e3fp.fingerprint import fprinter
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     expected_invariants = [6, 3, 1, 0, 0, 1]
     observed = fprinter.rdkit_invariants_from_atom(mol.GetAtomWithIdx(2))
     self.assertListEqual(list(observed), expected_invariants)
Ejemplo n.º 9
0
 def test_shells_diff_center_same_atoms_nonequal(self):
     """Check that shells with equal members but different centers differ."""
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     # Both shells get atoms 2.. as members; only the center atom changes.
     centered_on_first = Shell(all_atoms[0], all_atoms[2:])
     centered_on_second = Shell(all_atoms[1], all_atoms[2:])
     self.assertNotEqual(centered_on_first, centered_on_second)
Ejemplo n.º 10
0
 def test_create_shell_with_same_center_fails(self):
     """Check that a shell whose members include its center is rejected.

     Passing the full atom list (which contains the center atom) as the
     shell members must raise ``FormatError``.
     """
     from e3fp.fingerprint.structs import Shell, FormatError
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     center = all_atoms[0]
     self.assertRaises(FormatError, Shell, center, all_atoms)
Ejemplo n.º 11
0
 def test_same_shells_hash_to_same_value(self):
     """Check that hashing one ``Substruct`` twice gives the same value."""
     from e3fp.fingerprint.structs import Substruct
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     substruct = Substruct(all_atoms[0], all_atoms[1:])
     first_hash = hash(substruct)
     second_hash = hash(substruct)
     self.assertEqual(first_hash, second_hash)
Ejemplo n.º 12
0
 def test_creation_with_atoms_or_ids_equivalent(self):
     """Check that shells built from atoms and from atom indices are equal."""
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     indices = [atom.GetIdx() for atom in all_atoms]
     from_atoms = Shell(all_atoms[0], all_atoms[1:])
     from_indices = Shell(indices[0], indices[1:])
     self.assertEqual(from_atoms, from_indices)
Ejemplo n.º 13
0
 def test_substructs_same_center_diff_atoms_nonequal(self):
     """Check that substructs sharing a center but not members differ."""
     from e3fp.fingerprint.structs import Substruct
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     center = all_atoms[0]
     # Same center; the second substruct leaves out atom 1.
     larger = Substruct(center, all_atoms[1:])
     smaller = Substruct(center, all_atoms[2:])
     self.assertNotEqual(larger, smaller)
Ejemplo n.º 14
0
 def test_center_atom_auto_added_to_atoms(self):
     """Check that a substruct's ``atoms`` contains its center's index.

     The center atom is not part of the member list passed in, yet its
     index must still appear in ``substruct.atoms``.
     """
     from e3fp.fingerprint.structs import Substruct
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     center = all_atoms[0]
     built = Substruct(center, all_atoms[1:])
     center_idx = center.GetIdx()
     self.assertIn(center_idx, built.atoms)
Ejemplo n.º 15
0
 def test_substruct_creation_from_shell(self):
     """Check ``Substruct.from_shell`` against the shell's own substruct."""
     from e3fp.fingerprint.structs import Shell, Substruct
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     source_shell = Shell(all_atoms[0], all_atoms[1:])
     converted = Substruct.from_shell(source_shell)
     self.assertEqual(source_shell.substruct, converted)
Ejemplo n.º 16
0
 def test_shells_generator_creation_success(self):
     """Check that a ``ShellsGenerator`` can be built for all atom indices."""
     from e3fp.fingerprint.fprinter import ShellsGenerator
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     first_conf = mol.GetConformers()[0]
     indices = [atom.GetIdx() for atom in mol.GetAtoms()]
     # No assertion: the test passes if construction does not raise.
     ShellsGenerator(
         first_conf,
         indices,
         radius_multiplier=0.5,
         include_disconnected=True,
     )
Ejemplo n.º 17
0
 def test_shell_creation_from_substruct_without_center_fails(self):
     """Check that ``Shell.from_substruct`` rejects a center-less substruct.

     A ``Substruct`` created with ``None`` as its center must make
     ``Shell.from_substruct`` raise ``FormatError``.
     """
     from e3fp.fingerprint.structs import Shell, Substruct, FormatError
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     centerless = Substruct(None, all_atoms[:2])
     self.assertRaises(FormatError, Shell.from_substruct, centerless)
Ejemplo n.º 18
0
def main(sdf_dir, mol_file, num_confs=10000,
         out_conf_file="random_conformers.txt",
         out_sdf_file="random_conformers.sdf.bz2",
         out_mol_file="random_conformers.csv.bz2"):
    """Select random conformers and write their names, molecules and SDF data.

    If `out_mol_file` already exists, the conformers listed in it are
    reused. Otherwise conformers are drawn at random from `mol_file` until
    `num_confs` distinct ones have been collected, and the selection is
    saved to `out_mol_file`. In both cases the sorted conformer names are
    written to `out_conf_file` and the matching conformers, read from SDF
    files found in `sdf_dir`, are written to `out_sdf_file`.

    Parameters
    ----------
    sdf_dir : str
        Directory searched for ``<mol_name + proto_id>.sdf*`` files.
    mol_file : str
        Molecules file read with `molecules_to_lists_dicts` when no
        previous selection exists.
    num_confs : int, optional
        Number of distinct conformers to pick.
    out_conf_file : str, optional
        Text file receiving one conformer name per line.
    out_sdf_file : str, optional
        SDF file receiving the selected conformers.
    out_mol_file : str, optional
        Molecules file storing (or providing) the selection.
    """
    confs = set()
    if os.path.isfile(out_mol_file):
        logging.info("Loading existing random molecules.")
        _, conf_mol_list_dict, _ = molecules_to_lists_dicts(out_mol_file,
                                                            merge_proto=False)
        for proto_name in conf_mol_list_dict:
            for _, conf_name in conf_mol_list_dict[proto_name]:
                confs.add(split_conf_name(conf_name))
    else:
        logging.info("Loading molecules file.")
        smiles_dict, mol_list_dict, fp_type = molecules_to_lists_dicts(
            mol_file, merge_proto=False)
        mol_name_to_proto_names = {}
        for proto_name in mol_list_dict:
            mol_name, _ = split_conf_name(proto_name)
            mol_name_to_proto_names.setdefault(mol_name, []).append(proto_name)
        conf_mol_list_dict = {}
        logging.info("Picking random molecules.")
        # `random.choice` needs an indexable sequence; a Python 3 dict view
        # is not one. Building the list once also avoids re-creating it on
        # every iteration.
        mol_names = list(mol_name_to_proto_names)
        # NOTE(review): this loop does not terminate if fewer than
        # `num_confs` distinct conformers are available -- confirm callers
        # never request more than exist.
        while len(confs) < num_confs:
            mol_name = random.choice(mol_names)
            proto_name = random.choice(mol_name_to_proto_names[mol_name])
            _, conf_name = random.choice(mol_list_dict[proto_name])
            conf = split_conf_name(conf_name)
            confs.add(conf)
            # The third field of the split name is used as an index into the
            # protomer's list -- presumably the list is ordered by conformer
            # id; verify against `molecules_to_lists_dicts`.
            conf_mol_list_dict.setdefault(proto_name, set()).add(
                mol_list_dict[proto_name][conf[2]])
            if len(confs) % 100 == 0:
                logging.info(len(confs))
        conf_mol_list_dict = {k: sorted(v) for k, v
                              in conf_mol_list_dict.items()}
        lists_dicts_to_molecules(out_mol_file, smiles_dict, conf_mol_list_dict,
                                 fp_type)
    confs = sorted(confs)

    logging.info("Writing mol names to file.")
    with open(out_conf_file, "w") as f:
        for conf in confs:
            f.write("{}\n".format(join_conf_name(*conf)))

    logging.info("Saving mols to SDF file.")
    with smart_open(out_sdf_file, "wb") as f:
        writer = rdkit.Chem.SDWriter(f)
        try:
            for j, conf in enumerate(confs):
                mol_name, proto_id, conf_id = conf
                sdf_file = glob.glob(os.path.join(
                    sdf_dir, "{}.sdf*".format(
                        join_conf_name(mol_name, proto_id))))[0]
                mol = mol_from_sdf(sdf_file, conf_num=conf_id + 1)
                name = join_conf_name(*conf)
                mol.SetProp("_Name", name)
                writer.write(mol, confId=conf_id)
                if j > 0 and j % 10 == 0:
                    logging.info(j)
        finally:
            # Close the writer even if a conformer fails to load or write.
            writer.close()
Ejemplo n.º 19
0
 def test_substructs_same_center_same_atoms_equal(self):
     """Check that substructs built from identical arguments are equal."""
     from e3fp.fingerprint.structs import Substruct
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     center = all_atoms[0]
     # Two independent instances created from the same center and atoms.
     first, second = [Substruct(center, all_atoms) for _ in range(2)]
     self.assertEqual(first, second)
Ejemplo n.º 20
0
 def test_atoms_converted_to_shells(self):
     """Check that every member of ``shell.shells`` is a ``Shell``.

     The shell is built from plain atoms, so each entry of its ``shells``
     attribute is expected to have been wrapped as a ``Shell`` instance.
     """
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     outer_shell = Shell(all_atoms[0], all_atoms[1:])
     for member in outer_shell.shells:
         self.assertIsInstance(member, Shell)
Ejemplo n.º 21
0
 def test_quick(self):
     """Smoke test: run a level-5 stereo fingerprinter on one conformer."""
     from e3fp.fingerprint import fprinter
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     max_level = 5
     first_conf = mol.GetConformers()[0]
     fingerprinter = fprinter.Fingerprinter(
         level=max_level,
         bits=1024,
         stereo=True,
         radius_multiplier=1.718,
     )
     # No assertion: the test passes if the run does not raise.
     fingerprinter.run(first_conf, mol)
Ejemplo n.º 22
0
 def test_creation_with_atoms_or_shells_equal(self):
     """Check that shells built from atoms and from sub-shells are equal."""
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     # One center-only shell per atom, in atom order.
     atom_shells = [Shell(atom) for atom in all_atoms]
     center = all_atoms[0]
     from_atoms = Shell(center, all_atoms[1:])
     from_shells = Shell(center, atom_shells[1:])
     self.assertEqual(from_atoms, from_shells)
Ejemplo n.º 23
0
 def test_recursive_atom_shells_correct(self):
     """Check ``atoms`` of a shell whose members are themselves shells.

     The outer shell is centered on atom 0 and contains two shells centered
     on atoms 5 and 2; its ``atoms`` must equal the indices of atoms 0, 2
     and 5.
     """
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     all_atoms = list(mol.GetAtoms())
     around_five = Shell(all_atoms[5], all_atoms[6:8])
     around_two = Shell(all_atoms[2], all_atoms[3:5])
     outer_shell = Shell(all_atoms[0], (around_five, around_two))
     observed = outer_shell.atoms
     expected = {all_atoms[pos].GetIdx() for pos in (0, 2, 5)}
     self.assertEqual(observed, expected)
Ejemplo n.º 24
0
 def test_generates_correct_disconnected_shells_level0(self):
     """Check the first dict yielded by a default ``ShellsGenerator``.

     For atoms 0-2 the first output must map each atom index to a shell
     holding only that atom as its center.
     """
     from e3fp.fingerprint.fprinter import ShellsGenerator
     from e3fp.fingerprint.structs import Shell
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     first_conf = mol.GetConformers()[0]
     atom_ids = list(range(3))
     expected = {}
     for idx in atom_ids:
         expected[idx] = Shell(idx)
     first_output = next(ShellsGenerator(first_conf, atom_ids))
     self.assertDictEqual(first_output, expected)
Ejemplo n.º 25
0
 def test_connected_match_atoms_rad0_correct(self):
     """Check that ``get_match_atoms(0.)`` matches no atoms for any atom."""
     from e3fp.fingerprint.fprinter import ShellsGenerator
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     first_conf = mol.GetConformers()[0]
     atom_ids = list(range(3))
     generator = ShellsGenerator(
         first_conf,
         atom_ids,
         radius_multiplier=0.5,
         include_disconnected=True,
     )
     observed = generator.get_match_atoms(0.)
     # Every atom index is expected to map to an empty set.
     expected = dict((idx, set()) for idx in atom_ids)
     self.assertDictEqual(observed, expected)
Ejemplo n.º 26
0
 def test_shells_generator_next_works_correctly(self):
     """Check that two identically built generators yield equal first dicts."""
     from e3fp.fingerprint.fprinter import ShellsGenerator
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     first_conf = mol.GetConformers()[0]
     indices = [atom.GetIdx() for atom in mol.GetAtoms()]
     options = dict(radius_multiplier=0.5, include_disconnected=True)
     generator_a = ShellsGenerator(first_conf, indices, **options)
     generator_b = ShellsGenerator(first_conf, indices, **options)
     output_a = next(generator_a)
     output_b = next(generator_b)
     self.assertDictEqual(output_a, output_b)
Ejemplo n.º 27
0
 def test_atom_coords_calculated_correctly(self):
     """Check ``coords_from_atoms`` after moving every atom to the origin.

     All atoms of the first conformer are set to position (0, 0, 0); the
     returned coordinates must then equal a mapping from each atom index to
     a zero vector of length 3.
     """
     from e3fp.fingerprint.fprinter import coords_from_atoms
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     conf = mol.GetConformers()[0]
     atoms = [x.GetIdx() for x in mol.GetAtoms()]
     for atom in atoms:
         conf.SetAtomPosition(atom, [0, 0, 0])
     atom_coords = coords_from_atoms(atoms, conf)
     # The builtin ``float`` replaces the ``np.float`` alias, which newer
     # NumPy releases no longer provide; the resulting dtype is the same.
     expected_coords = dict(
         zip(atoms, np.zeros((len(atoms), 3), dtype=float)))
     np.testing.assert_equal(atom_coords, expected_coords)
Ejemplo n.º 28
0
def fprints_dict_from_sdf(sdf_file, **kwargs):
    """Build fingerprints dict for conformers encoded in an SDF file.

    See `fprints_dict_from_mol` for description of arguments.

    Parameters
    ----------
    sdf_file : str
        SDF file passed to `mol_from_sdf`.
    **kwargs
        Forwarded unchanged to `fprints_dict_from_mol`.

    Returns
    -------
    The value returned by `fprints_dict_from_mol`, or ``False`` if the
    molecule could not be loaded from `sdf_file`.
    """
    try:
        mol = mol_from_sdf(sdf_file)
    except Exception:
        # Only ordinary errors are treated as a load failure;
        # KeyboardInterrupt and SystemExit propagate to the caller.
        logging.error("Error retrieving mol from {!s}.".format(sdf_file),
                      exc_info=True)
        return False
    return fprints_dict_from_mol(mol, **kwargs)
Ejemplo n.º 29
0
 def test_initial_identifiers_assigned_correctly(self):
     """Check the level-0 fingerprint indices of the planar molecule.

     A 1024-bit stereo fingerprinter run at level 0 must produce exactly
     the expected set of on-bit indices.
     """
     from e3fp.fingerprint import fprinter
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     max_level = 0
     first_conf = mol.GetConformers()[0]
     fingerprinter = fprinter.Fingerprinter(
         level=max_level,
         bits=1024,
         stereo=True,
         radius_multiplier=1.718,
     )
     fingerprinter.run(first_conf, mol)
     level0_fprint = fingerprinter.get_fingerprint_at_level(0)
     expected_indices = {48, 124, 185, 484, 617, 674}
     self.assertEqual(set(level0_fprint.indices), expected_indices)
Ejemplo n.º 30
0
 def test_connected_match_atoms_rad1_correct2(self):
     """Check ``get_match_atoms(1.)`` for three atoms spaced 0.75 apart.

     Atoms 0-2 are placed along the z axis at multiples of 0.75; each atom
     is expected to match only its immediate neighbours.
     """
     from e3fp.fingerprint.fprinter import ShellsGenerator
     from e3fp.conformer.util import mol_from_sdf
     mol = mol_from_sdf(PLANAR_SDF_FILE)
     first_conf = mol.GetConformers()[0]
     atom_ids = list(range(3))
     for idx in atom_ids:
         first_conf.SetAtomPosition(idx, [0, 0, idx*.75])
     generator = ShellsGenerator(
         first_conf,
         atom_ids,
         radius_multiplier=0.5,
         include_disconnected=True,
     )
     observed = generator.get_match_atoms(1.)
     expected = {0: {1}, 1: {0, 2}, 2: {1}}
     self.assertDictEqual(observed, expected)