def test_stereoisomers_produce_equal_fingerprints_nonstereo(self):
    """Two enantiomer files fingerprint identically when ``stereo=False``."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf

    # Load both molecules up front, before any fingerprinting happens.
    enantiomers = [
        mol_from_sdf(sdf_path)
        for sdf_path in (ENANT1_SDF_FILE, ENANT2_SDF_FILE)
    ]

    max_level = 5
    printer = fprinter.Fingerprinter(
        level=max_level,
        stereo=False,
        radius_multiplier=1.718,
        remove_duplicate_substructs=True,
    )

    # The same fingerprinter instance is reused for both molecules.
    fingerprints = []
    for enantiomer in enantiomers:
        printer.run(conf=0, mol=enantiomer)
        fingerprints.append(printer.get_fingerprint_at_level(max_level))

    first_fp, second_fp = fingerprints
    self.assertEqual(first_fp, second_fp)
def test_reordering_conformers_produces_same_fprints(self):
    """Per-conformer fingerprints do not depend on processing order."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf
    import random

    sdf_paths = glob.glob(os.path.join(RAND_SDF_DIR, "*.sdf*"))
    mol = mol_from_sdf(sdf_paths[0])
    max_level = 5
    printer = fprinter.Fingerprinter(
        level=max_level,
        stereo=False,
        radius_multiplier=1.718,
        remove_duplicate_substructs=True,
    )

    def fingerprint_in_order(ordered_ids):
        """Map each conformer id to its fingerprint, visiting ids in order."""
        by_conf = {}
        for cid in ordered_ids:
            printer.run(cid, mol)
            by_conf[cid] = printer.get_fingerprint_at_level(max_level)
        return by_conf

    original_order = [c.GetId() for c in mol.GetConformers()]
    fprints_original = fingerprint_in_order(original_order)

    # Second pass over a shuffled copy of the same ids.
    shuffled_order = list(original_order)
    random.shuffle(shuffled_order)
    fprints_shuffled = fingerprint_in_order(shuffled_order)

    self.assertEqual(fprints_original, fprints_shuffled)
def test_generates_correct_disconnected_shells_level2(self):
    """The third yield of a disconnected generator matches the level-2 shells."""
    from e3fp.fingerprint.fprinter import ShellsGenerator
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = list(range(3))
    # Place the three atoms on the z axis at z = 0.45 * index.
    for atom_id in atom_ids:
        conf.SetAtomPosition(atom_id, [0, 0, .45 * atom_id])

    inner = {
        0: Shell(0, {1}),
        1: Shell(1, {0, 2}),
        2: Shell(2, {1}),
    }
    expected = {
        0: Shell(0, {inner[1], inner[2]}),
        1: Shell(1, {inner[0], inner[2]}),
        2: Shell(2, {inner[0], inner[1]}),
    }

    shells_gen = ShellsGenerator(
        conf,
        atom_ids,
        radius_multiplier=0.5,
        include_disconnected=True,
    )
    # Advance three times; only the last yielded dict is checked.
    for _ in range(3):
        observed = next(shells_gen)

    self.assertDictEqual(observed, expected)
def test_connected_substructs_converge(self):
    """The fourth and fifth yields of a connected generator share substructs."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = list(range(3))
    # Bond mapping returned by the patched ``bound_atoms_from_mol``.
    fake_bonds = {0: {1, 2}, 1: {0}, 2: {0}}
    for atom_id in atom_ids:
        conf.SetAtomPosition(atom_id, [0, 0, .45 * atom_id])

    def substructs_of(shells_by_atom):
        """Map each key of a shells dict to that shell's substruct."""
        return {key: shell.substruct
                for key, shell in shells_by_atom.items()}

    bonds_patch = mock.patch('e3fp.fingerprint.fprinter.bound_atoms_from_mol',
                             return_value=fake_bonds)
    # Keep the patch active for construction and for every iteration.
    with bonds_patch:
        shells_gen = fprinter.ShellsGenerator(
            conf,
            atom_ids,
            radius_multiplier=0.5,
            include_disconnected=False,
        )
        for _ in range(4):
            current_shells = next(shells_gen)
        current_substructs = substructs_of(current_shells)
        following_substructs = substructs_of(next(shells_gen))

    self.assertDictEqual(current_substructs, following_substructs)
def test_generates_correct_connected_shells_level2(self):
    """The third yield of a connected generator matches the level-2 shells."""
    from e3fp.fingerprint import fprinter
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = list(range(3))
    # Bond mapping returned by the patched ``bound_atoms_from_mol``.
    fake_bonds = {0: {1, 2}, 1: {0}, 2: {0}}
    for atom_id in atom_ids:
        conf.SetAtomPosition(atom_id, [0, 0, .45 * atom_id])

    inner = {
        0: Shell(0, {1}),
        1: Shell(1, {0}),
        2: Shell(2, {}),
    }
    expected = {
        0: Shell(0, {inner[1], inner[2]}),
        1: Shell(1, {inner[0]}),
        2: Shell(2, {inner[0]}),
    }

    bonds_patch = mock.patch('e3fp.fingerprint.fprinter.bound_atoms_from_mol',
                             return_value=fake_bonds)
    # Keep the patch active for construction and for every iteration.
    with bonds_patch:
        shells_gen = fprinter.ShellsGenerator(
            conf,
            atom_ids,
            radius_multiplier=0.5,
            include_disconnected=False,
        )
        # Advance three times; only the last yielded dict is checked.
        for _ in range(3):
            observed = next(shells_gen)

    self.assertDictEqual(observed, expected)
def test_create_shell_no_shell(self):
    """Constructing a Shell from only a center atom does not raise."""
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    # No assertion: the test passes as long as construction succeeds.
    Shell(all_atoms[0])
def test_remove_dupe_substructs_makes_same_substruts_diff_shells(self):
    """Toggling duplicate removal changes the shells but not the substructs."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    max_level = 2
    conf = mol.GetConformers()[0]

    def shells_and_substructs(remove_dupes):
        """Fingerprint with a fresh Fingerprinter; return (shells, substructs)."""
        printer = fprinter.Fingerprinter(
            level=max_level,
            bits=1024,
            stereo=True,
            radius_multiplier=1.718,
            remove_duplicate_substructs=remove_dupes,
        )
        printer.run(conf, mol)
        shells = set(printer.level_shells[printer.current_level])
        substructs = {shell.substruct for shell in shells}
        return shells, substructs

    shells_no_dupes, substructs_no_dupes = shells_and_substructs(True)
    shells_with_dupes, substructs_with_dupes = shells_and_substructs(False)

    self.assertEqual(substructs_no_dupes, substructs_with_dupes)
    self.assertNotEqual(shells_no_dupes, shells_with_dupes)
def test_rdkit_invariants(self):
    """Atom index 2 of the planar molecule yields the expected invariants."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    target_atom = mol.GetAtomWithIdx(2)
    observed = list(fprinter.rdkit_invariants_from_atom(target_atom))
    expected = [6, 3, 1, 0, 0, 1]
    self.assertListEqual(observed, expected)
def test_shells_diff_center_same_atoms_nonequal(self):
    """Shells with the same member atoms but different centers are unequal."""
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    first_center, second_center = all_atoms[0], all_atoms[1]
    self.assertNotEqual(
        Shell(first_center, all_atoms[2:]),
        Shell(second_center, all_atoms[2:]),
    )
def test_create_shell_with_same_center_fails(self):
    """Passing the center atom among the member atoms raises FormatError."""
    from e3fp.fingerprint.structs import Shell, FormatError
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    # ``all_atoms`` still contains the center atom at index 0.
    self.assertRaises(FormatError, Shell, all_atoms[0], all_atoms)
def test_same_shells_hash_to_same_value(self):
    """Two separately built, identical Substructs hash to the same value.

    Hashing a single instance twice only shows that ``__hash__`` is
    deterministic, so two independent instances are compared instead.
    """
    from e3fp.fingerprint.structs import Substruct
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    atoms = list(mol.GetAtoms())
    center_atom = atoms[0]
    substruct1 = Substruct(center_atom, atoms[1:])
    substruct2 = Substruct(center_atom, atoms[1:])
    self.assertEqual(hash(substruct1), hash(substruct2))
def test_creation_with_atoms_or_ids_equivalent(self):
    """A Shell built from atom objects equals one built from their indices."""
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    atom_objs = list(mol.GetAtoms())
    atom_indices = [atom.GetIdx() for atom in atom_objs]

    shell_from_objs = Shell(atom_objs[0], atom_objs[1:])
    shell_from_ids = Shell(atom_indices[0], atom_indices[1:])
    self.assertEqual(shell_from_objs, shell_from_ids)
def test_substructs_same_center_diff_atoms_nonequal(self):
    """Substructs sharing a center but with different atoms are unequal."""
    from e3fp.fingerprint.structs import Substruct
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    shared_center = all_atoms[0]
    larger = Substruct(shared_center, all_atoms[1:])
    smaller = Substruct(shared_center, all_atoms[2:])
    self.assertNotEqual(larger, smaller)
def test_center_atom_auto_added_to_atoms(self):
    """The center atom's index appears in ``atoms`` even when not passed."""
    from e3fp.fingerprint.structs import Substruct
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    center = all_atoms[0]
    # The member list deliberately leaves the center atom out.
    others = all_atoms[1:]
    built = Substruct(center, others)
    self.assertIn(center.GetIdx(), built.atoms)
def test_substruct_creation_from_shell(self):
    """``Substruct.from_shell`` equals the shell's own ``substruct``."""
    from e3fp.fingerprint.structs import Shell, Substruct
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    source_shell = Shell(all_atoms[0], all_atoms[1:])
    derived = Substruct.from_shell(source_shell)
    self.assertEqual(source_shell.substruct, derived)
def test_shells_generator_creation_success(self):
    """Constructing a ShellsGenerator over all atom indices does not raise."""
    from e3fp.fingerprint.fprinter import ShellsGenerator
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = [atom.GetIdx() for atom in mol.GetAtoms()]
    # No assertion: the test passes as long as construction succeeds.
    ShellsGenerator(
        conf,
        atom_ids,
        radius_multiplier=0.5,
        include_disconnected=True,
    )
def test_shell_creation_from_substruct_without_center_fails(self):
    """``Shell.from_substruct`` raises FormatError for a center-less Substruct."""
    from e3fp.fingerprint.structs import Shell, Substruct, FormatError
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    # ``None`` is passed as the center atom.
    centerless = Substruct(None, all_atoms[:2])
    self.assertRaises(FormatError, Shell.from_substruct, centerless)
def main(sdf_dir, mol_file, num_confs=10000,
         out_conf_file="random_conformers.txt",
         out_sdf_file="random_conformers.sdf.bz2",
         out_mol_file="random_conformers.csv.bz2"):
    """Pick random conformers and write their names and structures to disk.

    If ``out_mol_file`` already exists, the conformers listed in it are
    reused. Otherwise conformers are drawn at random from ``mol_file``
    until ``num_confs`` distinct ones have been collected, and the
    selection is saved to ``out_mol_file``.

    Parameters
    ----------
    sdf_dir : str
        Directory searched for ``<mol_name + proto_id>.sdf*`` files.
    mol_file : str
        Molecules file read with ``molecules_to_lists_dicts``.
    num_confs : int, optional
        Number of distinct conformers to pick.
    out_conf_file : str, optional
        Text file that receives one conformer name per line.
    out_sdf_file : str, optional
        SDF file that receives the selected conformers.
    out_mol_file : str, optional
        Molecules file that stores (or already holds) the selection.
    """
    confs = set()
    if os.path.isfile(out_mol_file):
        logging.info("Loading existing random molecules.")
        _, conf_mol_list_dict, _ = molecules_to_lists_dicts(
            out_mol_file, merge_proto=False)
        for proto_name in conf_mol_list_dict:
            for _, conf_name in conf_mol_list_dict[proto_name]:
                confs.add(split_conf_name(conf_name))
    else:
        logging.info("Loading molecules file.")
        smiles_dict, mol_list_dict, fp_type = molecules_to_lists_dicts(
            mol_file, merge_proto=False)

        # Group proto names under the molecule name they belong to.
        mol_name_to_proto_names = {}
        for proto_name in mol_list_dict:
            mol_name, _ = split_conf_name(proto_name)
            mol_name_to_proto_names.setdefault(mol_name, []).append(
                proto_name)

        # ``random.choice`` needs an indexable sequence. On Python 3 a dict
        # keys view is not one, so build the list once outside the loop.
        mol_names = list(mol_name_to_proto_names)

        conf_mol_list_dict = {}
        logging.info("Picking random molecules.")
        # NOTE(review): this loop cannot terminate if fewer than
        # ``num_confs`` distinct conformers are available.
        while len(confs) < num_confs:
            mol_name = random.choice(mol_names)
            proto_name = random.choice(mol_name_to_proto_names[mol_name])
            _, conf_name = random.choice(mol_list_dict[proto_name])
            conf = split_conf_name(conf_name)
            confs.add(conf)
            # presumably conf[2] is the conformer's position in the proto's
            # list; verify against ``split_conf_name``.
            conf_mol_list_dict.setdefault(proto_name, set()).add(
                mol_list_dict[proto_name][conf[2]])
            if len(confs) % 100 == 0:
                logging.info(len(confs))

        conf_mol_list_dict = {k: sorted(v)
                              for k, v in conf_mol_list_dict.items()}
        lists_dicts_to_molecules(out_mol_file, smiles_dict,
                                 conf_mol_list_dict, fp_type)

    confs = sorted(confs)
    logging.info("Writing mol names to file.")
    with open(out_conf_file, "w") as f:
        for conf in confs:
            f.write("{}\n".format(join_conf_name(*conf)))

    logging.info("Saving mols to SDF file.")
    with smart_open(out_sdf_file, "wb") as f:
        writer = rdkit.Chem.SDWriter(f)
        for j, conf in enumerate(confs):
            mol_name, proto_id, conf_id = conf
            # Use the first file matching this molecule/proto name.
            sdf_file = glob.glob(os.path.join(
                sdf_dir, "{}.sdf*".format(
                    join_conf_name(mol_name, proto_id))))[0]
            mol = mol_from_sdf(sdf_file, conf_num=conf_id + 1)
            name = join_conf_name(*conf)
            mol.SetProp("_Name", name)
            writer.write(mol, confId=conf_id)
            if j > 0 and j % 10 == 0:
                logging.info(j)
        writer.close()
def test_substructs_same_center_same_atoms_equal(self):
    """Two Substructs built from identical arguments compare equal."""
    from e3fp.fingerprint.structs import Substruct
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    center = all_atoms[0]
    first, second = (Substruct(center, all_atoms) for _ in range(2))
    self.assertEqual(first, second)
def test_atoms_converted_to_shells(self):
    """Every member of ``shell.shells`` is a Shell instance."""
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    outer = Shell(all_atoms[0], all_atoms[1:])
    for member in outer.shells:
        self.assertIsInstance(member, Shell)
def test_quick(self):
    """Smoke test: a level-5 stereo fingerprinting run does not raise."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    settings = dict(
        level=5,
        bits=1024,
        stereo=True,
        radius_multiplier=1.718,
    )
    first_conf = mol.GetConformers()[0]
    printer = fprinter.Fingerprinter(**settings)
    printer.run(first_conf, mol)
def test_creation_with_atoms_or_shells_equal(self):
    """A Shell built from atoms equals one built from single-atom Shells."""
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    # One center-only Shell per atom.
    atom_shells = [Shell(atom) for atom in all_atoms]
    center = all_atoms[0]

    from_atoms = Shell(center, all_atoms[1:])
    from_shells = Shell(center, atom_shells[1:])
    self.assertEqual(from_atoms, from_shells)
def test_recursive_atom_shells_correct(self):
    """A Shell of Shells lists only its own center and its members' centers."""
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    all_atoms = list(mol.GetAtoms())
    inner_a = Shell(all_atoms[5], all_atoms[6:8])
    inner_b = Shell(all_atoms[2], all_atoms[3:5])
    outer = Shell(all_atoms[0], (inner_a, inner_b))

    # Indices of the outer center and of the two inner centers.
    expected_ids = {all_atoms[pos].GetIdx() for pos in (0, 2, 5)}
    self.assertEqual(outer.atoms, expected_ids)
def test_generates_correct_disconnected_shells_level0(self):
    """The first yield holds one center-only Shell per atom."""
    from e3fp.fingerprint.fprinter import ShellsGenerator
    from e3fp.fingerprint.structs import Shell
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = list(range(3))
    expected = {atom_id: Shell(atom_id) for atom_id in atom_ids}

    # The generator is built with its default settings here.
    first_yield = next(ShellsGenerator(conf, atom_ids))
    self.assertDictEqual(first_yield, expected)
def test_connected_match_atoms_rad0_correct(self):
    """``get_match_atoms(0.)`` returns an empty set for every atom."""
    from e3fp.fingerprint.fprinter import ShellsGenerator
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = list(range(3))
    shells_gen = ShellsGenerator(
        conf,
        atom_ids,
        radius_multiplier=0.5,
        include_disconnected=True,
    )
    observed = shells_gen.get_match_atoms(0.)
    expected = dict((atom_id, set()) for atom_id in atom_ids)
    self.assertDictEqual(observed, expected)
def test_shells_generator_next_works_correctly(self):
    """Two identically configured generators yield equal first dicts."""
    from e3fp.fingerprint.fprinter import ShellsGenerator
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = [atom.GetIdx() for atom in mol.GetAtoms()]

    # Build both generators before advancing either of them.
    gen_a, gen_b = (
        ShellsGenerator(conf, atom_ids, radius_multiplier=0.5,
                        include_disconnected=True)
        for _ in range(2)
    )
    self.assertDictEqual(next(gen_a), next(gen_b))
def test_atom_coords_calculated_correctly(self):
    """``coords_from_atoms`` returns the positions set on the conformer.

    Every atom is moved to the origin, so each returned coordinate is
    expected to be a zero vector of length 3.
    """
    from e3fp.fingerprint.fprinter import coords_from_atoms
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atoms = [x.GetIdx() for x in mol.GetAtoms()]
    for atom in atoms:
        conf.SetAtomPosition(atom, [0, 0, 0])
    atom_coords = coords_from_atoms(atoms, conf)
    # ``np.float`` was removed in NumPy 1.24. It was only an alias for the
    # builtin ``float``, so the dtype is unchanged.
    expected_coords = dict(
        zip(atoms, np.zeros((len(atoms), 3), dtype=float)))
    np.testing.assert_equal(atom_coords, expected_coords)
def fprints_dict_from_sdf(sdf_file, **kwargs):
    """Build fingerprints dict for conformers encoded in an SDF file.

    See `fprints_dict_from_mol` for description of arguments.

    Returns ``False`` (after logging an error) if the molecule cannot be
    loaded from ``sdf_file``; otherwise returns whatever
    `fprints_dict_from_mol` returns.
    """
    try:
        mol = mol_from_sdf(sdf_file)
    except Exception:
        # Only ordinary errors are handled here. KeyboardInterrupt and
        # SystemExit propagate so the process can still be stopped.
        logging.error("Error retrieving mol from {!s}.".format(sdf_file),
                      exc_info=True)
        return False
    return fprints_dict_from_mol(mol, **kwargs)
def test_initial_identifiers_assigned_correctly(self):
    """The level-0 fingerprint of the planar molecule has the expected bits."""
    from e3fp.fingerprint import fprinter
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    max_level = 0
    first_conf = mol.GetConformers()[0]
    printer = fprinter.Fingerprinter(
        level=max_level,
        bits=1024,
        stereo=True,
        radius_multiplier=1.718,
    )
    printer.run(first_conf, mol)

    level0_fprint = printer.get_fingerprint_at_level(0)
    expected_indices = {48, 124, 185, 484, 617, 674}
    self.assertEqual(set(level0_fprint.indices), expected_indices)
def test_connected_match_atoms_rad1_correct2(self):
    """With atoms at z = 0.75 * index, radius 1 matches only adjacent atoms."""
    from e3fp.fingerprint.fprinter import ShellsGenerator
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]
    atom_ids = list(range(3))
    for atom_id in atom_ids:
        conf.SetAtomPosition(atom_id, [0, 0, atom_id*.75])

    shells_gen = ShellsGenerator(
        conf,
        atom_ids,
        radius_multiplier=0.5,
        include_disconnected=True,
    )
    observed = shells_gen.get_match_atoms(1.)
    expected = {0: {1}, 1: {0, 2}, 2: {1}}
    self.assertDictEqual(observed, expected)