def fingerprint_mol(mol):
    """Run a ``Fingerprinter`` on `mol` and return the fingerprinter itself.

    The fingerprinter is configured from the module-level ``BITS``,
    ``LEVEL`` and ``RADIUS_MULTIPLIER`` settings, with stereo enabled and
    duplicate-substructure removal turned on.

    Parameters
    ----------
    mol : object
        Molecule handed to ``Fingerprinter.run`` together with ``conf=0``
        (presumably selecting the first conformer; confirm against the
        ``Fingerprinter.run`` signature).

    Returns
    -------
    Fingerprinter
        The fingerprinter after ``run`` has been called, not a fingerprint.
    """
    fingerprinter = Fingerprinter(
        bits=BITS,
        level=LEVEL,
        radius_multiplier=RADIUS_MULTIPLIER,
        stereo=True,
        remove_duplicate_substructs=True,
    )
    fingerprinter.run(mol=mol, conf=0)
    return fingerprinter
def test_main_parameter_ranges_run_without_fail(self):
    """Smoke-test ``Fingerprinter`` over a grid of its main parameters.

    Loads the molecule in ``PLANAR_SDF_FILE``, takes its first conformer
    and, for every parameter combination, builds a ``Fingerprinter``, runs
    it and requests the fingerprint at the default level. There are no
    assertions: the test passes when no combination raises.
    """
    import itertools

    from e3fp.fingerprint.fprinter import Fingerprinter
    from e3fp.conformer.util import mol_from_sdf

    mol = mol_from_sdf(PLANAR_SDF_FILE)
    conf = mol.GetConformers()[0]

    bits_opts = (1024, 4096, 2**32)
    # Each level is tested with one fixed ``remove_duplicate_substructs``
    # setting, so the two values travel together as a pair.
    level_opts = [(-1, True), (5, False)]
    stereo_opts = (True, False)
    counts_opts = (True, False)
    include_disconnected_opts = (True, False)

    # ``product`` varies the right-most option fastest, i.e. the same
    # visiting order as the equivalent nested ``for`` loops.
    param_grid = itertools.product(
        bits_opts,
        level_opts,
        stereo_opts,
        counts_opts,
        include_disconnected_opts,
    )
    for (bits, (level, remove_substructs), stereo, counts,
         include_disconnected) in param_grid:
        fprinter = Fingerprinter(
            bits=bits,
            level=level,
            stereo=stereo,
            counts=counts,
            remove_duplicate_substructs=remove_substructs,
            include_disconnected=include_disconnected,
        )
        fprinter.run(conf, mol)
        fprinter.get_fingerprint_at_level()
def fprints_dict_from_mol(mol, bits=BITS, level=LEVEL_DEF,
                          radius_multiplier=RADIUS_MULTIPLIER_DEF,
                          first=FIRST_DEF, counts=COUNTS_DEF,
                          stereo=STEREO_DEF,
                          include_disconnected=INCLUDE_DISCONNECTED_DEF,
                          rdkit_invariants=RDKIT_INVARIANTS_DEF,
                          exclude_floating=EXCLUDE_FLOATING_DEF,
                          out_dir_base=None, out_ext=OUT_EXT_DEF,
                          save=False, all_iters=False, overwrite=False):
    """Build E3FP fingerprints from a mol with at least one conformer.

    Parameters
    ----------
    mol : RDKit Mol
        Input molecule with one or more conformers to be fingerprinted.
    bits : int, optional
        Set number of bits for final folded fingerprint. If -1 or None,
        the module default ``BITS`` is used.
    level : int, optional
        Level/maximum number of iterations of E3FP. If -1 (or None) is
        provided, it runs until termination, and `all_iters` is treated as
        False.
    radius_multiplier : float, optional
        Radius multiplier for spherical shells.
    first : int, optional
        First `N` number of conformers from file to fingerprint. If -1, all
        are fingerprinted.
    counts : bool, optional
        Generate count-based fingerprints instead of bit-based
        fingerprints.
    stereo : bool, optional
        Incorporate stereochemistry in fingerprint.
    include_disconnected : bool, optional
        Include disconnected atoms when hashing and for stereo
        calculations. Turn off purely for testing purposes, to make E3FP
        more like ECFP.
    rdkit_invariants : bool, optional
        Use the atom invariants used by RDKit for its Morgan fingerprint.
    exclude_floating : bool, optional
        Mask atoms with no bonds (usually floating ions) from the
        fingerprint. These are often placed arbitrarily and can confound
        the fingerprint.
    out_dir_base : str, optional
        Basename of out directory to save fingerprints. Iteration number
        is appended (or ``_complete`` when `level` is -1).
    out_ext : str, optional
        Extension on fingerprint pickles, used to determine compression
        level.
    save : bool, optional
        Save fingerprints to directory.
    all_iters : bool, optional
        Save fingerprints from all iterations to file(s).
    overwrite : bool, optional
        Overwrite pre-existing file.

    Returns
    -------
    dict
        Maps a level to the list of fingerprints generated at that level,
        one per fingerprinted conformer. The only key is `level` unless
        `all_iters` is set and `level` is not -1, in which case the keys
        are ``0..level``. An empty dict is returned when all output files
        already exist and `overwrite` is False, when fingerprinting or
        saving fails (the error is logged), or when no conformer was
        fingerprinted.
    """
    name = mol.GetProp("_Name")

    if level is None:
        level = -1

    if bits in (-1, None):
        bits = BITS

    # Per-iteration output only makes sense for a fixed, non-negative level.
    single_level = level == -1 or not all_iters

    filenames = []
    if save:
        if not single_level:
            dir_names = ["{!s}{:d}".format(out_dir_base, i)
                         for i in range(level + 1)]
        elif level == -1:
            dir_names = ["{!s}_complete".format(out_dir_base)]
        else:
            dir_names = ["{!s}{:d}".format(out_dir_base, level)]

        all_files_exist = True
        for dir_name in dir_names:
            touch_dir(dir_name)
            filename = os.path.join(dir_name,
                                    "{!s}{!s}".format(name, out_ext))
            filenames.append(filename)
            if not os.path.isfile(filename):
                all_files_exist = False

        if all_files_exist and not overwrite:
            logging.warning("All fingerprint files for {!s} already exist. "
                            "Skipping.".format(name))
            return {}

    fingerprinter = Fingerprinter(bits=bits,
                                  level=level,
                                  radius_multiplier=radius_multiplier,
                                  counts=counts,
                                  stereo=stereo,
                                  include_disconnected=include_disconnected,
                                  rdkit_invariants=rdkit_invariants,
                                  exclude_floating=exclude_floating)

    fprints_dict = {}
    num_confs = 0
    try:
        logging.info("Generating fingerprints for {!s}.".format(name))
        level_range = (level, ) if single_level else range(level + 1)
        for j, conf in enumerate(mol.GetConformers()):
            if j == first:
                # Only the first `first` conformers are requested.
                break
            fingerprinter.run(conf, mol)
            conf_name = MolItemName.from_str(name).to_conf_name(j)
            for i in level_range:
                fprint = fingerprinter.get_fingerprint_at_level(i)
                fprint.name = conf_name
                fprints_dict.setdefault(i, []).append(fprint)
            num_confs += 1
        logging.info("Generated {:d} fingerprints for {!s}.".format(
            num_confs, name))
    except Exception:
        # Best effort: log and report "nothing generated", but let
        # KeyboardInterrupt/SystemExit propagate.
        logging.error("Error generating fingerprints for {:s}.".format(name),
                      exc_info=True)
        return {}

    # Nothing to write when no conformer was fingerprinted (e.g. `first`
    # is 0 or the molecule has no conformers).
    if save and fprints_dict:
        if single_level:
            targets = [(filenames[0], fprints_dict[level])]
        else:
            targets = [(filenames[i], fprints_dict[i])
                       for i in sorted(fprints_dict)]
        try:
            for filename, fprints in targets:
                fp.savez(filename, *fprints)
            logging.info("Saved fingerprints for {:s}.".format(name))
        except Exception:
            logging.error(
                "Error saving fingerprints for {:s} to {:s}".format(
                    name, filename), exc_info=True)
            return {}

    return fprints_dict