def fingerprint_mol(mol):
    """Run a stereo-aware ``Fingerprinter`` on `mol` and return it.

    The fingerprinter is configured from the module-level ``BITS``,
    ``LEVEL`` and ``RADIUS_MULTIPLIER`` settings, with stereo enabled and
    duplicate substructures removed, and is then run with ``conf=0``.

    Parameters
    ----------
    mol : object
        Molecule handed to ``Fingerprinter.run`` as ``mol``.

    Returns
    -------
    Fingerprinter
        The fingerprinter instance after ``run`` has been called on it.
    """
    options = {
        "bits": BITS,
        "level": LEVEL,
        "radius_multiplier": RADIUS_MULTIPLIER,
        "stereo": True,
        "remove_duplicate_substructs": True,
    }
    fingerprinter = Fingerprinter(**options)
    fingerprinter.run(mol=mol, conf=0)
    return fingerprinter
Example #2
0
 def test_main_parameter_ranges_run_without_fail(self):
     # Smoke test: build and run a Fingerprinter for every combination of
     # the main constructor options on the first conformer of the planar
     # test molecule. Nothing is asserted; the test passes as long as no
     # combination raises.
     from e3fp.fingerprint.fprinter import Fingerprinter
     from e3fp.conformer.util import mol_from_sdf

     mol = mol_from_sdf(PLANAR_SDF_FILE)
     conf = mol.GetConformers()[0]
     toggles = (True, False)

     # The `for` clauses are ordered outermost-first, so combinations are
     # visited in the same order as the equivalent nested loops would.
     option_grid = [
         {
             "bits": n_bits,
             "level": max_level,
             "stereo": use_stereo,
             "counts": use_counts,
             "remove_duplicate_substructs": dedupe,
             "include_disconnected": with_disconnected,
         }
         for n_bits in (1024, 4096, 2**32)
         for max_level, dedupe in ((-1, True), (5, False))
         for use_stereo in toggles
         for use_counts in toggles
         for with_disconnected in toggles
     ]

     for options in option_grid:
         fprinter = Fingerprinter(**options)
         fprinter.run(conf, mol)
         fprinter.get_fingerprint_at_level()
Example #3
0
def fprints_dict_from_mol(mol,
                          bits=BITS,
                          level=LEVEL_DEF,
                          radius_multiplier=RADIUS_MULTIPLIER_DEF,
                          first=FIRST_DEF,
                          counts=COUNTS_DEF,
                          stereo=STEREO_DEF,
                          include_disconnected=INCLUDE_DISCONNECTED_DEF,
                          rdkit_invariants=RDKIT_INVARIANTS_DEF,
                          exclude_floating=EXCLUDE_FLOATING_DEF,
                          out_dir_base=None,
                          out_ext=OUT_EXT_DEF,
                          save=False,
                          all_iters=False,
                          overwrite=False):
    """Build E3FP fingerprints for the conformers of a molecule.

    Parameters
    ----------
    mol : RDKit Mol
        Input molecule with one or more conformers to be fingerprinted. Its
        ``_Name`` property is used to name fingerprints and output files.
    bits : int
        Set number of bits for final folded fingerprint. If -1 or None, the
        module-level default `BITS` is used.
    level : int, optional
        Level/maximum number of iterations of E3FP. If -1 (or None) is
        provided, it runs until termination, and `all_iters` is ignored.
    radius_multiplier : float, optional
        Radius multiplier for spherical shells.
    first : int, optional
        First `N` number of conformers to fingerprint. If -1, all are
        fingerprinted.
    counts : bool, optional
        Generate count-based fingerprints instead of bit-based fingerprints.
    stereo : bool, optional
        Incorporate stereochemistry in fingerprint.
    include_disconnected : bool, optional
        Include disconnected atoms when hashing and for stereo calculations.
        Turn off purely for testing purposes, to make E3FP more like ECFP.
    rdkit_invariants : bool, optional
        Use the atom invariants used by RDKit for its Morgan fingerprint.
    exclude_floating : bool, optional
        Mask atoms with no bonds (usually floating ions) from the fingerprint.
        These are often placed arbitrarily and can confound the fingerprint.
    out_dir_base : str, optional
        Basename of out directory to save fingerprints. Iteration number is
        appended (or ``_complete`` when `level` is -1).
    out_ext : str, optional
        Extension on fingerprint pickles, used to determine compression level.
    save : bool, optional
        Save fingerprints to directory.
    all_iters : bool, optional
        Save fingerprints from all iterations to file(s).
    overwrite : bool, optional
        Overwrite pre-existing file.

    Returns
    -------
    dict
        Maps iteration level to the list of fingerprints generated at that
        level, one per fingerprinted conformer. There is a single key equal
        to `level` unless `all_iters` is set and `level` is not -1, in which
        case the keys are ``0..level``. An empty dict is returned when all
        output files already exist and `overwrite` is False, when no
        fingerprints were generated, or when generating or saving the
        fingerprints fails (the error is logged, not raised).
    """
    name = mol.GetProp("_Name")

    if level is None:
        level = -1

    if bits in (-1, None):
        bits = BITS

    # Only one fingerprint per conformer is kept unless every iteration of a
    # bounded run was explicitly requested.
    single_level = level == -1 or not all_iters
    level_range = (level, ) if single_level else range(level + 1)

    if save:
        if not single_level:
            dir_names = ["{:s}{:d}".format(out_dir_base, i)
                         for i in range(level + 1)]
        elif level == -1:
            dir_names = ["{!s}_complete".format(out_dir_base)]
        else:
            dir_names = ["{!s}{:d}".format(out_dir_base, level)]

        filenames = []
        for dir_name in dir_names:
            touch_dir(dir_name)
            filenames.append(
                os.path.join(dir_name, "{!s}{!s}".format(name, out_ext)))

        if all(os.path.isfile(x) for x in filenames) and not overwrite:
            logging.warning("All fingerprint files for {!s} already exist. "
                            "Skipping.".format(name))
            return {}

    fingerprinter = Fingerprinter(bits=bits,
                                  level=level,
                                  radius_multiplier=radius_multiplier,
                                  counts=counts,
                                  stereo=stereo,
                                  include_disconnected=include_disconnected,
                                  rdkit_invariants=rdkit_invariants,
                                  exclude_floating=exclude_floating)

    fprints_dict = {}
    # Counted explicitly rather than derived from the loop variable, which is
    # unbound when the molecule has no conformers.
    num_confs = 0
    try:
        logging.info("Generating fingerprints for {!s}.".format(name))
        for j, conf in enumerate(mol.GetConformers()):
            if j == first:
                break
            fingerprinter.run(conf, mol)
            # fingerprinter.save_substructs_to_db(substruct_db) #PLACEHOLDER
            for i in level_range:
                fprint = fingerprinter.get_fingerprint_at_level(i)
                fprint.name = MolItemName.from_str(name).to_conf_name(j)
                fprints_dict.setdefault(i, []).append(fprint)
            num_confs += 1
        logging.info("Generated {:d} fingerprints for {!s}.".format(
            num_confs, name))
    except Exception:
        # Best effort: one bad molecule is logged and skipped. Narrower than
        # a bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        logging.error("Error generating fingerprints for {:s}.".format(name),
                      exc_info=True)
        return {}

    if not save:
        return fprints_dict

    if not fprints_dict:
        # No conformers, or `first` was 0: there is nothing to write.
        logging.warning("No fingerprints generated for {!s}; nothing to "
                        "save.".format(name))
        return fprints_dict

    if single_level:
        to_save = [(filenames[0], fprints_dict[max(fprints_dict)])]
    else:
        to_save = [(filenames[i], fprints)
                   for i, fprints in sorted(fprints_dict.items())]

    filename = to_save[0][0]
    try:
        for filename, fprints in to_save:
            fp.savez(filename, *fprints)
    except Exception:
        logging.error(
            "Error saving fingerprints for {:s} to {:s}".format(
                name, filename),
            exc_info=True)
        return {}
    logging.info("Saved fingerprints for {:s}.".format(name))

    return fprints_dict