예제 #1
0
    def get_fingerprint_from_mol(mol: oechem.OEMol) -> Tuple[float, ...]:
        """Retrieve a molecule's stored fingerprint and return it as a tuple.

        The fingerprint length is read from the integer data tagged
        ``DancePipeline.FINGERPRINT_LENGTH_NAME``; each value is then read from
        the double data tagged ``f"{DancePipeline.FINGERPRINT_VALUE_NAME}_{i}"``
        for ``i`` in ``range(length)``. Refer to :meth:`~assign_fingerprint`
        for how the fingerprint is stored in the molecule.

        Parameters
        ----------
        mol : oechem.OEMol
            The molecule from which to retrieve the fingerprint.

        Returns
        -------
        Tuple[float, ...]
            A tuple containing the fingerprint values in index order. Its
            length equals the stored fingerprint length.

        Raises
        ------
        ValueError
            If the molecule has no fingerprint length data, or if the value at
            any index below that length is missing.
        """
        if not mol.HasData(DancePipeline.FINGERPRINT_LENGTH_NAME):
            raise ValueError("Could not retrieve fingerprint length for molecule.")
        length = mol.GetIntData(DancePipeline.FINGERPRINT_LENGTH_NAME)

        def get_fingerprint_index(i: int) -> float:
            # Each fingerprint entry lives under its own tag, suffixed with
            # its position in the fingerprint.
            name = f"{DancePipeline.FINGERPRINT_VALUE_NAME}_{i}"
            if not mol.HasData(name):
                raise ValueError(f"Unable to retrieve fingerprint value at index {i}")
            return mol.GetDoubleData(name)

        # Values are pulled lazily, so the first missing index raises before
        # any later index is examined.
        return tuple(get_fingerprint_index(i) for i in range(length))
예제 #2
0
파일: dance.py 프로젝트: btjanaka/dance-old
def write_mol_to_fingerprint_file(
        mol: oechem.OEMol,
        properties: [danceprops.DanceProperties],
        select_output_dir: str,
        select_bin_size: float,
        wiberg_precision: float,
):
    """Append a molecule to its appropriate SMILES fingerprint file.

    The target file is ``{select_output_dir}/{bond_order},{fingerprint}.smi``,
    where ``bond_order`` is the molecule's ``tri_n_bond_order`` property
    rounded down to a multiple of ``select_bin_size`` and ``fingerprint`` is a
    ``danceprops.DanceFingerprint`` built from the first invertible nitrogen
    of the molecule's charged copy. One line, ``"<SMILES> <title>"``, is
    appended to that file.

    Molecules that carry no charged copy are skipped with a debug log entry.
    Molecules whose charged copy contains no invertible nitrogen are skipped
    with a warning, since no fingerprint can be built for them.

    Parameters
    ----------
    mol : oechem.OEMol
        The molecule to write.
    properties : [danceprops.DanceProperties]
        Property storage passed to ``danceprops.get_dance_property`` to look
        up this molecule's ``tri_n_bond_order``.
    select_output_dir : str
        Directory in which the ``.smi`` files are written.
    select_bin_size : float
        Width of the bond order bins used to group molecules into files.
    wiberg_precision : float
        Precision passed through to ``danceprops.DanceFingerprint``.
    """

    #  Some of the molecules coming in may be invalid. DanceGenerator may find
    #  there was an error in charge calculations, in which case the charged
    #  copy was not assigned to the molecule. Such molecules are ignored.
    if not mol.HasData(danceprops.DANCE_CHARGED_COPY_KEY):
        logging.debug(f"Ignored molecule {mol.GetTitle()}")
        return

    charged_copy = mol.GetData(danceprops.DANCE_CHARGED_COPY_KEY)

    # Take the first invertible nitrogen only. The else branch runs when the
    # iterator yields nothing; without it `tri_n` would be unbound below and
    # the function would die with an UnboundLocalError.
    for atom in charged_copy.GetAtoms(oechem.OEIsInvertibleNitrogen()):
        tri_n = atom
        break
    else:
        logging.warning(
            f"Ignored molecule {mol.GetTitle()}: no invertible nitrogen "
            "found in charged copy")
        return
    fingerprint = danceprops.DanceFingerprint(tri_n, wiberg_precision)

    # Retrieve the total bond order around the trivalent nitrogen
    bond_order = danceprops.get_dance_property(mol, properties).tri_n_bond_order

    # Round the total bond order down to the lowest multiple of
    # select_bin_size. For instance, if select_bin_size is 0.02 and the
    # bond_order is 2.028, it becomes 2.02. Dividing by select_bin_size gives
    # the (fractional) number of bins, floor() drops the partial bin, and
    # multiplying back by select_bin_size yields the lower edge of the bin.
    # NOTE(review): the result is a binary float, so its text form in the
    # filename can carry rounding noise (e.g. 3 * 0.1 -> 0.30000000000000004).
    # The arithmetic is left untouched so filenames stay the same as before.
    bond_order = math.floor(bond_order / select_bin_size) * select_bin_size

    filename = f"{select_output_dir}/{bond_order},{fingerprint}.smi"
    # Append mode: many molecules share one file per (bond order, fingerprint).
    with open(filename, "a") as f:
        f.write(f"{oechem.OEMolToSmiles(mol)} {mol.GetTitle()}\n")
    logging.debug(f"Wrote {mol.GetTitle()} to {filename}")