def get_dance_property(mol: oechem.OEMol, properties: [DanceProperties]) -> DanceProperties:
    """
    Looks up the DanceProperties entry that belongs to a molecule.

    The molecule stores, under DANCE_PROPS_KEY, the key used to index into
    `properties`; the entry found at that key is returned.
    """
    return properties[mol.GetData(DANCE_PROPS_KEY)]
def write_mol_to_fingerprint_file(
        mol: oechem.OEMol,
        properties: [danceprops.DanceProperties],
        select_output_dir: str,
        select_bin_size: float,
        wiberg_precision: float,
):
    """Writes a molecule to its appropriate SMILES fingerprint file.

    The file is named ``{bond_order},{fingerprint}.smi`` inside
    ``select_output_dir``, where ``bond_order`` is the total bond order around
    the molecule's trivalent nitrogen rounded down to a multiple of
    ``select_bin_size`` and ``fingerprint`` is the DanceFingerprint built from
    that nitrogen. A single ``<SMILES> <title>`` line is appended to the file.

    The molecule is skipped (nothing is written) when it carries no charged
    copy, or when its charged copy contains no invertible nitrogen.

    Args:
        mol: The molecule to write.
        properties: Collection of DanceProperties from which this molecule's
            entry is retrieved via danceprops.get_dance_property.
        select_output_dir: Directory in which the fingerprint files live.
        select_bin_size: Width of the bond order bins; must be non-zero since
            the bond order is divided by it.
        wiberg_precision: Forwarded to DanceFingerprint (presumably the
            precision applied to Wiberg bond orders in the fingerprint --
            confirm against DanceFingerprint).
    """
    title = mol.GetTitle()

    # Some of the molecules coming in may be invalid. DanceGenerator may find
    # there was an error in charge calculations, in which case the charged
    # copy was not assigned to the molecule.
    if not mol.HasData(danceprops.DANCE_CHARGED_COPY_KEY):
        logging.debug("Ignored molecule %s", title)
        return

    charged_copy = mol.GetData(danceprops.DANCE_CHARGED_COPY_KEY)

    # Only the first invertible nitrogen is used. Falling back to None avoids
    # an unbound variable when the charged copy has no such atom.
    tri_n = next(
        iter(charged_copy.GetAtoms(oechem.OEIsInvertibleNitrogen())), None)
    if tri_n is None:
        logging.warning(
            "Ignored molecule %s: no invertible nitrogen in charged copy",
            title)
        return

    fingerprint = danceprops.DanceFingerprint(tri_n, wiberg_precision)

    # Retrieve the total bond order around the trivalent nitrogen
    bond_order = danceprops.get_dance_property(mol,
                                               properties).tri_n_bond_order

    # Round the total bond order down to the lowest multiple of bin_size. For
    # instance, if bin_size is 0.02, and the bond_order is 2.028, it becomes
    # 2.02. This works because (bond_order / select_bin_size) tells how many
    # bins fit into the bond order. Then floor() drops the fractional bin, and
    # multiplying back by select_bin_size gives the lower edge of the bin.
    bond_order = math.floor(bond_order / select_bin_size) * select_bin_size

    filename = f"{select_output_dir}/{bond_order},{fingerprint}.smi"
    # Append mode: many molecules share the same bin/fingerprint file.
    with open(filename, "a") as f:
        f.write(f"{oechem.OEMolToSmiles(mol)} {title}\n")
    logging.debug("Wrote %s to %s", title, filename)