def _set_aromatic(cls, ring_matches: List[Dict[int, int]], oe_molecule: oechem.OEMol):
    """Mark the matched ring atoms, and the ring bonds joining them, as aromatic.

    Parameters
    ----------
    ring_matches
        One mapping per ring. The *values* of each mapping are used as the
        indices of the atoms in ``oe_molecule`` that belong to that ring.
    oe_molecule
        The molecule whose atoms and bonds receive the aromatic flags.
    """
    # Index the atoms by their molecule index for direct lookup.
    atoms_by_index = {}
    for atom in oe_molecule.GetAtoms():
        atoms_by_index[atom.GetIdx()] = atom

    # Index the bonds by the (sorted) pair of atom indices they connect.
    bonds_by_pair = {}
    for bond in oe_molecule.GetBonds():
        pair = tuple(sorted((bond.GetBgnIdx(), bond.GetEndIdx())))
        bonds_by_pair[pair] = bond

    for ring_match in ring_matches:
        members = set(ring_match.values())

        for atom_index in members:
            atoms_by_index[atom_index].SetAromatic(True)

        # Only bonds with both ends in this match, and which are themselves
        # ring bonds, are flagged.
        for (first, second), bond in bonds_by_pair.items():
            if first in members and second in members and bond.IsInRing():
                bond.SetAromatic(True)
def get_fingerprint_from_mol(mol: oechem.OEMol) -> Tuple[float, ...]:
    """Retrieve a molecule's fingerprint and return it as a tuple.

    The fingerprint length is read from the molecule's
    ``DancePipeline.FINGERPRINT_LENGTH_NAME`` data, and each value from the
    data tagged ``f"{DancePipeline.FINGERPRINT_VALUE_NAME}_{i}"``. Refer to
    :meth:`~assign_fingerprint` for how the fingerprint is stored.

    Parameters
    ----------
    mol : oechem.OEMol
        The molecule from which to retrieve the fingerprint.

    Returns
    -------
    Tuple[float, ...]
        A tuple containing the fingerprint values, in index order.

    Raises
    ------
    ValueError
        If the molecule does not contain the fingerprint length, or is
        missing the value for any index below that length.
    """
    if not mol.HasData(DancePipeline.FINGERPRINT_LENGTH_NAME):
        raise ValueError("Could not retrieve fingerprint length for molecule.")
    length = mol.GetIntData(DancePipeline.FINGERPRINT_LENGTH_NAME)

    def get_fingerprint_index(i):
        # Each fingerprint component is stored under its own indexed tag.
        name = f"{DancePipeline.FINGERPRINT_VALUE_NAME}_{i}"
        if not mol.HasData(name):
            raise ValueError(f"Unable to retrieve fingerprint value at index {i}")
        return mol.GetDoubleData(name)

    return tuple(get_fingerprint_index(i) for i in range(length))
def write_guest_molecules(old_molecule: oechem.OEMol, new_molecule: oechem.OEMol, filepath: str):
    """
    Write out a file containing the guest molecules with the "old molecule" first.

    Both molecules are modified in place: their titles are set to ``"oldmol"``
    and ``"newmol"`` and Tripos atom names are assigned.

    Parameters
    ----------
    old_molecule : oechem.OEMol
        The molecule to be written first. It is expected to already have
        coordinates.
    new_molecule : oechem.OEMol
        The molecule to be written next. It does not need to have coordinates.
    filepath : str
        The path to the file that is being written

    Raises
    ------
    IOError
        If the output file cannot be opened for writing.
    """
    ostream = oechem.oemolostream()
    # open() reports failure through its return value; without this check a
    # bad path would silently produce no output.
    if not ostream.open(filepath):
        raise IOError(f"Unable to open {filepath} for writing")

    try:
        # set molecule names to avoid template name collision:
        old_molecule.SetTitle("oldmol")
        new_molecule.SetTitle("newmol")

        # set tripos atom names
        oechem.OETriposAtomNames(old_molecule)
        oechem.OETriposAtomNames(new_molecule)

        # write the molecules in order:
        oechem.OEWriteMolecule(ostream, old_molecule)
        oechem.OEWriteMolecule(ostream, new_molecule)
    finally:
        # Always release the stream, even if naming or writing fails.
        ostream.close()
def initialize_system(dt=0.001, temperature=100, forcefield_file='forcefield/smirnoff99Frosst.offxml', smiles="C1CCCCC1"):
    """Build the energies, integrator, context and minimized coordinates for one molecule.

    Parameters
    ----------
    dt
        Time step passed to the integrator coefficients and the integrator.
    temperature
        Temperature passed to ``get_abc_coefficents``.
    forcefield_file
        Force field file name, resolved through ``get_data_filename``.
    smiles
        SMILES string of the molecule to build.

    Returns
    -------
    tuple
        ``(nrgs, offsets, intg, context, x0, total_params)`` where ``x0`` is
        the result of ``minimizer.minimize_newton_cg``.

    Raises
    ------
    RuntimeError
        If Omega fails to generate a conformer for the molecule.
    """
    mol = OEMol()
    OEParseSmiles(mol, smiles)
    OEAddExplicitHydrogens(mol)

    masses = get_masses(mol)
    num_atoms = mol.NumAtoms()

    # NOTE(review): the returned topology is not used here; the call is kept
    # in case it has side effects on `mol` -- confirm and remove if not.
    generateTopologyFromOEMol(mol)

    ff = ForceField(get_data_filename(forcefield_file))
    nrgs, total_params, offsets = system_builder.construct_energies(
        ff, mol, False)

    friction = 10.0
    a, b, c = get_abc_coefficents(masses, dt, friction, temperature)

    buf_size = estimate_buffer_size(1e-10, a)
    print("BUFFER SIZE", buf_size)

    omegaOpts = oeomega.OEOmegaOptions()
    omegaOpts.SetMaxConfs(1)
    omega = oeomega.OEOmega(omegaOpts)
    omega.SetStrictStereo(False)

    # Raise explicitly: a bare `assert 0` disappears under `python -O`.
    if not omega(mol):
        raise RuntimeError(
            "Omega failed to generate a conformer for %s" % smiles)

    # presumably converts Angstrom to nm -- TODO confirm
    x0 = mol_coords_to_numpy_array(mol) / 10

    intg = custom_ops.Integrator_double(dt, buf_size, num_atoms, total_params,
                                        a, b, c)
    context = custom_ops.Context_double(nrgs, intg)

    x0 = minimizer.minimize_newton_cg(nrgs, x0, total_params)

    return nrgs, offsets, intg, context, x0, total_params
def _add_charge_props(mol: oechem.OEMol, charged_copy: oechem.OEMol, am1_results: oequacpac.OEAM1Results):
    """
    Attach AM1 results to the given molecule.

    Every bond of ``charged_copy`` is tagged, under
    danceprops.DANCE_BOND_ORDER_KEY, with the bond order that ``am1_results``
    reports for its two atoms. ``charged_copy`` itself is then stored on
    ``mol`` under danceprops.DANCE_CHARGED_COPY_KEY.
    """
    order_key = danceprops.DANCE_BOND_ORDER_KEY
    for bond in charged_copy.GetBonds():
        begin_idx = bond.GetBgnIdx()
        end_idx = bond.GetEndIdx()
        bond.SetData(order_key, am1_results.GetBondOrder(begin_idx, end_idx))

    mol.SetData(danceprops.DANCE_CHARGED_COPY_KEY, charged_copy)
def setOeMol(self, inpOeMol, ccId):
    """ Load this object with an existing oeMOL()

        The current state is cleared first, then a copy of `inpOeMol`
        (made with ``OEMol(inpOeMol)``) and `ccId` are stored on the object,
        and finally ``getElementCounts()`` is invoked.
    """
    # Reset any previously loaded state before storing the new molecule.
    self.__clear()
    # Store a copy rather than the caller's object.
    self.__oeMol = OEMol(inpOeMol)
    self.__ccId = ccId
    # Return value is discarded here.
    self.getElementCounts()
def get_dance_property(mol: oechem.OEMol, properties: [DanceProperties]) -> DanceProperties:
    """
    Look up the DanceProperties belonging to a molecule.

    The molecule's DANCE_PROPS_KEY data is used as the index into
    ``properties``.
    """
    return properties[mol.GetData(DANCE_PROPS_KEY)]
def _get_adjacency_matrix(self, mol:oechem.OEMol) -> np.ndarray:
    """Build a symmetric ``num_atoms x num_atoms`` adjacency matrix.

    Entry ``[i, j]`` (and ``[j, i]``) is 1 when ``mol`` has a bond whose
    begin/end atom indices are ``i`` and ``j``; all other entries are 0.
    """
    size = self.num_atoms
    matrix = np.zeros((size, size))
    for bond in mol.GetBonds():
        i, j = bond.GetBgnIdx(), bond.GetEndIdx()
        # Bonds are undirected, so mark both orientations.
        matrix[i, j] = 1
        matrix[j, i] = 1
    return matrix
def test_success(self):
    """A parsable molecule must be emitted on ``success`` only, unchanged."""
    mol = OEMol()
    OESmilesToMol(mol, str("c1(c(cccc1)OC(=O)C)C(=O)O"))
    self.cube.process(mol, self.cube.intake.name)

    # Exactly one molecule on the success port ...
    self.assertEqual(self.runner.outputs["success"].qsize(), 1)
    # ... and none on the failure port: a single processed molecule that is
    # expected on "success" should not also appear on "failure".
    self.assertEqual(self.runner.outputs["failure"].qsize(), 0)

    new_mol = self.runner.outputs["success"].get()
    self.assertTrue(OEExactGraphMatch(new_mol, mol))
def num_nitrogens_in_molecule(mol: oechem.OEMol):
    """
    Return how many atoms of the molecule satisfy
    ``oechem.OEIsInvertibleNitrogen`` (used here as the trivalent-nitrogen
    test).
    """
    is_invertible_nitrogen = oechem.OEIsInvertibleNitrogen()
    return sum(1 for atom in mol.GetAtoms() if is_invertible_nitrogen(atom))
def get_LogP(smiles, fuzz):
    """Echo the XLogP of ``smiles``, optionally perturbed by up to +/- 10%.

    When ``fuzz`` is truthy, a random fraction in [-0.1, 0.1] of the computed
    value is added to it before it is echoed with ``click.echo``.
    """
    mol = OEMol()
    OEParseSmiles(mol, smiles)
    value = OEGetXLogP(mol)
    if fuzz:
        # randomly change logP value by +/- 10%
        value = value + random.uniform(-0.1, 0.1) * value
    click.echo(value)
def writeOther(self, oeMol, filePath, title='None', constantMol=False):
    """Write a titled copy of `oeMol` to `filePath`.

    A copy of the molecule is made so the caller's object keeps its title.
    With `constantMol` set the copy is written with ``OEWriteConstMolecule``,
    otherwise with ``OEWriteMolecule``.

    Returns True on success. Any exception (including failure to open the
    output file) is logged to the object's log file handle and reported by
    returning False.
    """
    ofs = None
    try:
        ofs = oemolostream()
        # open() signals failure via its return value; route it through the
        # common failure path below instead of silently writing nothing.
        if not ofs.open(filePath):
            raise IOError("unable to open %s for writing" % filePath)
        myMol = OEMol(oeMol)
        myMol.SetTitle(title)
        self.__lfh.write(
            "+PdbxBuildChemComp.writeOther writing %s title %s\n" %
            (filePath, myMol.GetTitle()))
        if constantMol:
            OEWriteConstMolecule(ofs, myMol)
        else:
            OEWriteMolecule(ofs, myMol)
        return True
    except Exception as e:
        self.__lfh.write("+PdbxBuildChemComp.writeOther FAILING %s\n" % str(e))
        traceback.print_exc(file=self.__lfh)
        return False
    finally:
        # Close explicitly so the output is flushed before the caller uses
        # the file, rather than whenever the stream is garbage collected.
        if ofs is not None:
            ofs.close()
def initialize(input_smiles, gp):
    """Prepare per-molecule training inputs and reference conformations.

    For every SMILES string a molecule is built with explicit hydrogens, a
    single conformer is generated with Omega, and
    ``system_builder.construct_energies`` is evaluated twice on the reference
    force field (with its third argument ``True`` and then ``False``).

    Parameters
    ----------
    input_smiles
        Sequence of SMILES strings (its length is reported in progress output).
    gp
        Value placed first in every training-argument tuple.

    Returns
    -------
    tuple
        ``(train_args, train_offset_idxs, train_charge_idxs, label_confs)``.
        ``label_confs`` holds ``generate_conformations`` applied to each
        reference-argument tuple.

    Raises
    ------
    RuntimeError
        If Omega fails to generate a conformer for a molecule.
    """
    train_reference_args = []
    train_args = []
    train_offset_idxs = []
    train_charge_idxs = []

    for smi_idx, smiles in enumerate(input_smiles):

        print("processing", smiles, smi_idx, "/", len(input_smiles))

        mol = OEMol()
        OEParseSmiles(mol, smiles)
        OEAddExplicitHydrogens(mol)
        masses = get_masses(mol)

        omegaOpts = oeomega.OEOmegaOptions()
        omegaOpts.SetMaxConfs(1)
        omega = oeomega.OEOmega(omegaOpts)
        omega.SetStrictStereo(False)

        # Raise explicitly: a bare `assert 0` disappears under `python -O`.
        if not omega(mol):
            raise RuntimeError(
                "Omega failed to generate a conformer for %s" % smiles)

        # NOTE(review): the returned topology is not used here; the call is
        # kept in case it has side effects on `mol` -- confirm and remove.
        generateTopologyFromOEMol(mol)

        reference_forcefield_file = 'forcefield/smirnoff99Frosst_perturbed.offxml'
        ff = ForceField(get_data_filename(reference_forcefield_file))

        params = system_builder.construct_energies(ff, mol, True)
        train_reference_args.append((params[0], params[1], masses, mol))

        params = system_builder.construct_energies(ff, mol, False)

        # Tuple layout, per the labels left by the original author:
        # (global_params, nrg_params, total_params, masses, mol, charge_idxs)
        train_args.append((gp, params[0], params[1], masses, mol, params[3]))
        train_offset_idxs.append(params[2])
        train_charge_idxs.append(params[3])

    label_confs = [generate_conformations(a) for a in train_reference_args]

    return train_args, train_offset_idxs, train_charge_idxs, label_confs
def convert(self):
    """Parameterize the MOL2 molecule with SMIRNOFF and write AMBER files.

    Steps: read the single molecule from ``self.mol2_file``, build an
    OpenMM system with the SMIRNOFF force field, load it into ParmEd,
    convert it to an ``AmberParm``, re-flag improper dihedrals, and write
    the mol2/frcmod files with unique atom types under
    ``self.output_prefix``.

    Raises
    ------
    RuntimeError
        If the MOL2 file contains more than one molecule.
    """
    # Set OEMol
    ifs = oemolistream()
    ifs.SetFlavor(OEFormat_MOL2, OEIFlavor_MOL2_Forcefield)
    ifs.open(self.mol2_file)

    # Read in molecules
    try:
        for i, mol in enumerate(ifs.GetOEMols()):
            if i > 0:
                raise RuntimeError(
                    'Only single residue molecules are currently supported')
            OETriposAtomNames(mol)
            self.molecules.append(OEMol(mol))
    finally:
        # Release the input stream whether or not reading succeeded.
        ifs.close()

    # Set topology
    self.mol2_topo = pmd.load_file(self.mol2_file, structure=True)

    # Parameterize
    ff = ForceField('forcefield/smirnoff99Frosst.offxml')
    self.labels = ff.labelMolecules(self.molecules, verbose=False)
    self.off_system = ff.createSystem(self.mol2_topo.topology,
                                      self.molecules,
                                      nonbondedCutoff=1.1 * unit.nanometer,
                                      ewaldErrorTolerance=1e-4)

    # Load into Parmed
    self.pmd_system = pmd.openmm.topsystem.load_topology(
        self.mol2_topo.topology, self.off_system, self.mol2_topo.positions)

    # Convert to AmberParm
    self.parm = pmd.amber.AmberParm.from_structure(self.pmd_system)

    # HACKY PART!!
    # Amber specifies that the third atom in an improper is the central
    # atom, but smirnoff currently specifies the second atom. A check for
    # impropers was conducted during pmd.openmm.topsystem.load_topology(),
    # but that looked at the third atom, so we'll recheck the second atom.
    # Iterate this object's own parameters (previously referenced an
    # undefined/global `cnvs`).
    for dihedral in self.parm.dihedrals:
        a1 = dihedral.atom1
        a2 = dihedral.atom2
        a3 = dihedral.atom3
        a4 = dihedral.atom4
        if a1 in a2.bond_partners and a3 in a2.bond_partners and a4 in a2.bond_partners:
            # Reorder so the central atom (a2) sits in the third position.
            (dihedral.atom1, dihedral.atom2, dihedral.atom3,
             dihedral.atom4) = (a3, a4, a2, a1)
            dihedral.improper = True

    # Create unique atom types
    unique_types = aimtools.unique_types.create_unique_type_list(self.parm)

    # Write AMBER mol2 and frcmod
    aimtools.unique_types.write_unique_frcmod_mol2s(
        self.parm, unique_types, names=self.output_prefix)
def write_mol_to_fingerprint_file(
        mol: oechem.OEMol,
        properties: [danceprops.DanceProperties],
        select_output_dir: str,
        select_bin_size: float,
        wiberg_precision: float,
):
    """Writes a molecule to its appropriate SMILES fingerprint file.

    The target file is
    ``{select_output_dir}/{binned bond order},{fingerprint}.smi`` and the
    molecule's SMILES and title are appended to it as one line.

    Molecules without a charged copy, or whose charged copy contains no
    atom matching ``oechem.OEIsInvertibleNitrogen``, are skipped with a debug
    log message.
    """
    # Some of the molecules coming in may be invalid. DanceGenerator may find
    # there was an error in charge calculations, in which case the charged
    # copy was not assigned to the molecule. This checks for that.
    if not mol.HasData(danceprops.DANCE_CHARGED_COPY_KEY):
        logging.debug(f"Ignored molecule {mol.GetTitle()}")
        return

    charged_copy = mol.GetData(danceprops.DANCE_CHARGED_COPY_KEY)

    # Take the first trivalent nitrogen, if any. Initialising to None avoids
    # an UnboundLocalError when the molecule has no such atom.
    tri_n = None
    for atom in charged_copy.GetAtoms(oechem.OEIsInvertibleNitrogen()):
        tri_n = atom
        break
    if tri_n is None:
        logging.debug(
            f"Ignored molecule {mol.GetTitle()}: no trivalent nitrogen found")
        return

    fingerprint = danceprops.DanceFingerprint(tri_n, wiberg_precision)

    # Retrieve the total bond order around the trivalent nitrogen
    bond_order = danceprops.get_dance_property(mol,
                                               properties).tri_n_bond_order

    # Round the total bond order down to the lowest multiple of bin_size. For
    # instance, if bin_size is 0.02, and the bond_order is 2.028, it becomes
    # 2.02: dividing by bin_size gives the (fractional) number of bins,
    # floor() drops the partial bin, and multiplying back by bin_size yields
    # the lower edge of the bin.
    bond_order = math.floor(bond_order / select_bin_size) * select_bin_size

    filename = f"{select_output_dir}/{bond_order},{fingerprint}.smi"
    with open(filename, "a") as f:
        f.write(f"{oechem.OEMolToSmiles(mol)} {mol.GetTitle()}\n")
    logging.debug(f"Wrote {mol.GetTitle()} to {filename}")
def decode(self, mol_data):
    """
    By default, deserializes data into molecules for use in the cube

    An ``OEMol`` instance is returned unchanged. Anything else is opened
    as a string on ``self._ifs`` and read into a new ``OEMol``. If the read
    fails, a message is printed and the (empty) molecule is still returned.

    Raises
    ------
    RuntimeError
        If the data cannot be opened as a string stream.
    """
    # isinstance also accepts OEMol subclasses, which are already molecules.
    if isinstance(mol_data, OEMol):
        return mol_data

    if not self._ifs.openstring(mol_data):
        raise RuntimeError("Failed to open string")

    mol = OEMol()
    try:
        if not OEReadMolecule(self._ifs, mol):
            print("Unable to decode molecule")
    finally:
        # The shared stream must be closed even if reading raises.
        self._ifs.close()
    return mol
def _process_mol(mol: oechem.OEMol, explicit_H: Optional[str] = None):
    """Normalise hydrogens and perceive basic atom properties, in place.

    Parameters
    ----------
    mol
        The molecule to modify.
    explicit_H
        ``'all'`` adds explicit hydrogens; ``'polar'`` suppresses hydrogens
        while retaining polar ones; ``None`` suppresses hydrogens with the
        toolkit defaults.

    Raises
    ------
    ValueError
        If ``explicit_H`` is not one of the accepted values.
    """
    if explicit_H == 'all':
        oechem.OEAddExplicitHydrogens(mol)
    elif explicit_H == 'polar':
        # The second argument is the boolean `retainPolar` flag; pass a real
        # bool rather than the option string.
        oechem.OESuppressHydrogens(mol, True)
    elif explicit_H is None:
        oechem.OESuppressHydrogens(mol)
    else:
        raise ValueError(
            "explicit_H must be 'all', 'polar' or None, got %r" % (explicit_H,))

    oechem.OEAssignAromaticFlags(mol)
    oechem.OEAssignHybridization(mol)
    oechem.OEAssignFormalCharges(mol)
    mol.Sweep()
def calculate_t142_central_wbo(mol: oechem.OEMol, params: Dict[str, List[List[int]]]) -> float:
    """Calculates the WBO between the central atoms in the t142 param in the
    molecule. (WBO is Wiberg Bond Order.)

    The `params` argument contains the parameters of the molecule (see
    `calculate_mol_params`). Only the first occurrence of ``t142`` is used.

    The molecule is modified in place: explicit hydrogens are added and a
    conformer is generated.

    Returns -1 if conformer generation fails and -2 if the AM1 calculation
    fails.
    """
    # Only use first occurrence of the parameter.
    indices = params['t142'][0]

    # For torsion parameters such as t142, the central atoms should be at the
    # second and third index.
    central_indices = [indices[1], indices[2]]

    # Generate molecule conformer.
    oechem.OEAddExplicitHydrogens(mol)
    omega = oeomega.OEOmega()
    omega.SetMaxConfs(1)
    omega.SetCanonOrder(False)
    omega.SetSampleHydrogens(True)
    omega.SetEnergyWindow(15.0)  # unit?
    omega.SetRMSThreshold(1.0)
    # Don't generate random stereoisomer if not specified.
    omega.SetStrictStereo(True)

    if not omega(mol):
        # Retry without strict stereo before giving up.
        omega.SetStrictStereo(False)
        if not omega(mol):
            logger.error("Failed to generate conformer for %s",
                         oechem.OEMolToSmiles(mol))
            return -1

    # Calculate the WBO between the two central atoms.
    conf = next(iter(mol.GetConfs()))
    charged_copy = oechem.OEMol(conf)
    results = oequacpac.OEAM1Results()

    if not AM1_CALCULATOR.CalcAM1(results, charged_copy):
        logger.error("Failed to assign partial charges to %s",
                     oechem.OEMolToSmiles(mol))
        return -2

    return results.GetBondOrder(central_indices[0], central_indices[1])
def size_and_wbo_fingerprint(mol: oechem.OEMol):
    """Return ``(atom count, t142 central WBO)`` as the molecule's fingerprint.

    Explicit hydrogens are added to ``mol`` first, so the atom count includes
    them. The second entry is whatever
    ``smirnoff_param_utils.calculate_t142_central_wbo`` returns for the
    parameters read from the molecule. (WBO is Wiberg Bond Order.)

    Note: The indices are the same in OpenEye and OFF molecules, so the
    parameter indices are correct, even though they were calculated for the
    OFF mol.
    """
    oechem.OEAddExplicitHydrogens(mol)
    mol_params = smirnoff_param_utils.read_params_from_mol(mol)

    atom_count = mol.NumAtoms()
    central_wbo = smirnoff_param_utils.calculate_t142_central_wbo(mol, mol_params)
    return (atom_count, central_wbo)
def mol2_to_OEMol(conversion):
    """
    Convert the input MOL2 file to a list of OpenEye OEMols.

    The file at ``conversion.input_mol2`` is read with the MOL2 force-field
    flavor. Each molecule is copied into the returned list.

    Raises
    ------
    IOError
        If the input file cannot be opened.
    RuntimeError
        If the file contains more than one molecule.
    """
    ifs = oemolistream()
    ifs.SetFlavor(OEFormat_MOL2, OEIFlavor_MOL2_Forcefield)
    # open() reports failure through its return value; without this check a
    # missing file would silently yield an empty list.
    if not ifs.open(conversion.input_mol2):
        raise IOError("Unable to open %s for reading" % conversion.input_mol2)

    # Read in molecules
    molecules = []
    try:
        for i, mol in enumerate(ifs.GetOEMols()):
            if i > 0:
                raise RuntimeError("Only single residue molecules are currently supported")
            # Copy: the stream reuses its molecule object between iterations.
            molecules.append(OEMol(mol))
    finally:
        ifs.close()

    return molecules
def _calc_properties(mol: oechem.OEMol) -> danceprops.DanceProperties:
    """
    Compute the DanceProperties of a molecule.

    Only the first conformer and, within it, the first atom matching
    ``oechem.OEIsInvertibleNitrogen`` are considered. If the AM1 calculation
    fails, the (default) properties object is returned untouched.

    Based on Victoria Lim's am1wib.py - see
    https://github.com/vtlim/misc/blob/master/oechem/am1wib.py
    """
    props = danceprops.DanceProperties()

    for conformer in mol.GetConfs():
        charged_copy = oechem.OEMol(conformer)
        am1_results = oequacpac.OEAM1Results()
        if not AM1.CalcAM1(am1_results, charged_copy):
            logging.debug(
                f"failed to assign partial charges to {mol.GetTitle()}")
            return props

        DanceGenerator._add_charge_props(mol, charged_copy, am1_results)

        for nitrogen in charged_copy.GetAtoms(oechem.OEIsInvertibleNitrogen()):
            neighbors = list(nitrogen.GetAtoms())

            # Sum of the three neighbor-N-neighbor angles, in degrees.
            first_angle = math.degrees(
                oechem.OEGetAngle(charged_copy, neighbors[0], nitrogen, neighbors[1]))
            second_angle = math.degrees(
                oechem.OEGetAngle(charged_copy, neighbors[1], nitrogen, neighbors[2]))
            third_angle = math.degrees(
                oechem.OEGetAngle(charged_copy, neighbors[2], nitrogen, neighbors[0]))
            props.tri_n_bond_angle = first_angle + second_angle + third_angle

            # Record each N-neighbor bond and accumulate the totals.
            for neighbor in neighbors:
                order = am1_results.GetBondOrder(nitrogen.GetIdx(), neighbor.GetIdx())
                length = oechem.OEGetDistance(charged_copy, nitrogen, neighbor)
                props.tri_n_bonds.append(
                    danceprops.DanceTriNBond(order, length, neighbor.GetAtomicNum()))
                props.tri_n_bond_order += order
                props.tri_n_bond_length += length

            # Stop after the first trivalent nitrogen.
            return props

        # Stop after the first conformation (no trivalent nitrogen found).
        return props

    return props
def deserialize(self, oeS):
    """ Rebuild the internal OE molecule from an OE binary (OEB) string.

        All molecules in the input are read; the first one becomes the
        internal molecule and its title becomes the component id.

        Returns True when at least one molecule was read, False otherwise.
    """
    self.__clear()

    stream = oemolistream()
    stream.SetFormat(OEFormat_OEB)
    stream.openstring(oeS)

    molecules = []
    for mol in stream.GetOEMols():
        if self.__debug:
            self.__lfh.write("OeBuildModelMol(deserialize) SMILES %s\n" % OECreateCanSmiString(mol))
            self.__lfh.write("OeBuildModelMol(deserialize) title  %s\n" % mol.GetTitle())
            self.__lfh.write("OeBuildModelMol(deserialize) atoms  %d\n" % mol.NumAtoms())
        # Keep an independent copy of each molecule read from the stream.
        molecules.append(OEMol(mol))

    count = len(molecules)
    if count < 1:
        return False

    self.__oeMol = molecules[0]
    self.__ccId = self.__oeMol.GetTitle()

    if self.__debug:
        self.__lfh.write("OeBuildModelMol(deserialize) mols  %d\n" % count)
        self.__lfh.write("OeBuildModelMol(deserialize) id    %s\n" % self.__ccId)
        self.__lfh.write("OeBuildModelMol(deserialize) atoms  %d\n" % self.__oeMol.NumAtoms())
    return True
def overlay_molecules(
    reference_molecule: oechem.OEGraphMol,
    fit_molecule: oechem.OEMol,
    return_overlay: bool = True,
) -> (int, List[oechem.OEGraphMol]):
    """
    Overlay two molecules and calculate the TanimotoCombo score.

    Both input molecules are passed through ``OEOverlapPrep.Prep`` and are
    therefore modified in place.

    Parameters
    ----------
    reference_molecule: oechem.OEGraphMol
        An OpenEye molecule holding the reference molecule for overlay.
    fit_molecule: oechem.OEMol
        An OpenEye multi-conformer molecule holding the fit molecule for overlay.
    return_overlay: bool
        If the best scored overlay of molecules should be returned.

    Returns
    -------
    score, or (score, list of oechem.OEGraphMol)
        The TanimotoCombo score of the best overlay. When ``return_overlay``
        is True, also a two-element list holding the reference molecule and
        the transformed best-scoring fit conformer.
    """
    from openeye import oechem, oeshape

    overlap_prep = oeshape.OEOverlapPrep()
    overlap_prep.Prep(reference_molecule)

    overlay_engine = oeshape.OEOverlay()
    overlay_engine.SetupRef(reference_molecule)

    overlap_prep.Prep(fit_molecule)
    best_score = oeshape.OEBestOverlayScore()
    overlay_engine.BestOverlay(best_score, fit_molecule, oeshape.OEHighestTanimoto())

    if not return_overlay:
        return best_score.GetTanimotoCombo()

    # Extract the best-scoring conformer and move it onto the reference.
    best_conformer = fit_molecule.GetConf(
        oechem.OEHasConfIdx(best_score.GetFitConfIdx()))
    best_fit = oechem.OEGraphMol(best_conformer)
    best_score.Transform(best_fit)

    return best_score.GetTanimotoCombo(), [reference_molecule, best_fit]
def write_host(host_oemol: oechem.OEMol, filepath: str):
    """
    Write out the host molecule on its own.

    The molecule is modified in place: its title is set to ``"host"`` and
    Tripos atom names are assigned.

    Parameters
    ----------
    host_oemol : oechem.OEMol
        the OEMol containing the host
    filepath : str
        where to write the OEMol

    Raises
    ------
    IOError
        If the output file cannot be opened for writing.
    """
    ostream = oechem.oemolostream()
    # open() reports failure through its return value; without this check a
    # bad path would silently produce no output.
    if not ostream.open(filepath):
        raise IOError(f"Unable to open {filepath} for writing")

    try:
        # set title to avoid template name collision:
        host_oemol.SetTitle("host")

        # set tripos atom names
        oechem.OETriposAtomNames(host_oemol)

        oechem.OEWriteMolecule(ostream, host_oemol)
    finally:
        # Always release the stream, even if naming or writing fails.
        ostream.close()
def add_h(mol: oechem.OEMol):
    """Add explicit hydrogens for test cases.

    ``oechem.OEAddExplicitHydrogens`` is applied to each atom that is present
    when the function is called; the molecule is modified in place.
    """
    # Snapshot the atoms first: adding hydrogens grows the molecule, and the
    # atom collection should not be iterated while it is being modified.
    for atom in list(mol.GetAtoms()):
        oechem.OEAddExplicitHydrogens(mol, atom)
def set_dance_property(mol: oechem.OEMol, key: int):
    """
    Sets the DANCE_PROPS_KEY data of a molecule.

    The given `key` is stored on `mol` under the DANCE_PROPS_KEY tag,
    replacing whatever `SetData` replaces for that tag.
    """
    mol.SetData(DANCE_PROPS_KEY, key)
def _relevant_if_contains_nitrogen(mol: oechem.OEMol) -> bool:
    """Return True if any atom's atomic number equals ``_NITROGEN``."""
    for atom in mol.GetAtoms():
        if atom.GetAtomicNum() == _NITROGEN:
            return True
    return False
def add_h(mol: oechem.OEMol):
    """Add explicit hydrogens to a molecule.

    ``oechem.OEAddExplicitHydrogens`` is applied to each atom that is present
    when the function is called; the molecule is modified in place.
    """
    # Snapshot the atoms first: adding hydrogens grows the molecule, and the
    # atom collection should not be iterated while it is being modified.
    for atom in list(mol.GetAtoms()):
        oechem.OEAddExplicitHydrogens(mol, atom)
def _get_node_features(self, mol: oechem.OEMol) -> np.ndarray:
    """Return an array with one ``_featurize_atom`` result per atom of ``mol``."""
    per_atom = []
    for atom in mol.GetAtoms():
        per_atom.append(self._featurize_atom(atom))
    return np.array(per_atom)
def __init__(self, mol: oechem.OEMol):
    """
    Record the atom count and adjacency matrix of a molecule.

    mol (oechem.OEMol): OEMol object
    """
    # num_atoms must be set first: _get_adjacency_matrix reads it to size
    # the matrix.
    self.num_atoms = mol.NumAtoms()
    self.adj_mat = self._get_adjacency_matrix(mol)