def generate_vacuum_hybrid_topology(mol_name="naphthalene", ref_mol_name="benzene"):
    """
    Generate a vacuum topology proposal, old positions, and new positions
    triplet from two IUPAC molecule names.

    NOTE(review): ``oechem``, ``SystemGenerator``, ``FFAllAngleGeometryEngine``
    and ``beta`` are not bound inside this function; they are presumably
    module-level names -- confirm against the file header.

    Parameters
    ----------
    mol_name : str, optional
        IUPAC name of the starting molecule; also used as the residue name
    ref_mol_name : str, optional
        IUPAC name of the molecule to propose

    Returns
    -------
    topology_proposal
        Result of ``SmallMoleculeSetProposalEngine.propose``
    pos_old
        Positions of the starting molecule from ``createSystemFromIUPAC``
    new_positions
        Positions returned by the geometry engine for the proposal
    """
    from topology_proposal import SmallMoleculeSetProposalEngine, TopologyProposal
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The system returned here is not used; a fresh one is built from the topology below.
    old_mol, _unused_old_system, old_positions, old_topology = createSystemFromIUPAC(mol_name)
    new_mol = createOEMolFromIUPAC(ref_mol_name)

    # SMILES of both end states make up the proposal engine's molecule set.
    smiles_pair = [oechem.OEMolToSmiles(old_mol), oechem.OEMolToSmiles(new_mol)]

    forcefield = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    old_system = forcefield.createSystem(old_topology)

    system_generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'])
    geometry_engine = FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=mol_name)

    # generate topology proposal
    topology_proposal = proposal_engine.propose(old_system, old_topology)

    # generate new positions with geometry engine
    new_positions, _ = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions
def AtomPathLength(ifs, ofs, itf, atm1, atm2):
    """
    Report the path between two named atoms for every molecule in a stream.

    For each molecule read from ``ifs``, Tripos atom names are assigned, the
    atoms named ``atm1`` and ``atm2`` are looked up, and the path length and
    shortest-path sub-molecule between them are computed. Molecules in which
    either atom is missing produce a warning and are skipped.

    Parameters
    ----------
    ifs : input molecule stream providing ``GetOEGraphMols()``
    ofs : output molecule stream; written to only when ``-o`` is set
    itf : command-line interface object queried for ``-verbose`` and ``-o``
    atm1, atm2 : str
        Atom names to search for
    """
    for mol in ifs.GetOEGraphMols():
        oechem.OETriposAtomNames(mol)

        first = second = None
        for atom in mol.GetAtoms():
            name = atom.GetName()
            if name == atm1:
                first = atom
            if name == atm2:
                second = atom
            # Stop scanning as soon as both endpoints are known.
            if not (first is None or second is None):
                break

        if first is None or second is None:
            oechem.OEThrow.Warning(
                "Failed to find atoms %s and %s in molecule" % (atm1, atm2))
            continue

        pathlen = oechem.OEGetPathLength(first, second)
        if itf.GetBool("-verbose") or not itf.HasString("-o"):
            print("Path length: %s in %s" % (pathlen, oechem.OEMolToSmiles(mol)))

        # Extract the atoms on the shortest path into their own molecule.
        path_mol = oechem.OEGraphMol()
        path_atoms = oechem.OEIsAtomMember(oechem.OEShortestPath(first, second))
        oechem.OESubsetMol(path_mol, mol, path_atoms, True)
        path_smiles = oechem.OEMolToSmiles(path_mol)

        if itf.HasString("-o"):
            oechem.OEWriteMolecule(ofs, path_mol)
        elif itf.GetBool("-verbose"):
            print(path_smiles)
def extract_transformation(run, compound_microstates, project):
    """
    Build a ``Transformation`` for one RUN from its stored hybrid topology factory.

    Loads ``{project}/RUNS/RUN{run}/htf.npz``, derives the old and new SMILES
    from the topology proposal it contains, and looks both up in
    ``compound_microstates``.

    Parameters
    ----------
    run : RUN index used in the file path and as ``run_id``
    compound_microstates : mapping from SMILES to microstate objects
    project : str
        Project directory containing ``RUNS/``

    Returns
    -------
    Transformation or None
        ``None`` if either SMILES is not in ``compound_microstates`` or if any
        exception is raised (the exception is printed, not propagated).
    """
    import perses
    import openmoltools
    import numpy as np
    from openeye import oechem
    from fah_xchem.schema import Transformation

    try:
        # NOTE(review): allow_pickle=True unpickles arbitrary objects; only
        # safe for trusted project files.
        archive = np.load(f'{project}/RUNS/RUN{run}/htf.npz', allow_pickle=True)
        htf = archive['arr_0'].item()

        old_smiles = oechem.OEMolToSmiles(
            htf._topology_proposal.old_topology.residue_oemol)
        new_smiles = oechem.OEMolToSmiles(
            htf._topology_proposal.new_topology.residue_oemol)

        if old_smiles not in compound_microstates:
            print(f'{old_smiles} not found')
            return None
        if new_smiles not in compound_microstates:
            print(f'{new_smiles} not found')
            return None

        # NOTE(review): xchem_fragment_id is not bound in this function; it is
        # presumably a module-level name. If it is missing, the NameError is
        # swallowed by the handler below and None is returned -- confirm.
        transformation = Transformation(
            run_id=run,
            xchem_fragment_id=xchem_fragment_id,
            initial_microstate=compound_microstates[old_smiles],
            final_microstate=compound_microstates[new_smiles])
    except Exception as e:
        print(e)
        return None

    return transformation
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene", proposed_mol_name="benzene"):
    """
    Generate a test solvated topology proposal, current positions, and new
    positions triplet from two IUPAC molecule names.

    NOTE(review): ``oechem``, ``unit``, ``openmm``, ``temperature``, ``beta``,
    ``geometry``, ``SystemGenerator`` and ``SmallMoleculeSetProposalEngine``
    are not bound inside this function; presumably module-level -- confirm.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial (solvated) system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    import simtk.openmm.app as app
    from openmoltools import forcefield_generators
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The unsolvated system returned here is not used; only positions and topology are.
    current_mol, _unsolvated_system, old_positions, old_topology = createSystemFromIUPAC(current_mol_name)
    proposed_mol = createOEMolFromIUPAC(proposed_mol_name)
    smiles_pair = [oechem.OEMolToSmiles(current_mol), oechem.OEMolToSmiles(proposed_mol)]

    forcefield = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)

    # Solvate the old molecule in TIP3P with 9 angstrom padding.
    modeller = app.Modeller(old_topology, old_positions)
    modeller.addSolvent(forcefield, model='tip3p', padding=9.0*unit.angstrom)
    solvated_topology = modeller.getTopology()
    solvated_positions = modeller.getPositions()

    solvated_system = forcefield.createSystem(
        solvated_topology, nonbondedMethod=app.PME, removeCMMotion=False)
    barostat = openmm.MonteCarloBarostat(1.0*unit.atmosphere, temperature, 50)
    solvated_system.addForce(barostat)

    # The same barostat object and force-field options are handed to the system generator.
    system_generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'],
        barostat=barostat,
        forcefield_kwargs={'removeCMMotion': False, 'nonbondedMethod': app.PME})
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=current_mol_name)

    # generate topology proposal
    topology_proposal = proposal_engine.propose(solvated_system, solvated_topology)

    # generate new positions with geometry engine
    new_positions, _ = geometry_engine.propose(topology_proposal, solvated_positions, beta)

    return topology_proposal, solvated_positions, new_positions
def SmartsPathLength(ifs, ofs, itf, ss1, ss2):
    """
    Report the shortest path(s) between matches of two SMARTS patterns.

    For every molecule read from ``ifs``, all unique matches of ``ss1`` and
    ``ss2`` are enumerated, and the atom pairs (one atom from each match)
    with the globally smallest path length are collected. The sub-molecule on
    the shortest path of each such pair is written to ``ofs`` (when ``-o`` is
    set) or printed as SMILES (when ``-verbose`` is set); duplicate path
    SMILES are emitted only once per molecule.

    Parameters
    ----------
    ifs : input molecule stream providing ``GetOEGraphMols()``
    ofs : output molecule stream
    itf : command-line interface object queried for ``-verbose`` and ``-o``
    ss1, ss2 : substructure search objects providing ``SingleMatch`` / ``Match``
    """
    for mol in ifs.GetOEGraphMols():
        oechem.OEPrepareSearch(mol, ss1)
        oechem.OEPrepareSearch(mol, ss2)
        # Both patterns must match at least once, otherwise skip the molecule.
        if not (ss1.SingleMatch(mol) and ss2.SingleMatch(mol)):
            oechem.OEThrow.Warning(
                "Unable to find SMARTS matches in %s, skipping" % mol.GetTitle())
            continue

        unique = True
        # Smallest path length seen over all match pairs of this molecule.
        allminlen = sys.maxsize
        for match1 in ss1.Match(mol, unique):
            for match2 in ss2.Match(mol, unique):
                # Smallest path length for this particular pair of matches.
                minlen = sys.maxsize
                for atom1 in match1.GetTargetAtoms():
                    for atom2 in match2.GetTargetAtoms():
                        pathlen = oechem.OEGetPathLength(atom1, atom2)
                        if minlen > pathlen:
                            # Strictly shorter: restart the list of best pairs.
                            minlen = pathlen
                            atompairs = []
                            atompairs.append([atom1, atom2])
                        elif minlen == pathlen:
                            # Tie: keep this pair alongside the others.
                            atompairs.append([atom1, atom2])
                # Fold this match pair's best pairs into the per-molecule best.
                if minlen < allminlen:
                    allminlen = minlen
                    allatompairs = atompairs[:]
                elif minlen == allminlen:
                    allatompairs += atompairs[:]

        if itf.GetBool("-verbose") or not itf.HasString("-o"):
            print("Shortest path length: %s in %s" % (allminlen, oechem.OEMolToSmiles(mol)))

        # SMILES of paths already emitted for this molecule.
        spathlist = set()
        for satom1, satom2, in allatompairs:
            spath = oechem.OEShortestPath(satom1, satom2)
            spathmol = oechem.OEGraphMol()
            oechem.OESubsetMol(spathmol, mol, oechem.OEIsAtomMember(spath))
            spathsmiles = oechem.OEMolToSmiles(spathmol)

            if spathsmiles in spathlist:
                continue
            spathlist.add(spathsmiles)

            if itf.HasString("-o"):
                oechem.OEWriteMolecule(ofs, spathmol)
            elif itf.GetBool("-verbose"):
                print(spathsmiles)
    return
def test_remove_atom_map():
    """Atom maps can be stripped from a molecule and then restored unchanged."""
    from openeye import oechem

    mapped_smiles = '[H:5][C:1]([H:6])([C:2]([H:7])([H:8])[O:4][H:10])[O:3][H:9]'
    molecule = oechem.OEMol()
    oechem.OESmilesToMol(molecule, mapped_smiles)

    # Without map indices the SMILES collapses to the plain form.
    chemi.remove_map(molecule)
    unmapped = oechem.OEMolToSmiles(molecule)
    assert unmapped == 'C(CO)O'

    # Restoring must reproduce the original mapped SMILES exactly.
    chemi.restore_map(molecule)
    restored = oechem.OEMolToSmiles(molecule)
    assert restored == mapped_smiles
def generate_poses(receptor, refmol, target_molecules, output_filename):
    """
    Generate a pose for every target molecule and write the poses to a file.

    Stereochemistry of the targets is expanded first; each resulting molecule
    is then processed in a worker pool by
    ``generate_restricted_conformers_star`` and every non-``None`` result is
    written to ``output_filename``. Results arrive in arbitrary order
    (``imap_unordered``).

    Parameters
    ----------
    receptor : openeye.oechem.OEGraphMol
        Receptor (already prepped for docking) for identifying optimal pose
    refmol : openeye.oechem.OEGraphMol
        Reference molecule which shares some part in common with the proposed molecule
    target_molecules : list of OEMol
        List of molecules to build
    output_filename : str
        Output filename for generated conformers
    """
    from multiprocessing import Pool
    from rich.progress import track

    def _debug_message(prefix, pose):
        # Title and SMILES of a pose, in the layout used by the DEBUG prints.
        msg = prefix
        msg += pose.GetTitle() + '\n'
        msg += f'{"":5s} ' + oechem.OEMolToSmiles(pose) + '\n'
        return msg

    # Expand uncertain stereochemistry
    print('Expanding uncertain stereochemistry...')
    target_molecules = expand_stereochemistry(target_molecules)
    print(f'  There are {len(target_molecules)} target molecules')

    # TODO: Expand protonation states

    # Identify optimal conformer for each molecule
    with oechem.oemolostream(output_filename) as ofs:
        args = [(receptor, refmol, mol) for mol in target_molecules]
        pool = Pool()
        try:
            poses = pool.imap_unordered(generate_restricted_conformers_star, args)
            for pose in track(poses, total=len(args), description='Enumerating conformers...'):
                if pose is None:
                    continue
                # DEBUG: trace one specific compound around the write.
                traced = 'EDJ-MED-e4b030d8-2' in pose.GetTitle()
                if traced:
                    print(_debug_message('Writing docked pose for: ', pose))
                oechem.OEWriteMolecule(ofs, pose)
                if traced:
                    print(_debug_message('Wrote docked pose for: ', pose))
        except BaseException:
            # Do not leave worker processes running if anything goes wrong.
            pool.terminate()
            raise
        else:
            pool.close()
        finally:
            pool.join()
def test_keep_track_of_map():
    """Fragments produced by the combinatorial fragmenter keep the parent's atom map."""
    from openeye import oechem

    parent = oechem.OEMol()
    oechem.OESmilesToMol(parent, 'c1ccc(cc1)Nc2ncccn2')

    fragmenter_ = fragmenter.fragment.CombinatorialFragmenter(parent)
    fragmenter_.fragment()

    expected_smiles = [
        '[H:14][c:1]1[c:2]([c:4]([c:9]([c:5]([c:3]1[H:16])[H:18])[NH:13][H:22])[H:17])[H:15]',
        '[H:19][c:6]1[c:7]([n:11][c:10]([n:12][c:8]1[H:21])[NH:13][H:22])[H:20]',
    ]
    # Compare the first molecule stored under each of the first two fragment keys.
    keys = list(fragmenter_.fragments.keys())
    for key, expected in zip(keys[:2], expected_smiles):
        assert oechem.OEMolToSmiles(fragmenter_.fragments[key][0]) == expected
    # zip() would silently stop short; the original indexed keys[0] and keys[1].
    keys[0], keys[1]
def test_smiles_to_oemol():
    """``chemi.smiles_to_oemol`` builds an OEMol, names it, and can add atom maps."""
    from openeye import oechem

    # Default call: an OEMol with canonical SMILES and an IUPAC title.
    default_mol = chemi.smiles_to_oemol('CCCC')
    assert isinstance(default_mol, oechem.OEMol)
    assert oechem.OEMolToSmiles(default_mol) == 'CCCC'
    assert default_mol.GetTitle() == 'butane'

    # Without normalization no title is assigned.
    raw_mol = chemi.smiles_to_oemol('CCCC', normalize=False)
    assert raw_mol.GetTitle() == ''

    # With atom maps the SMILES carries explicit, indexed hydrogens.
    mapped_mol = chemi.smiles_to_oemol('CCCC', add_atom_map=True)
    expected_mapped = '[H:5][C:1]([H:6])([H:7])[C:3]([H:11])([H:12])[C:4]([H:13])([H:14])[C:2]([H:8])([H:9])[H:10]'
    assert oechem.OEMolToSmiles(mapped_mol) == expected_mapped
def generate_vacuum_topology_proposal(current_mol_name="benzene", proposed_mol_name="toluene"):
    """
    Generate a test vacuum topology proposal, current positions, and new
    positions triplet from two IUPAC molecule names.

    NOTE(review): ``oechem``, ``app``, ``beta``, ``geometry``,
    ``SystemGenerator`` and ``SmallMoleculeSetProposalEngine`` are not bound
    inside this function; presumably module-level -- confirm.

    Parameters
    ----------
    current_mol_name : str, optional
        name of the first molecule
    proposed_mol_name : str, optional
        name of the second molecule

    Returns
    -------
    topology_proposal : perses.rjmc.topology_proposal
        The topology proposal representing the transformation
    current_positions : np.array, unit-bearing
        The positions of the initial system
    new_positions : np.array, unit-bearing
        The positions of the new system
    """
    from openmoltools import forcefield_generators
    from perses.tests.utils import createOEMolFromIUPAC, createSystemFromIUPAC, get_data_filename

    # The system returned here is not used; a fresh one is built from the topology below.
    current_mol, _unused_old_system, old_positions, old_topology = createSystemFromIUPAC(current_mol_name)
    proposed_mol = createOEMolFromIUPAC(proposed_mol_name)
    smiles_pair = [oechem.OEMolToSmiles(current_mol), oechem.OEMolToSmiles(proposed_mol)]

    forcefield = app.ForceField(get_data_filename("data/gaff.xml"), 'tip3p.xml')
    forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator)
    old_system = forcefield.createSystem(old_topology, removeCMMotion=False)

    system_generator = SystemGenerator(
        [get_data_filename('data/gaff.xml'), 'amber99sbildn.xml', 'tip3p.xml'],
        forcefield_kwargs={'removeCMMotion': False, 'nonbondedMethod': app.NoCutoff})
    geometry_engine = geometry.FFAllAngleGeometryEngine()
    proposal_engine = SmallMoleculeSetProposalEngine(
        smiles_pair, system_generator, residue_name=current_mol_name)

    # generate topology proposal
    topology_proposal = proposal_engine.propose(
        old_system, old_topology, current_mol=current_mol, proposed_mol=proposed_mol)

    # generate new positions with geometry engine
    new_positions, _ = geometry_engine.propose(topology_proposal, old_positions, beta)

    return topology_proposal, old_positions, new_positions
def expand_stereochemistry(mols):
    """Expand stereochemistry when uncertain

    Each input molecule is passed through ``oeomega.OEFlipper``; every
    resulting stereoisomer is copied into an ``OEMol`` and tagged with SD data
    recording the parent title (``compound``), the parent SMILES
    (``compound_smiles``) and its own SMILES (``enantiomer_smiles``).

    Parameters
    ----------
    mols : iterable of openeye.oechem.OEGraphMol
        Molecules to be expanded

    Returns
    -------
    expanded_mols : list of openeye.oechem.OEMol
        Expanded molecules
    """
    from openeye import oechem, oeomega

    # Constructed but not used below; kept exactly as in the original.
    omegaOpts = oeomega.OEOmegaOptions()
    omega = oeomega.OEOmega(omegaOpts)

    # Arguments for OEFlipper.
    maxcenters = 12
    forceFlip = False
    enumNitrogen = False
    warts = True  # add suffix for stereoisomers

    expanded_mols = []
    for mol in mols:
        compound_title = mol.GetTitle()
        compound_smiles = oechem.OEMolToSmiles(mol)

        enantiomers = []
        for flipped in oeomega.OEFlipper(mol, maxcenters, forceFlip, enumNitrogen, warts):
            enantiomer = oechem.OEMol(flipped)
            sd_data = (
                ('compound', compound_title),
                ('compound_smiles', compound_smiles),
                ('enantiomer_smiles', oechem.OEMolToSmiles(enantiomer)),
            )
            for tag, value in sd_data:
                oechem.OESetSDData(enantiomer, tag, value)
            enantiomers.append(enantiomer)
        expanded_mols.extend(enantiomers)

        # DEBUG: dump the enumerated microstates of one specific compound.
        if 'EDJ-MED-e4b030d8-2' in mol.GetTitle():
            pieces = [
                'Enumerated microstates for compound: ',
                mol.GetTitle() + '\n',
                f'{"":5s} ' + oechem.OEMolToSmiles(mol) + '\n',
            ]
            pieces.extend(
                f'{index:5d} : ' + oechem.OEMolToSmiles(m) + '\n'
                for index, m in enumerate(enantiomers))
            print(''.join(pieces))

    return expanded_mols
def calculate_t142_central_wbo(mol: oechem.OEMol, params: Dict[str, List[List[int]]]) -> float:
    """Calculates the WBO between the central atoms in the t142 param in the
    molecule. (WBO is Wiberg Bond Order.)

    The `params` argument contains the parameters of the molecule (see
    `calculate_mol_params`). Only the first occurrence of t142 is used.

    Explicit hydrogens are added to `mol` and a conformer is generated on it,
    so the molecule passed in is modified.

    Returns -1 if conformer generation fails and -2 if the AM1 calculation
    fails; both failures are logged.
    """
    # For torsion parameters such as t142, the central atoms should be at the
    # second and third index of the first occurrence.
    first_occurrence = params['t142'][0]
    central_a, central_b = first_occurrence[1], first_occurrence[2]

    # Generate molecule conformer.
    oechem.OEAddExplicitHydrogens(mol)
    omega = oeomega.OEOmega()
    omega.SetMaxConfs(1)
    omega.SetCanonOrder(False)
    omega.SetSampleHydrogens(True)
    omega.SetEnergyWindow(15.0)  #unit?
    omega.SetRMSThreshold(1.0)
    # Don't generate random stereoisomer if not specified.
    omega.SetStrictStereo(True)

    if omega(mol) is False:
        # Retry once with relaxed stereo handling before giving up.
        omega.SetStrictStereo(False)
        if omega(mol) is False:
            logger.error("Failed to generate conformer for %s",
                         oechem.OEMolToSmiles(mol))
            return -1

    # Calculate the WBO between the two central atoms on a copy of the first conformer.
    first_conformer = next(iter(mol.GetConfs()))
    charged_copy = oechem.OEMol(first_conformer)
    results = oequacpac.OEAM1Results()
    am1_ok = AM1_CALCULATOR.CalcAM1(results, charged_copy)
    if not am1_ok:
        logger.error("Failed to assign partial charges to %s",
                     oechem.OEMolToSmiles(mol))
        return -2

    return results.GetBondOrder(central_a, central_b)
def gen_canonical_isomeric_smiles(oemol):
    """
    Canonically order a molecule and return its isomeric explicit-H SMILES.

    Parameters
    ----------
    oemol : OpenEye molecule
        Copied before use. NOTE(review): step 3 matches the SMILES of this
        molecule as a chemical environment and uses the first match, so it
        presumably carries atom-map indices on the atoms of interest --
        confirm against callers.

    Returns
    -------
    molecule : openff Molecule
        Canonically ordered molecule with ``properties["atom_map"]`` set
    canonical_isomeric_smiles : str
        Isomeric, explicit-hydrogen, unmapped SMILES of ``molecule``
    center_bond : set of int
        Atom indices (in the canonical molecule) of the first match

    Raises
    ------
    IndexError
        If the chemical environment search returns no match.
    """
    # 1. Create an OpenFF molecule from the OpenEye molecule, guessing the
    #    stereochemistry if needed.
    oe_molecule = oechem.OEMol(oemol)
    try:
        molecule = Molecule.from_openeye(oe_molecule)
    except Exception:
        # Was a bare ``except:``, which also trapped KeyboardInterrupt and
        # SystemExit. Any ordinary failure still falls back to loading with
        # undefined stereo and picking one enumerated stereoisomer.
        molecule = Molecule.from_openeye(oe_molecule, allow_undefined_stereo=True)
        stereoisomers = molecule.enumerate_stereoisomers(undefined_only=True,
                                                         max_isomers=1)
        if len(stereoisomers) > 0:
            molecule = stereoisomers[0]

    # 2. Canonically order the molecule
    molecule = molecule.canonical_order_atoms()

    # 3. Figure out which atoms in the canonical molecule should be tagged.
    mapped_smiles = oechem.OEMolToSmiles(oe_molecule)
    torsion_match = molecule.chemical_environment_matches(mapped_smiles)[0]

    # 4. Generate a canonical isomeric mapped smiles
    molecule.properties["atom_map"] = {
        atom_index: position + 1
        for position, atom_index in enumerate(torsion_match)
    }
    center_bond = set(molecule.properties["atom_map"].keys())
    canonical_isomeric_smiles = molecule.to_smiles(isomeric=True,
                                                   explicit_hydrogens=True,
                                                   mapped=False)

    return molecule, canonical_isomeric_smiles, center_bond
def get_mols_from_frags(this_smiles, old_smiles=None):
    """
    Enumerate SMILES of fragment combinations built from one or more molecules.

    ``this_smiles`` is fragmented, the fragments of every entry in
    ``old_smiles`` are appended, and ``GetFragmentCombinations`` is called
    with the combined fragment list and ``frag_number`` set to the number of
    fragments of ``this_smiles`` alone.

    Parameters
    ----------
    this_smiles : str
        SMILES of the primary molecule
    old_smiles : iterable of str, optional
        SMILES of additional molecules whose fragments are added to the pool

    Returns
    -------
    set of str
        SMILES of every combination for which the first element of
        ``oechem.OEDetermineComponents`` equals 1.
    """
    if old_smiles is None:
        old_smiles = []

    fragfunc = GetFragmentationFunction()

    mol = oechem.OEGraphMol()
    oechem.OESmilesToMol(mol, this_smiles)
    frags = list(fragfunc(mol))
    # Number of fragments of the primary molecule only.
    len_frags = len(frags)

    for smile in old_smiles:
        mol2 = oechem.OEGraphMol()
        oechem.OESmilesToMol(mol2, smile)
        frags.extend(fragfunc(mol2))

    oechem.OEThrow.Info("%d number of fragments generated" % len(frags))

    fragcombs = GetFragmentCombinations(mol, frags, frag_number=len_frags)
    oechem.OEThrow.Info("%d number of fragment combinations generated" % len(fragcombs))

    smiles = set()
    for frag in fragcombs:
        if oechem.OEDetermineComponents(frag)[0] == 1:
            # set.union(<str>) would add the individual characters of the
            # SMILES; add() stores the SMILES string itself.
            smiles.add(oechem.OEMolToSmiles(frag))

    return smiles
def test_n_valence():
    """A JSON molecule with explicit hydrogens loads to the expected SMILES."""
    from openeye import oechem

    # 6 carbons, 5 nitrogens and 4 hydrogens, in that order.
    symbols = ['C'] * 6 + ['N'] * 5 + ['H'] * 4

    # Flat coordinate array, one x, y, z triple per atom.
    geometry = np.array([
        11.02088236, 0.30802536, 2.96687012,
        10.37270642, 2.8383686, 2.75522059,
        9.32012957, -1.48532476, 2.09948562,
        8.06346176, 3.48843435, 1.68941515,
        6.98820713, -0.772898, 1.02801107,
        5.21186447, -2.73065435, 0.12850138,
        5.70508328, -5.1797392, 0.28345893,
        6.45152507, 1.7536658, 0.86549457,
        2.97820833, -2.31491455, -0.90706852,
        3.71709131, -6.31357514, -0.68408084,
        2.05980154, -4.57124733, -1.40784597,
        12.76887939, -0.24566439, 3.77189345,
        11.61992628, 4.26322222, 3.39583795,
        9.76610505, -3.43174262, 2.23743576,
        7.53811768, 5.41217579, 1.50989122,
    ])

    # [atom index, atom index, bond order] triples.
    connectivity = [
        [0, 1, 1], [0, 2, 2], [0, 11, 1],
        [1, 3, 2], [1, 12, 1],
        [2, 4, 1], [2, 13, 1],
        [3, 7, 1], [3, 14, 1],
        [4, 5, 1], [4, 7, 2],
        [5, 6, 1], [5, 8, 2],
        [6, 9, 1],
        [8, 10, 1],
        [9, 10, 2],
    ]

    json_molecule = {
        'symbols': symbols,
        'geometry': geometry,
        'connectivity': connectivity,
    }

    loaded = utils.load_molecule(json_molecule)
    assert utils.has_explicit_hydrogen(loaded)
    assert oechem.OEMolToSmiles(loaded) == 'c1ccnc(c1)c2[n-]nnn2'
def create_mapped_smiles(mol):
    """
    Generate an index-tagged explicit hydrogen SMILES.

    Example: for the SMILES string "CO" (methanol) the index-tagged explicit
    hydrogen SMILES is '[H:3][C:1]([H:4])([H:5])[O:2][H:6]'.

    The molecule is modified in place: explicit hydrogens are added when it
    has none, and every atom's map index is set to its atom index plus one.

    Parameters
    ----------
    mol: OEMol

    Returns
    -------
    index-tagged explicit hydrogen SMILES str
    """
    # Only add hydrogens if the molecule does not already carry explicit ones.
    if not oechem.OEHasExplicitHydrogens(mol):
        oechem.OEAddExplicitHydrogens(mol)

    # Map indices are 1-based, atom indices 0-based.
    for atom in mol.GetAtoms():
        map_index = atom.GetIdx() + 1
        atom.SetMapIdx(map_index)

    return oechem.OEMolToSmiles(mol)
def convert_extension(infile, outfile, canonical=False):
    """
    Convert one molecule file format into another using OpenEye tools.

    Every conformer of every molecule read from ``infile`` is written to
    ``outfile``. When ``canonical`` is true, each conformer's title is set to
    the SMILES of its parent molecule before writing.

    Parameters
    ----------
    infile : str
        Input file, read through ``reader.read_mols``
    outfile : str
        Output file; a fatal OpenEye error is thrown if it cannot be opened
    canonical : bool, optional
        Use the molecule's SMILES as the title of each written conformer
    """
    # open input file
    molecules = reader.read_mols(infile)

    # open output file
    out_stream = oechem.oemolostream()
    if not out_stream.open(outfile):
        oechem.OEThrow.Fatal("Unable to open %s for writing" % outfile)

    # write to output
    for molecule in molecules:
        if canonical:
            title = oechem.OEMolToSmiles(molecule)
            for conformer in molecule.GetConfs():
                conformer.SetTitle(title)
                oechem.OEWriteConstMolecule(out_stream, conformer)
        else:
            for conformer in molecule.GetConfs():
                oechem.OEWriteConstMolecule(out_stream, conformer)

    # close filestreams
    out_stream.close()
def test_small_molecule_proposals():
    """
    Make sure the small molecule proposal engine generates molecules
    """
    list_of_smiles = ['CCCC','CCCCC','CCCCCC']
    list_of_mols = [smiles_to_oemol(smi) for smi in list_of_smiles]
    molecules = [Molecule.from_openeye(oemol_) for oemol_ in list_of_mols]

    # Counts how often each chemical state is proposed.
    stats_dict = defaultdict(lambda: 0)

    system_generator = SystemGenerator(
        forcefields = forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        small_molecule_forcefield = small_molecule_forcefield,
        molecules=molecules,
        cache=None)
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_mols, system_generator)

    initial_system, initial_positions, initial_topology, = OEMol_to_omm_ff(list_of_mols[0], system_generator)
    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # positions are ignored here, and we don't want to run the geometry engine
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # check that the molecule it generated is actually the smiles we expect
        matching_molecules = [
            residue for residue in proposal.new_topology.residues()
            if residue.name=='MOL'
        ]
        if len(matching_molecules) != 1:
            raise ValueError("More than one residue with the same name!")

        residue_oemol = generateOEMolFromTopologyResidue(matching_molecules[0])
        smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(
            oechem.OEMolToSmiles(residue_oemol))
        assert smiles == proposal.new_chemical_state_key

        proposal = new_proposal
def get_result(self, query_id):
    """
    Wait for a query to complete and return its ranked hits.

    The query URL is polled until the reported job status is ``COMPLETED``.
    The wait before each poll grows by 60 seconds (0 s, 60 s, 120 s, ...).
    The result payload is then downloaded into a temporary ``.oeb`` file and
    read back as molecules.

    Parameters
    ----------
    query_id :
        Identifier inserted into ``<url>/queries/<query_id>/``

    Returns
    -------
    list of tuple
        One ``(smiles, title, TanimotoCombo, self.baitset[0], False)`` tuple
        per molecule in the result file.
    """
    url = self.args.url + "/queries/{}/".format(query_id)

    response = None
    tries = 0
    # ``data`` is only evaluated once ``response`` is set (short-circuit).
    while response is None or data["status"]["job"] != "COMPLETED":
        time.sleep(60 * tries)
        tries += 1
        response = requests.get(url)
        data = response.json()

    results_url = data["results"]
    results_data = requests.get(self.args.url + results_url)

    cur_rank = []
    # delete=False so the file can be reopened by name; it is removed
    # explicitly below, including when writing or parsing raises.
    temp = tempfile.NamedTemporaryFile(suffix='.oeb', mode='wb', delete=False)
    try:
        # Close the write handle before reading the file back by name.
        with temp:
            temp.write(results_data.content)

        with oechem.oemolistream(temp.name) as results:
            for mol in results.GetOEGraphMols():
                cur_rank.append(
                    (oechem.OEMolToSmiles(mol), mol.GetTitle(),
                     float(oechem.OEGetSDData(mol, 'TanimotoCombo')),
                     self.baitset[0], False))
    finally:
        os.remove(temp.name)

    return cur_rank
def update_ranking(self, mol, max_tanimoto, ka_tag):
    """
    Insert a molecule into ``self.ranking`` and trim the ranking to the top N.

    ``self.ranking`` is a list of ``(smiles, title, score, bait, ka_tag)``
    tuples that this method keeps ordered by descending score (index 2). The
    new entry is placed after every entry with a strictly larger score. The
    list is then cut after position ``self.args.topn``, except that entries
    tied with the last kept score are retained.

    If the ranking already holds at least ``topn`` entries and the new score
    is below the last entry's score, nothing is changed.

    Parameters
    ----------
    mol : OpenEye molecule providing the SMILES and title of the new entry
    max_tanimoto : float
        Score of the new entry
    ka_tag :
        Stored unchanged as the last tuple element
    """
    ranking = self.ranking
    if len(ranking) >= self.args.topn and max_tanimoto < ranking[-1][2]:
        # Ranking is full and the new score does not beat the worst entry.
        return

    # Position after the last entry whose score is strictly larger.
    # enumerate() is used instead of list.index(): index() finds the first
    # *equal* tuple, which gives a wrong position when entries are duplicated.
    index = 0
    for position, top_mol in enumerate(ranking):
        if max_tanimoto < top_mol[2]:
            index = position + 1
        else:
            break

    entry = (oechem.OEMolToSmiles(mol), mol.GetTitle(), max_tanimoto,
             self.baitset[0], ka_tag)
    self.ranking = ranking[:index] + [entry] + ranking[index:]

    # Trim to topn entries, but keep everything tied with the last kept score.
    i = self.args.topn - 1
    while i < len(self.ranking) - 1:
        if self.ranking[i][2] != self.ranking[i + 1][2]:
            self.ranking = self.ranking[:i + 1]
            break
        i += 1
def test_select_with_longer_fingerprints(pipeline_test_files):
    """Selection sorts and picks correctly when fingerprints have several entries."""
    (smiles_file, filter_output_oeb, fingerprint_output_oeb,
     smiles_dataset_file, sorted_by_fingerprint_oeb) = pipeline_test_files
    smiles_file.write_text("\n".join(TEST_SMILES))

    pipeline = DancePipeline("SMILES", smiles_file)
    pipeline.filter(_relevant_always, filter_output_oeb)

    # Each fingerprint consists of (1, 1, num_atoms), to ensure that fingerprints
    # are sorted correctly when the fingerprint is longer.
    def three_part_fingerprint(mol):
        return (1, 1, mol.NumAtoms())

    pipeline.assign_fingerprint(three_part_fingerprint, fingerprint_output_oeb)
    pipeline.select(3,
                    "SMILES",
                    smiles_dataset_file,
                    sorted_by_fingerprint_oeb,
                    in_memory_sorting_threshold=3)

    # Check that the molecules are sorted by fingerprint. Two orderings are
    # accepted; they differ only in the order of "N#N" and "C#N".
    outputted_smiles = [
        oechem.OEMolToSmiles(mol)
        for mol in utils.get_mols_from_oeb(sorted_by_fingerprint_oeb)
    ]
    matches_first_order = outputted_smiles == \
        utils.get_list_of_canonical_isomeric_smiles(["N", "N#N", "C#N", "O=C=O"])
    assert matches_first_order or \
        outputted_smiles == \
        utils.get_list_of_canonical_isomeric_smiles(["N", "C#N", "N#N", "O=C=O"])

    # Check that the correct molecules were selected.
    expected_selection = utils.get_list_of_canonical_isomeric_smiles(["N", "O=C=O"])
    utils.assert_smiles_in_file_are_equal(smiles_dataset_file, expected_selection)
def test_small_molecule_proposals():
    """
    Make sure the small molecule proposal engine generates molecules
    """
    from perses.rjmc import topology_proposal
    from openmoltools import forcefield_generators
    import openeye.oechem as oechem

    list_of_smiles = ['CCCC','CCCCC','CCCCCC']
    gaff_xml_filename = get_data_filename('data/gaff.xml')

    # Counts how often each SMILES in the set is proposed.
    stats_dict = dict.fromkeys(list_of_smiles, 0)

    system_generator = topology_proposal.SystemGenerator([gaff_xml_filename])
    proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_smiles, system_generator)

    initial_molecule = generate_initial_molecule('CCCC')
    initial_system, initial_positions, initial_topology = oemol_to_omm_ff(initial_molecule, "MOL")
    proposal = proposal_engine.propose(initial_system, initial_topology)

    for _ in range(50):
        # positions are ignored here, and we don't want to run the geometry engine
        new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology)
        stats_dict[new_proposal.new_chemical_state_key] += 1

        # check that the molecule it generated is actually the smiles we expect
        matching_molecules = [
            residue for residue in proposal.new_topology.residues()
            if residue.name=='MOL'
        ]
        if len(matching_molecules) != 1:
            raise ValueError("More than one residue with the same name!")

        residue_oemol = forcefield_generators.generateOEMolFromTopologyResidue(
            matching_molecules[0])
        assert oechem.OEMolToSmiles(residue_oemol) == proposal.new_chemical_state_key

        proposal = new_proposal
def UniqMol(ifs, ofs):
    """
    Copy molecules from ``ifs`` to ``ofs``, dropping SMILES duplicates.

    Only the first molecule with a given ``OEMolToSmiles`` string is written;
    later molecules with the same SMILES are skipped.
    """
    seen_smiles = set()
    for mol in ifs.GetOEMols():
        smi = oechem.OEMolToSmiles(mol)
        if smi in seen_smiles:
            continue
        seen_smiles.add(smi)
        oechem.OEWriteMolecule(ofs, mol)
def xyz_to_smiles(filename):
    """
    Parse an .xyz file, heuristically perceive chemistry, and return canonical isomeric SMILES

    Only the first molecule in the file is used.

    Parameters
    ----------
    filename : str
        Path of the file to read

    Returns
    -------
    str or None
        SMILES of the first molecule, or ``None`` if no molecule could be read.
    """
    from openeye import oechem

    # The context manager closes the stream even on the early return.
    with oechem.oemolistream(filename) as ifs:
        for oemol in ifs.GetOEGraphMols():
            return oechem.OEMolToSmiles(oemol)
    return None
def test_build_fragment():
    """Building a fragment around each rotor of hexane yields the expected fragments."""
    from openeye import oechem

    hexane = chemi.smiles_to_oemol('CCCCCC')
    wbo_fragmenter = fragmenter.fragment.WBOFragmenter(hexane)
    wbo_fragmenter.calculate_wbo()
    wbo_fragmenter._get_rotor_wbo()
    wbo_fragmenter.threshold = 0.05

    for rotor_bond in wbo_fragmenter.rotors_wbo:
        wbo_fragmenter._build_fragment(rotor_bond)
    assert len(wbo_fragmenter.fragments) == 3

    # Expected unmapped SMILES per rotor bond, checked in the original order.
    expected_by_bond = (
        ((3, 5), 'CCCCC'),
        ((4, 6), 'CCCCC'),
        ((5, 6), 'CCCCCC'),
    )
    for bond, expected_smiles in expected_by_bond:
        fragment = wbo_fragmenter.fragments[bond]
        remove_atom_map(fragment)
        assert oechem.OEMolToSmiles(wbo_fragmenter.fragments[bond]) == expected_smiles
def test_selects_just_one_smallest_molecule(self, tmpdir):
    """Selecting one molecule writes only "N" to the output file."""
    tmpdir_path = str(tmpdir)
    output_file = str(tmpdir / "select-final.smi")

    self.create_smiles_in_tmpdir(tmpdir)
    self.invoke_select_final(1, tmpdir_path, output_file)

    # Every molecule that can be read back must be "N".
    in_stream = oechem.oemolistream(output_file)
    selected = oechem.OEMol()
    while oechem.OEReadMolecule(in_stream, selected):
        selected_smiles = oechem.OEMolToSmiles(selected)
        assert selected_smiles == "N"
def get_smiles(self):
    """
    Return the SMILES of ``self.mol`` as produced by ``oechem.OEMolToSmiles``.

    Returns
    -------
    smiles: str
        SMILES string for the molecule
    """
    return oechem.OEMolToSmiles(self.mol)
def test_load_molecule(toolkit):
    """Test load molecules"""
    explicit_h_butane = '[H]C([H])([H])C([H])([H])C([H])([H])C([H])([H])[H]'
    mol = utils.load_molecule(explicit_h_butane, toolkit=toolkit)

    # The toolkit-specific writer is imported lazily so that only the toolkit
    # under test needs to be installed.
    if toolkit == 'openeye':
        from openeye import oechem
        smiles = oechem.OEMolToSmiles(mol)
        assert smiles == 'CCCC'
    if toolkit == 'rdkit':
        from rdkit import Chem
        smiles = Chem.MolToSmiles(mol)
        assert smiles == 'CCCC'
def get_new_action_set(self, aligner=None):
    """
    Query FastROCS for a new set of molecules and return them with their SMILES.

    Parameters
    ----------
    aligner : optional
        When given, it is installed through ``self.set_mole_aligner`` before
        the query is run.

    Returns
    -------
    mols
        Result of ``self.fastrocs_query`` for the current aligner
    smiles : list of str
        ``OEMolToSmiles`` of each returned molecule, in the same order
    """
    with self.logger("get_new_action_set") as logger:
        if aligner is not None:
            self.set_mole_aligner(aligner)

        config = self.config
        mols = self.fastrocs_query(self.mol_aligner, config.space_size, config.host)

        smiles = []
        for mol in mols:
            smiles.append(oechem.OEMolToSmiles(mol))

        return mols, smiles
def _oe_stream_from_file(file_path: str, as_smiles=False):  # pragma: no cover
    """
    Lazily yield the molecules stored in a file read with OpenEye.

    Parameters
    ----------
    file_path : str
        Path of the molecule file to open
    as_smiles : bool, optional
        When true, yield the ``OEMolToSmiles`` string of each molecule;
        otherwise yield an OpenFF ``Molecule`` created with
        ``allow_undefined_stereo=True``.
    """
    from openeye import oechem
    from openff.toolkit.topology import Molecule

    input_molecule_stream = oechem.oemolistream()
    input_molecule_stream.open(file_path)

    for oe_molecule in input_molecule_stream.GetOEMols():
        if as_smiles:
            yield oechem.OEMolToSmiles(oe_molecule)
        else:
            yield Molecule.from_openeye(oe_molecule, allow_undefined_stereo=True)