def test_small_molecule_proposals(): """ Make sure the small molecule proposal engine generates molecules """ list_of_smiles = ['CCCC','CCCCC','CCCCCC'] list_of_mols = [] for smi in list_of_smiles: mol = smiles_to_oemol(smi) list_of_mols.append(mol) molecules = [Molecule.from_openeye(mol) for mol in list_of_mols] stats_dict = defaultdict(lambda: 0) system_generator = SystemGenerator(forcefields = forcefield_files, barostat=barostat, forcefield_kwargs=forcefield_kwargs, nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs, small_molecule_forcefield = small_molecule_forcefield, molecules=molecules, cache=None) proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_mols, system_generator) initial_system, initial_positions, initial_topology, = OEMol_to_omm_ff(list_of_mols[0], system_generator) proposal = proposal_engine.propose(initial_system, initial_topology) for i in range(50): #positions are ignored here, and we don't want to run the geometry engine new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology) stats_dict[new_proposal.new_chemical_state_key] += 1 #check that the molecule it generated is actually the smiles we expect matching_molecules = [res for res in proposal.new_topology.residues() if res.name=='MOL'] if len(matching_molecules) != 1: raise ValueError("More than one residue with the same name!") mol_res = matching_molecules[0] oemol = generateOEMolFromTopologyResidue(mol_res) smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(oechem.OEMolToSmiles(oemol)) assert smiles == proposal.new_chemical_state_key proposal = new_proposal
def test_small_molecule_proposals(): """ Make sure the small molecule proposal engine generates molecules """ from perses.rjmc import topology_proposal from openmoltools import forcefield_generators from collections import defaultdict from perses.rjmc.topology_proposal import SmallMoleculeSetProposalEngine import openeye.oechem as oechem list_of_smiles = ['CCCC','CCCCC','CCCCCC'] gaff_xml_filename = get_data_filename('data/gaff.xml') stats_dict = defaultdict(lambda: 0) system_generator = topology_proposal.SystemGenerator([gaff_xml_filename]) proposal_engine = topology_proposal.SmallMoleculeSetProposalEngine(list_of_smiles, system_generator) initial_molecule = generate_initial_molecule('CCCC') initial_system, initial_positions, initial_topology = oemol_to_omm_ff(initial_molecule, "MOL") proposal = proposal_engine.propose(initial_system, initial_topology) for i in range(50): #positions are ignored here, and we don't want to run the geometry engine new_proposal = proposal_engine.propose(proposal.old_system, proposal.old_topology) stats_dict[new_proposal.new_chemical_state_key] += 1 #check that the molecule it generated is actually the smiles we expect matching_molecules = [res for res in proposal.new_topology.residues() if res.name=='MOL'] if len(matching_molecules) != 1: raise ValueError("More than one residue with the same name!") mol_res = matching_molecules[0] oemol = forcefield_generators.generateOEMolFromTopologyResidue(mol_res) smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(oechem.OEMolToSmiles(oemol)) assert smiles == proposal.new_chemical_state_key proposal = new_proposal
topology = topology.to_openmm() topology.setPeriodicBoxVectors(system.getDefaultPeriodicBoxVectors()) ifs = oechem.oemolistream() ifs.open(ligand_filename) # get the list of molecules mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()] for idx, mol in enumerate(mol_list): mol.SetTitle("MOL{}".format(idx)) oechem.OETriposAtomNames(mol) initial_mol = mol_list[initial_ligand] proposal_mol = mol_list[proposal_ligand] proposal_smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles( oechem.OECreateCanSmiString(proposal_mol)) current_smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles( oechem.OECreateCanSmiString(initial_mol)) barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50) system_generator = SystemGenerator( [ 'amber14/protein.ff14SB.xml', 'gaff.xml', 'amber14/tip3p.xml', 'MCL1_ligands.xml' ], barostat=barostat, forcefield_kwargs={ 'nonbondedMethod': app.PME, 'constraints': app.HBonds,
def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int], equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None, geometry_options: Dict=None): self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules] environments = ['explicit', 'vacuum'] temperature = 298.15 * unit.kelvin pressure = 1.0 * unit.atmospheres constraints = app.HBonds self._storage = NetCDFStorage(output_filename) self._ncmc_switching_times = ncmc_switching_times self._n_equilibrium_steps = equilibrium_steps self._geometry_options = geometry_options # Create a system generator for our desired forcefields. from perses.rjmc.topology_proposal import SystemGenerator system_generators = dict() from pkg_resources import resource_filename gaff_xml_filename = resource_filename('perses', 'data/gaff.xml') barostat = openmm.MonteCarloBarostat(pressure, temperature) system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'], forcefield_kwargs={'nonbondedCutoff': 9.0 * unit.angstrom, 'implicitSolvent': None, 'constraints': constraints, 'ewaldErrorTolerance': 1e-5, 'hydrogenMass': 3.0*unit.amu}, periodic_forcefield_kwargs = {'nonbondedMethod': app.PME} barostat=barostat) system_generators['vacuum'] = SystemGenerator([gaff_xml_filename], forcefield_kwargs={'implicitSolvent': None, 'constraints': constraints, 'hydrogenMass': 3.0*unit.amu}, nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff}) # # Create topologies and positions # topologies = dict() positions = dict() from openmoltools import forcefield_generators forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml') forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator) # Create molecule in vacuum. from perses.utils.openeye import extractPositionsFromOEMol from openmoltools.openeye import smiles_to_oemol, generate_conformers if initial_molecule: smiles = initial_molecule else: smiles = np.random.choice(molecules) molecule = smiles_to_oemol(smiles) molecule = generate_conformers(molecule, max_confs=1) topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule) positions['vacuum'] = extractPositionsFromOEMol(molecule) # Create molecule in solvent. modeller = app.Modeller(topologies['vacuum'], positions['vacuum']) modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom) topologies['explicit'] = modeller.getTopology() positions['explicit'] = modeller.getPositions() # Set up the proposal engines. proposal_metadata = {} proposal_engines = dict() for environment in environments: proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules, system_generators[environment]) # Generate systems systems = dict() for environment in environments: systems[environment] = system_generators[environment].build_system(topologies[environment]) # Define thermodynamic state of interest. thermodynamic_states = dict() thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'], temperature=temperature, pressure=pressure) thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature) # Create SAMS samplers from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler mcmc_samplers = dict() exen_samplers = dict() sams_samplers = dict() for environment in environments: storage = NetCDFStorageView(self._storage, envname=environment) if self._geometry_options: n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment] use_sterics = self._geometry_options['use_sterics'][environment] else: n_torsion_divisions = 180 use_sterics = False geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics) move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V", n_restart_attempts=10) chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment]) if environment == 'explicit': sampler_state = states.SamplerState(positions=positions[environment], box_vectors=systems[environment].getDefaultPeriodicBoxVectors()) else: sampler_state = states.SamplerState(positions=positions[environment]) mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move) exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment], chemical_state_key, proposal_engines[environment], geometry_engine, options={'nsteps': self._ncmc_switching_times[environment]}, storage=storage, ncmc_write_interval=self._ncmc_switching_times[environment]) exen_samplers[environment].verbose = True sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage) sams_samplers[environment].verbose = True # Create test MultiTargetDesign sampler. from perses.samplers.samplers import MultiTargetDesign target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0} designer = MultiTargetDesign(target_samplers, storage=self._storage) # Store things. self.molecules = molecules self.environments = environments self.topologies = topologies self.positions = positions self.system_generators = system_generators self.proposal_engines = proposal_engines self.thermodynamic_states = thermodynamic_states self.mcmc_samplers = mcmc_samplers self.exen_samplers = exen_samplers self.sams_samplers = sams_samplers self.designer = designer
topology = topology.to_openmm() topology.setPeriodicBoxVectors(system.getDefaultPeriodicBoxVectors()) ifs = oechem.oemolistream() ifs.open(ligand_filename) # get the list of molecules mol_list = [oechem.OEMol(mol) for mol in ifs.GetOEMols()] for idx, mol in enumerate(mol_list): mol.SetTitle("MOL{}".format(idx)) oechem.OETriposAtomNames(mol) initial_mol = mol_list[initial_ligand] proposal_mol = mol_list[proposal_ligand] proposal_smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(oechem.OECreateCanSmiString(proposal_mol)) current_smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(oechem.OECreateCanSmiString(initial_mol)) barostat = openmm.MonteCarloBarostat(1.0*unit.atmosphere, temperature, 50) system_generator = SystemGenerator(['amber14/protein.ff14SB.xml', 'gaff.xml', 'amber14/tip3p.xml', 'MCL1_ligands.xml'], barostat=barostat, forcefield_kwargs={'nonbondedMethod': app.PME, 'constraints': app.HBonds, 'hydrogenMass': 4 * unit.amus}, use_antechamber=False) atom_mapper_filename = os.path.join(setup_directory, "{}_atom_mapper.json".format(project_prefix)) with open(atom_mapper_filename, 'r') as infile: atom_mapper = SmallMoleculeAtomMapper.from_json(infile.read()) proposal_engine = PremappedSmallMoleculeSetProposalEngine(atom_mapper, system_generator) topology_proposal = proposal_engine.propose(system, topology, current_smiles=current_smiles, proposed_mol=proposal_mol, map_index=map_index)
def generate_solvated_hybrid_test_topology(current_mol_name="naphthalene", proposed_mol_name="benzene"): """ Generate a test solvated topology proposal, current positions, and new positions triplet from two IUPAC molecule names. Parameters ---------- current_mol_name : str, optional name of the first molecule proposed_mol_name : str, optional name of the second molecule Returns ------- topology_proposal : perses.rjmc.topology_proposal The topology proposal representing the transformation current_positions : np.array, unit-bearing The positions of the initial system new_positions : np.array, unit-bearing The positions of the new system """ import simtk.openmm.app as app from openmoltools import forcefield_generators from openeye import oechem from perses.rjmc.topology_proposal import SystemGenerator, SmallMoleculeSetProposalEngine from perses.rjmc import geometry from perses.utils.data import get_data_filename current_mol, unsolv_old_system, pos_old, top_old = createSystemFromIUPAC(current_mol_name) proposed_mol = iupac_to_oemol(proposed_mol_name) proposed_mol.SetTitle("MOL") initial_smiles = oechem.OEMolToSmiles(current_mol) final_smiles = oechem.OEMolToSmiles(proposed_mol) gaff_xml_filename = get_data_filename("data/gaff.xml") forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml') forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator) modeller = app.Modeller(top_old, pos_old) modeller.addSolvent(forcefield, model='tip3p', padding=9.0*unit.angstrom) solvated_topology = modeller.getTopology() solvated_positions = modeller.getPositions() solvated_system = forcefield.createSystem(solvated_topology, nonbondedMethod=app.PME, removeCMMotion=False) barostat = openmm.MonteCarloBarostat(1.0*unit.atmosphere, temperature, 50) solvated_system.addForce(barostat) gaff_filename = get_data_filename('data/gaff.xml') system_generator = SystemGenerator([gaff_filename, 'amber99sbildn.xml', 'tip3p.xml'], barostat=barostat, forcefield_kwargs={'removeCMMotion': False},periodic_forcefield_kwargs={'nonbondedMethod': app.PME}) geometry_engine = geometry.FFAllAngleGeometryEngine() canonicalized_smiles_list = [SmallMoleculeSetProposalEngine.canonicalize_smiles(smiles) for smiles in [initial_smiles, final_smiles]] proposal_engine = SmallMoleculeSetProposalEngine( canonicalized_smiles_list, system_generator, residue_name="MOL") #generate topology proposal topology_proposal = proposal_engine.propose(solvated_system, solvated_topology) #generate new positions with geometry engine new_positions, _ = geometry_engine.propose(topology_proposal, solvated_positions, beta) return topology_proposal, solvated_positions, new_positions
def __init__(self, molecules: List[str], output_filename: str, ncmc_switching_times: Dict[str, int], equilibrium_steps: Dict[str, int], timestep: unit.Quantity, initial_molecule: str=None, geometry_options: Dict=None): self._molecules = [SmallMoleculeSetProposalEngine.canonicalize_smiles(molecule) for molecule in molecules] environments = ['explicit', 'vacuum'] temperature = 298.15 * unit.kelvin pressure = 1.0 * unit.atmospheres constraints = app.HBonds self._storage = NetCDFStorage(output_filename) self._ncmc_switching_times = ncmc_switching_times self._n_equilibrium_steps = equilibrium_steps self._geometry_options = geometry_options # Create a system generator for our desired forcefields. from perses.rjmc.topology_proposal import SystemGenerator system_generators = dict() from pkg_resources import resource_filename gaff_xml_filename = resource_filename('perses', 'data/gaff.xml') barostat = openmm.MonteCarloBarostat(pressure, temperature) system_generators['explicit'] = SystemGenerator([gaff_xml_filename, 'tip3p.xml'], forcefield_kwargs={'nonbondedMethod': app.PME, 'nonbondedCutoff': 9.0 * unit.angstrom, 'implicitSolvent': None, 'constraints': constraints, 'ewaldErrorTolerance': 1e-5, 'hydrogenMass': 3.0*unit.amu}, barostat=barostat) system_generators['vacuum'] = SystemGenerator([gaff_xml_filename], forcefield_kwargs={'nonbondedMethod': app.NoCutoff, 'implicitSolvent': None, 'constraints': constraints, 'hydrogenMass': 3.0*unit.amu}) # # Create topologies and positions # topologies = dict() positions = dict() from openmoltools import forcefield_generators forcefield = app.ForceField(gaff_xml_filename, 'tip3p.xml') forcefield.registerTemplateGenerator(forcefield_generators.gaffTemplateGenerator) # Create molecule in vacuum. from perses.tests.utils import createOEMolFromSMILES, extractPositionsFromOEMOL if initial_molecule: smiles = initial_molecule else: smiles = np.random.choice(molecules) molecule = createOEMolFromSMILES(smiles) topologies['vacuum'] = forcefield_generators.generateTopologyFromOEMol(molecule) positions['vacuum'] = extractPositionsFromOEMOL(molecule) # Create molecule in solvent. modeller = app.Modeller(topologies['vacuum'], positions['vacuum']) modeller.addSolvent(forcefield, model='tip3p', padding=9.0 * unit.angstrom) topologies['explicit'] = modeller.getTopology() positions['explicit'] = modeller.getPositions() # Set up the proposal engines. proposal_metadata = {} proposal_engines = dict() for environment in environments: proposal_engines[environment] = SmallMoleculeSetProposalEngine(self._molecules, system_generators[environment]) # Generate systems systems = dict() for environment in environments: systems[environment] = system_generators[environment].build_system(topologies[environment]) # Define thermodynamic state of interest. thermodynamic_states = dict() thermodynamic_states['explicit'] = states.ThermodynamicState(system=systems['explicit'], temperature=temperature, pressure=pressure) thermodynamic_states['vacuum'] = states.ThermodynamicState(system=systems['vacuum'], temperature=temperature) # Create SAMS samplers from perses.samplers.samplers import ExpandedEnsembleSampler, SAMSSampler mcmc_samplers = dict() exen_samplers = dict() sams_samplers = dict() for environment in environments: storage = NetCDFStorageView(self._storage, envname=environment) if self._geometry_options: n_torsion_divisions = self._geometry_options['n_torsion_divsions'][environment] use_sterics = self._geometry_options['use_sterics'][environment] else: n_torsion_divisions = 180 use_sterics = False geometry_engine = geometry.FFAllAngleGeometryEngine(storage=storage, n_torsion_divisions=n_torsion_divisions, use_sterics=use_sterics) move = mcmc.LangevinSplittingDynamicsMove(timestep=timestep, splitting="V R O R V", n_restart_attempts=10) chemical_state_key = proposal_engines[environment].compute_state_key(topologies[environment]) if environment == 'explicit': sampler_state = states.SamplerState(positions=positions[environment], box_vectors=systems[environment].getDefaultPeriodicBoxVectors()) else: sampler_state = states.SamplerState(positions=positions[environment]) mcmc_samplers[environment] = mcmc.MCMCSampler(thermodynamic_states[environment], sampler_state, move) exen_samplers[environment] = ExpandedEnsembleSampler(mcmc_samplers[environment], topologies[environment], chemical_state_key, proposal_engines[environment], geometry_engine, options={'nsteps': self._ncmc_switching_times[environment]}, storage=storage, ncmc_write_interval=self._ncmc_switching_times[environment]) exen_samplers[environment].verbose = True sams_samplers[environment] = SAMSSampler(exen_samplers[environment], storage=storage) sams_samplers[environment].verbose = True # Create test MultiTargetDesign sampler. from perses.samplers.samplers import MultiTargetDesign target_samplers = {sams_samplers['explicit']: 1.0, sams_samplers['vacuum']: -1.0} designer = MultiTargetDesign(target_samplers, storage=self._storage) # Store things. self.molecules = molecules self.environments = environments self.topologies = topologies self.positions = positions self.system_generators = system_generators self.proposal_engines = proposal_engines self.thermodynamic_states = thermodynamic_states self.mcmc_samplers = mcmc_samplers self.exen_samplers = exen_samplers self.sams_samplers = sams_samplers self.designer = designer