def test_create_with_template_generator(self):
    """Test SystemGenerator creation with small molecule residue template generators.

    For each small molecule force field under test, a generator is built once
    without a cache, then twice against the same cache file located in a
    fresh temporary directory.
    """
    if CI:
        # Reduced force field set when the CI flag is set
        forcefield_names = ['gaff-2.11', 'openff-1.1.0']
    else:
        forcefield_names = SystemGenerator.SMALL_MOLECULE_FORCEFIELDS

    for forcefield_name in forcefield_names:
        # Generator that defines AMBER and small molecule force fields (no cache)
        generator = SystemGenerator(
            forcefields=self.amber_forcefields,
            small_molecule_forcefield=forcefield_name,
        )

        # Generators that also have a database cache
        with tempfile.TemporaryDirectory() as scratch_dir:
            cache_path = os.path.join(scratch_dir, 'db.json')
            # Pass 1 starts from a new database file; pass 2 reopens it
            # (with the cache still empty)
            for _ in range(2):
                generator = SystemGenerator(
                    forcefields=self.amber_forcefields,
                    cache=cache_path,
                    small_molecule_forcefield=forcefield_name,
                )
                del generator
def test_cache(self):
    """Test that SystemGenerator correctly manages a cache.

    A single cache file is shared by all force fields. The first pass adds the
    molecules of every test system and parameterizes them; the second pass
    builds fresh generators on the same cache file and parameterizes the same
    molecules without adding them again.
    """
    from openmmforcefields.generators import SystemGenerator
    from openmmforcefields.utils import Timer

    def selected_forcefields():
        # Reduced force field set when the CI flag is set
        if CI:
            return ['gaff-2.11', 'openff-1.1.0']
        return SystemGenerator.SMALL_MOLECULE_FORCEFIELDS

    def timed_parameterization(generator, molecule):
        # Build a System for one molecule, check its size, hand back the timer
        openmm_topology = molecule.to_topology().to_openmm()
        with Timer() as timer:
            system = generator.create_system(openmm_topology)
        assert system.getNumParticles() == molecule.n_atoms
        return timer

    # timing[(small_molecule_forcefield, smiles)] is the time (in seconds)
    # to parameterize molecule the first time
    timing = {}

    with tempfile.TemporaryDirectory() as scratch_dir:
        # Single shared cache for all force fields
        cache_path = os.path.join(scratch_dir, 'db.json')

        # First pass: parameterize all molecules for all test systems
        for forcefield_name in selected_forcefields():
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=forcefield_name,
                cache=cache_path,
            )
            for testsystem in self.testsystems.values():
                molecules = testsystem['molecules']
                generator.add_molecules(molecules)
                for molecule in molecules:
                    timer = timed_parameterization(generator, molecule)
                    timing[(forcefield_name, molecule.to_smiles())] = timer.interval()

        # Second pass: molecules should now be cached, so they are not added
        # again. The elapsed time is measured but not compared to the first pass.
        for forcefield_name in selected_forcefields():
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=forcefield_name,
                cache=cache_path,
            )
            for testsystem in self.testsystems.values():
                for molecule in testsystem['molecules']:
                    timed_parameterization(generator, molecule)
def generate_atp(phase = 'vacuum'):
    """
    Modify the AlanineDipeptideVacuum test system to be parametrized with
    amber14 ff14SB, either in vacuum or in tip3p solvent.

    Parameters
    ----------
    phase : str, default 'vacuum'
        Either 'vacuum' or 'solvent'. For 'solvent' the test system is
        solvated (tip3p, 9 angstrom padding, 0.15 M ionic strength) and a
        MonteCarloBarostat is attached to the system generator.

    Returns
    -------
    atp : openmmtools.testsystems.AlanineDipeptideVacuum
        The test system with `system` (and, for 'solvent', `topology` and
        `positions`) replaced.
    system_generator : openmmforcefields.generators.SystemGenerator
        The generator used to build `atp.system`.

    Raises
    ------
    ValueError
        If `phase` is neither 'vacuum' nor 'solvent'.
    """
    # Validate first: an unknown phase previously fell through both branches
    # and failed at `return` with an unbound `system_generator`.
    if phase not in ('vacuum', 'solvent'):
        raise ValueError(f"phase must be 'vacuum' or 'solvent', got {phase!r}")

    import openmmtools.testsystems as ts
    from openmmforcefields.generators import SystemGenerator

    atp = ts.AlanineDipeptideVacuum(constraints = app.HBonds, hydrogenMass = 4 * unit.amus)
    forcefield_files = ['gaff.xml', 'amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']

    if phase == 'vacuum':
        barostat = None
        system_generator = SystemGenerator(forcefield_files,
                                           barostat = barostat,
                                           forcefield_kwargs = {'removeCMMotion': False,
                                                                'ewaldErrorTolerance': 1e-4,
                                                                'constraints' : app.HBonds,
                                                                'hydrogenMass' : 4 * unit.amus},
                                           nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff},
                                           small_molecule_forcefield = 'gaff-2.11',
                                           molecules = None,
                                           cache = None)
        # update the parametrization scheme to amberff14sb
        atp.system = system_generator.create_system(atp.topology)
    else:
        # phase == 'solvent'
        barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, 300 * unit.kelvin, 50)
        system_generator = SystemGenerator(forcefield_files,
                                           barostat = barostat,
                                           forcefield_kwargs = {'removeCMMotion': False,
                                                                'ewaldErrorTolerance': 1e-4,
                                                                'nonbondedMethod': app.PME,
                                                                'constraints' : app.HBonds,
                                                                'hydrogenMass' : 4 * unit.amus},
                                           small_molecule_forcefield = 'gaff-2.11',
                                           molecules = None,
                                           cache = None)

        modeller = app.Modeller(atp.topology, atp.positions)
        modeller.addSolvent(system_generator._forcefield, model='tip3p', padding=9*unit.angstroms, ionicStrength=0.15*unit.molar)
        solvated_topology = modeller.getTopology()
        solvated_positions = modeller.getPositions()

        # canonicalize the solvated positions: turn tuples into np.array
        atp.positions = unit.quantity.Quantity(
            value = np.array([list(atom_pos) for atom_pos in solvated_positions.value_in_unit_system(unit.md_unit_system)]),
            unit = unit.nanometers)
        atp.topology = solvated_topology

        atp.system = system_generator.create_system(atp.topology)

    return atp, system_generator
def test_small_molecule_proposals():
    """
    Make sure the small molecule proposal engine generates molecules.

    Fifty proposals are drawn; on each iteration the 'MOL' residue of the
    current proposal's new topology is converted back to SMILES and compared
    with that proposal's new chemical state key.
    """
    oemols = [smiles_to_oemol(smiles_string) for smiles_string in ['CCCC', 'CCCCC', 'CCCCCC']]
    openff_molecules = [Molecule.from_openeye(oemol) for oemol in oemols]

    # Tally of how often each chemical state key was proposed
    proposal_counts = defaultdict(int)

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=openff_molecules,
        cache=None,
    )
    engine = topology_proposal.SmallMoleculeSetProposalEngine(oemols, system_generator)

    # Positions of the starting molecule are not needed for proposing
    start_system, _, start_topology = OEMol_to_omm_ff(oemols[0], system_generator)
    proposal = engine.propose(start_system, start_topology)

    for _ in range(50):
        # positions are ignored here, and we don't want to run the geometry engine
        next_proposal = engine.propose(proposal.old_system, proposal.old_topology)
        proposal_counts[next_proposal.new_chemical_state_key] += 1

        # check that the molecule it generated is actually the smiles we expect
        mol_residues = [
            residue
            for residue in proposal.new_topology.residues()
            if residue.name == 'MOL'
        ]
        if len(mol_residues) != 1:
            raise ValueError("More than one residue with the same name!")

        rebuilt_oemol = generateOEMolFromTopologyResidue(mol_residues[0])
        rebuilt_smiles = SmallMoleculeSetProposalEngine.canonicalize_smiles(
            oechem.OEMolToSmiles(rebuilt_oemol))
        assert rebuilt_smiles == proposal.new_chemical_state_key

        proposal = next_proposal
def test_mapping_strength_levels(pairs_of_smiles=[('Cc1ccccc1','c1ccc(cc1)N'),('CC(c1ccccc1)','O=C(c1ccccc1)'),('Oc1ccccc1','Sc1ccccc1')],test=True):
    """
    Propose each ligand pair at every map strength and compare the number of
    unique old/new atoms against tabulated values.

    Parameters
    ----------
    pairs_of_smiles : list of (str, str)
        (initial, proposed) ligand SMILES pairs; the position of a pair in the
        list selects its row in the expected-results table.
    test : bool, default True
        If True, render the atom mapping to '<index>-<strength>.png' and
        assert the unique atom counts; if False only print them.
    """
    # expected_counts[pair index][map strength] == (n unique old atoms, n unique new atoms)
    expected_counts = {
        0: {'default': (3, 2), 'weak': (3, 2), 'strong': (4, 3)},
        1: {'default': (7, 3), 'weak': (6, 2), 'strong': (7, 3)},
        2: {'default': (1, 1), 'weak': (1, 1), 'strong': (2, 2)},
    }

    for strength in ['weak', 'default', 'strong']:
        for pair_index, (smiles_a, smiles_b) in enumerate(pairs_of_smiles):
            print(f"conducting {strength} mapping with ligands {smiles_a}, {smiles_b}")
            old_oemol = smiles_to_oemol(smiles_a)
            new_oemol = smiles_to_oemol(smiles_b)
            oemol_pair = [old_oemol, new_oemol]
            openff_molecules = [Molecule.from_openeye(oemol) for oemol in oemol_pair]

            system_generator = SystemGenerator(
                forcefields=forcefield_files,
                barostat=barostat,
                forcefield_kwargs=forcefield_kwargs,
                nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                small_molecule_forcefield='gaff-1.81',
                molecules=openff_molecules,
                cache=None,
            )
            engine = SmallMoleculeSetProposalEngine([old_oemol, new_oemol], system_generator)
            start_system, _, start_topology = OEMol_to_omm_ff(old_oemol, system_generator)

            print(f"running now with map strength {strength}")
            proposal = engine.propose(start_system, start_topology, map_strength=strength)
            print(smiles_a, smiles_b, 'length OLD and NEW atoms',
                  len(proposal.unique_old_atoms), len(proposal.unique_new_atoms))

            if test:
                render_atom_mapping(f'{pair_index}-{strength}.png', old_oemol, new_oemol,
                                    proposal._new_to_old_atom_map)
                observed = (len(proposal.unique_old_atoms), len(proposal.unique_new_atoms))
                assert observed == expected_counts[pair_index][strength], \
                    f"the mapping failed, correct results are {expected_counts[pair_index][strength]}"
                print("the mapping worked!!!")
            print()
def system_generator_wrapper(oemols,
                             barostat = None,
                             forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
                             forcefield_kwargs = {'removeCMMotion': False, 'ewaldErrorTolerance': 1e-4, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
                             nonperiodic_forcefield_kwargs = {'nonbondedMethod': app.NoCutoff},
                             small_molecule_forcefield = 'gaff-2.11',
                             **kwargs
                             ):
    """
    make a system generator (vacuum) for a small molecule

    Parameters
    ----------
    oemols : list of openeye.oechem.OEMol
        oemols; each is converted with `Molecule.from_openeye` and registered
        with the generator
    barostat : openmm.MonteCarloBarostat, default None
        barostat
    forcefield_files : list of str
        pointers to protein forcefields and solvent
    forcefield_kwargs : dict
        dict of forcefield_kwargs
    nonperiodic_forcefield_kwargs : dict
        dict of args for non-periodic system
    small_molecule_forcefield : str
        pointer to small molecule forcefield to use
    **kwargs
        accepted for call compatibility and ignored

    Returns
    -------
    system_generator : openmmforcefields.generators.SystemGenerator
    """
    import copy

    from openff.toolkit.topology import Molecule
    from openmmforcefields.generators import SystemGenerator

    molecules = [Molecule.from_openeye(oemol) for oemol in oemols]

    # The list/dict defaults in the signature are created once and shared by
    # every call. Hand the generator shallow copies so that anything it (or a
    # caller holding the generator) changes cannot leak into later calls.
    # copy.copy also passes None through unchanged.
    system_generator = SystemGenerator(forcefields = copy.copy(forcefield_files),
                                       barostat = barostat,
                                       forcefield_kwargs = copy.copy(forcefield_kwargs),
                                       nonperiodic_forcefield_kwargs = copy.copy(nonperiodic_forcefield_kwargs),
                                       small_molecule_forcefield = small_molecule_forcefield,
                                       molecules = molecules,
                                       cache = None)
    return system_generator
def test_complex(self):
    """Test parameterizing a protein:ligand complex in vacuum.

    For every test system the first complex structure is parameterized in
    vacuum, then solvated with Modeller and parameterized again. The solvated
    model is written to 'test.pdb' in the current working directory.
    """
    from openmmforcefields.generators import SystemGenerator

    for system_name, testsystem in self.testsystems.items():
        print(f'Testing parameterization of {system_name} in vacuum')
        ligands = testsystem['molecules']

        # Select a complex from the set
        ligand_index = 0
        complex_structure = testsystem['complex_structures'][ligand_index]
        selected_ligand = ligands[ligand_index]  # looked up but not used below
        vacuum_topology = complex_structure.topology

        system_data_dir = get_data_filename(os.path.join('perses_jacs_systems', system_name))
        cache_path = os.path.join(system_data_dir, 'cache.json')

        # Create a system in vacuum
        generator = SystemGenerator(
            forcefields=self.amber_forcefields,
            molecules=ligands,
            cache=cache_path,
        )
        vacuum_system = generator.create_system(vacuum_topology)
        assert vacuum_system.getNumParticles() == len(complex_structure.atoms)

        # Create solvated structure
        from simtk.openmm import app
        from simtk import unit
        modeller = app.Modeller(complex_structure.topology, complex_structure.positions)
        modeller.addSolvent(
            generator.forcefield,
            padding=0*unit.angstroms,
            ionicStrength=300*unit.millimolar,
        )

        # Create a system with solvent and ions
        solvated_system = generator.create_system(modeller.topology)
        n_solvated_atoms = sum(1 for _ in modeller.topology.atoms())
        assert solvated_system.getNumParticles() == n_solvated_atoms

        with open('test.pdb', 'w') as pdb_handle:
            app.PDBFile.writeFile(modeller.topology, modeller.positions, pdb_handle)
def __setup_system_ex_mm(self):
    """Solvate the current topology/positions and build the OpenMM system.

    Creates `self.openmm_system_generator` on first use (later calls only
    register `self.mol` with the existing generator), solvates with tip3p,
    stores the solvated topology/positions/system on `self`, writes the model
    to `self.config.pdb_file_name` and reads it back into `self.pdb`.

    Returns
    -------
    tuple
        (self.system, self.topology, self.positions)
    """
    with self.logger("__setup_system_ex_mm") as logger:
        if "openmm_system_generator" in self.__dict__:
            # Generator already exists: just register the current molecule
            self.openmm_system_generator.add_molecules([self.mol])
        else:
            # NOTE(review): 'amber/phosaa10' has no '.xml' suffix unlike its
            # neighbours - confirm this resolves to the intended file.
            protein_and_water_forcefields = ['amber/protein.ff14SB.xml', 'amber/phosaa10', 'amber/tip3p_standard.xml']
            ligand_forcefield = 'openff-1.1.0'
            # ligand_forcefield = 'gaff-2.11'
            self.openmm_system_generator = SystemGenerator(
                forcefields=protein_and_water_forcefields,
                forcefield_kwargs=self.params,
                molecules=[self.mol],
                small_molecule_forcefield=ligand_forcefield,
            )

        self.modeller = app.Modeller(self.topology, self.positions)
        self.modeller.addSolvent(
            self.openmm_system_generator.forcefield,
            model='tip3p',
            ionicStrength=100 * unit.millimolar,
            padding=1.0 * unit.nanometers,
        )
        self.boxvec = self.modeller.getTopology().getPeriodicBoxVectors()
        self.topology = self.modeller.getTopology()
        self.positions = self.modeller.getPositions()

        self.system = self.openmm_system_generator.create_system(self.topology)
        box_vectors = self.modeller.getTopology().getPeriodicBoxVectors()
        self.system.setDefaultPeriodicBoxVectors(*box_vectors)

        pdb_path = f"{self.config.pdb_file_name}"
        with open(pdb_path, 'w') as pdb_out:
            app.PDBFile.writeFile(self.topology, self.positions, file=pdb_out, keepIds=True)
            logger.log("wrote ", pdb_path)
        # Read the file back so self.pdb reflects what was written
        with open(pdb_path, 'r') as pdb_in:
            self.pdb = app.PDBFile(pdb_in)

        return self.system, self.topology, self.positions
def generate_parameters(molecule, basepath='json-files', small_molecule_forcefield='openff-1.1.0'):
    """
    Generate JSON parameter cache for a molecule in f'{basepath}/{molecule.name}.json'

    If the cache file already exists the function returns immediately.
    Failures during parameterization are reported on stdout and do not raise.

    Parameters
    ----------
    molecule : openforcefield.topology.Molecule
        The molecule to parameterize
    basepath : str, default 'json-files'
        Directory in which the per-molecule cache file is stored
    small_molecule_forcefield : str, default 'openff-1.1.0'
        Small molecule force field name passed to SystemGenerator
    """
    import os

    # Honour `basepath` (the path was previously hard-coded to 'parallel/')
    cache_filename = os.path.join(basepath, f'{molecule.name}.json')

    # Skip molecules that already have a cache file. The original tested the
    # function object `os.path.exists` (always truthy) instead of calling it,
    # so nothing was ever generated.
    if os.path.exists(cache_filename):
        return

    # Generate and cache parameters
    from openmmforcefields.generators import SystemGenerator
    system_generator = SystemGenerator(small_molecule_forcefield=small_molecule_forcefield,
                                       molecules=[molecule],
                                       cache=cache_filename)
    try:
        system_generator.create_system(molecule.to_topology().to_openmm())
    except Exception as e:
        # Deliberate best-effort: report the failure and carry on.
        # NOTE(review): other code in this file uses `molecule.to_smiles()`;
        # confirm `molecule.smiles` exists on the Molecule type in use.
        print(f'FAILED: {molecule.smiles}')
        print(e)

    del system_generator
def test_parameterize_molecules_from_creation(self):
    """Test that SystemGenerator can parameterize pre-specified molecules in vacuum.

    Each molecule is parameterized twice with the same generator; the second
    call is required to take less time than the first.
    """
    from openmmforcefields.generators import SystemGenerator
    from openmmforcefields.utils import Timer

    for _, testsystem in self.testsystems.items():
        print(testsystem)
        ligands = testsystem['molecules']

        for forcefield_name in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
            # Generator for this force field, with the molecules supplied up front
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=forcefield_name,
                molecules=ligands,
            )

            for ligand in ligands:
                ligand_topology = ligand.to_topology().to_openmm()

                # First parameterization
                with Timer() as t1:
                    first_system = generator.create_system(ligand_topology)
                assert first_system.getNumParticles() == ligand.n_atoms

                # Molecule should now be cached
                with Timer() as t2:
                    second_system = generator.create_system(ligand_topology)
                assert second_system.getNumParticles() == ligand.n_atoms

                assert t2.interval() < t1.interval()
def test_add_molecules(self):
    """Test that Molecules can be added to SystemGenerator later.

    The generator is created without molecules; each test system's molecules
    are registered via `add_molecules` and then parameterized twice, with the
    second call required to be faster than the first.
    """
    from openmmforcefields.generators import SystemGenerator
    from openmmforcefields.utils import Timer

    def create_and_check(generator, molecule, openmm_topology):
        # Build the System inside a Timer, verify the particle count,
        # and return the Timer for later comparison
        with Timer() as timer:
            system = generator.create_system(openmm_topology)
        assert system.getNumParticles() == molecule.n_atoms
        return timer

    for forcefield_name in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
        # Generator for this force field, initially without any molecules
        generator = SystemGenerator(
            forcefields=self.amber_forcefields,
            small_molecule_forcefield=forcefield_name,
        )

        # Add molecules for each test system separately
        for testsystem in self.testsystems.values():
            ligands = testsystem['molecules']
            generator.add_molecules(ligands)

            for ligand in ligands:
                ligand_topology = ligand.to_topology().to_openmm()
                t1 = create_and_check(generator, ligand, ligand_topology)
                # Molecule should now be cached
                t2 = create_and_check(generator, ligand, ligand_topology)
                assert t2.interval() < t1.interval()
def omm_system(input_sdf, input_system, forcefield, input_path, ff_files=[], template_ff='gaff-2.11'):
    """Create an OpenMM system and register a GAFF template generator.

    Parameters
    ----------
    input_sdf : iterable of str
        SDF file names, looked up relative to `input_path`; missing files are
        reported and skipped.
    input_system : object with a `topology` attribute
        Source of the topology passed to `create_system`.
    forcefield : object providing `registerTemplateGenerator`
        Receives the GAFF template generator built from the SDF molecules.
    input_path : str
        Directory containing the SDF files.
    ff_files : list of str
        Force field files passed to SystemGenerator.
    template_ff : str, default 'gaff-2.11'
        Small molecule force field name.

    Returns
    -------
    tuple
        (system, forcefield)
    """
    from openmmforcefields.generators import SystemGenerator, GAFFTemplateGenerator
    from openff.toolkit.topology import Molecule

    # maybe possible to add same parameters that u give forcefield.createSystem() function
    system_generator = SystemGenerator(
        forcefields=ff_files,
        small_molecule_forcefield=template_ff,
        forcefield_kwargs={
            'constraints': app.HBonds,
            'rigidWater': True,
            'removeCMMotion': False,
            'hydrogenMass': 4*amu,
        },
        cache='db.json',
    )

    # Collect the SDF paths that actually exist
    existing_sdf_paths = []
    for position, sdf_name in enumerate(input_sdf, 1):
        sdf_path = f'{input_path}/{sdf_name}'
        if os.path.exists(sdf_path):
            print(f'\tAdding extra SDF file {position} to pre-system: {sdf_path}')
            existing_sdf_paths.append(sdf_path)
            continue
        print(f'\tFile {sdf_path} not found!')

    # NOTE(review): the paths are splatted as positional arguments; this looks
    # like it only works when exactly one SDF path was found - confirm against
    # the Molecule.from_file signature.
    molecules = Molecule.from_file(*existing_sdf_paths, file_format='sdf')
    print(molecules)

    # The system is created from the topology alone; molecules are not passed here
    system = system_generator.create_system(topology=input_system.topology)

    gaff_templates = GAFFTemplateGenerator(molecules=molecules, forcefield=template_ff)
    gaff_templates.add_molecules(molecules)
    print(gaff_templates)
    forcefield.registerTemplateGenerator(gaff_templates.generator)

    print(system)
    print(forcefield)
    return system, forcefield
def create_simple_protein_system_generator():
    """Build a SystemGenerator with ff14SB/tip3p force field files and gaff-2.11.

    No barostat, no pre-registered molecules and no cache are configured; the
    non-periodic nonbonded method is NoCutoff.

    Returns
    -------
    openmmforcefields.generators.SystemGenerator
    """
    from openmmforcefields.generators import SystemGenerator

    return SystemGenerator(
        forcefields=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
        barostat=None,
        forcefield_kwargs={
            'removeCMMotion': False,
            'ewaldErrorTolerance': 1e-4,
            'constraints': app.HBonds,
            'hydrogenMass': 4 * unit.amus,
        },
        nonperiodic_forcefield_kwargs={'nonbondedMethod': app.NoCutoff},
        small_molecule_forcefield='gaff-2.11',
        molecules=None,
        cache=None,
    )
def test_barostat(self):
    """Test that barostat addition works correctly.

    A template MonteCarloBarostat is assigned to the generator; the System
    created afterwards must contain a MonteCarloBarostat force carrying the
    same pressure, temperature and frequency.
    """
    # Create a protein SystemGenerator
    generator = SystemGenerator(forcefields=self.amber_forcefields)

    import os
    from simtk import unit
    from simtk.openmm import MonteCarloBarostat
    from simtk.openmm import app
    from simtk.openmm.app import PDBFile

    # Create a template barostat
    pressure = 0.95 * unit.atmospheres
    temperature = 301.0 * unit.kelvin
    frequency = 23
    generator.barostat = MonteCarloBarostat(pressure, temperature, frequency)

    # Load a PDB file
    pdb_path = get_data_filename(
        os.path.join('perses_jacs_systems', 'bace', 'Bace_protein_fixed.pdb'))
    pdbfile = PDBFile(pdb_path)

    # Delete hydrogens from terminal protein residues
    # TODO: Fix the input files so we don't need to do this
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)
    protein_residues = []
    for residue in modeller.topology.residues():
        if residue.name != 'UNL':
            protein_residues.append(residue)
    termini_ids = [protein_residues[0].id, protein_residues[-1].id]
    terminal_hydrogens = []
    for atom in modeller.topology.atoms():
        if atom.element.symbol == 'H' and atom.residue.id in termini_ids:
            terminal_hydrogens.append(atom)
    modeller.delete(terminal_hydrogens)
    modeller.addHydrogens()

    # Create a System
    system = generator.create_system(modeller.topology)

    # Check barostat is present
    forces_by_class = {}
    for force in system.getForces():
        forces_by_class[force.__class__.__name__] = force
    assert 'MonteCarloBarostat' in forces_by_class

    # Check barostat parameters
    barostat_force = forces_by_class['MonteCarloBarostat']
    assert barostat_force.getDefaultPressure() == pressure
    assert barostat_force.getDefaultTemperature() == temperature
    assert barostat_force.getFrequency() == frequency
def test_forcefield_kwargs(self):
    """Test that forcefield_kwargs and nonbonded method specifications work correctly.

    For every test system and small molecule force field, each molecule is
    parameterized once from a non-periodic topology (expecting
    CutoffNonPeriodic) and once from a copy of that topology with box vectors
    set (expecting LJPME).
    """
    import copy

    import numpy as np
    from simtk import openmm
    from simtk import unit
    from simtk.openmm import app
    from openmmforcefields.generators import SystemGenerator

    forcefield_kwargs = {'hydrogenMass': 4 * unit.amu}

    def nonbonded_method(system):
        # Look up the NonbondedForce by class name and return its method
        forces = {force.__class__.__name__: force for force in system.getForces()}
        return forces['NonbondedForce'].getNonbondedMethod()

    for name, testsystem in self.testsystems.items():
        print(testsystem)
        molecules = testsystem['molecules']

        for small_molecule_forcefield in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
            # Create a SystemGenerator for this force field
            generator = SystemGenerator(
                forcefields=self.amber_forcefields,
                small_molecule_forcefield=small_molecule_forcefield,
                forcefield_kwargs=forcefield_kwargs,
                periodic_forcefield_kwargs={'nonbondedMethod': app.LJPME},
                nonperiodic_forcefield_kwargs={'nonbondedMethod': app.CutoffNonPeriodic},
                molecules=molecules)

            # Parameterize molecules
            for molecule in molecules:
                # Create non-periodic Topology
                nonperiodic_openmm_topology = molecule.to_topology().to_openmm()
                system = generator.create_system(nonperiodic_openmm_topology)
                method = nonbonded_method(system)
                # The messages are f-strings now; previously the `{...}`
                # placeholder was printed literally on failure.
                assert method == openmm.NonbondedForce.CutoffNonPeriodic, \
                    f"Expected CutoffNonPeriodic, got {method}"

                # Create periodic Topology
                box_vectors = unit.Quantity(np.diag([30, 30, 30]), unit.angstrom)
                periodic_openmm_topology = copy.deepcopy(nonperiodic_openmm_topology)
                periodic_openmm_topology.setPeriodicBoxVectors(box_vectors)
                system = generator.create_system(periodic_openmm_topology)
                method = nonbonded_method(system)
                assert method == openmm.NonbondedForce.LJPME, \
                    f"Expected LJPME, got {method}"
def test_OEMol_to_omm_ff(molecule=smiles_to_oemol('CC')):
    """
    Generating openmm objects for simulation from an OEMol object

    Parameters
    ----------
    molecule : openeye.oechem.OEMol
        Molecule to convert; the default is built from the SMILES 'CC' when
        this function is defined.

    Returns
    -------
    system : openmm.System
        openmm system object
    positions : unit.quantity
        positions of the system
    topology : app.topology.Topology
        openmm compatible topology object
    """
    import simtk.openmm.app as app
    import simtk.unit as unit
    from perses.utils.openeye import OEMol_to_omm_ff
    from simtk import openmm
    from openmmforcefields.generators import SystemGenerator
    from openff.toolkit.topology import Molecule

    # default arguments for SystemGenerators
    barostat = None
    forcefield_files = ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
    forcefield_kwargs = {
        'removeCMMotion': False,
        'ewaldErrorTolerance': 1e-4,
        'nonbondedMethod': app.NoCutoff,
        'constraints': app.HBonds,
        'hydrogenMass': 4 * unit.amus,
    }
    small_molecule_forcefield = 'gaff-2.11'

    system_generator = SystemGenerator(
        forcefields=forcefield_files,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        small_molecule_forcefield=small_molecule_forcefield,
        molecules=[Molecule.from_openeye(molecule)],
        cache=None)

    system, positions, topology = OEMol_to_omm_ff(molecule, system_generator)

    # isinstance avoids constructing a throwaway System just to compare types
    assert isinstance(system, openmm.System), \
        "An openmm.System has not been generated from OEMol_to_omm_ff()"

    return system, positions, topology
def baseline_energy(self, g, suffix=None):
    """Evaluate force field energies for the graph's stored coordinates.

    Builds an OpenMM system for `g.mol` with the small molecule force field
    named by `self.forcefield`, computes the potential energy for every
    coordinate set obtained from `g.nodes["n1"].data["xyz"]`, and stores the
    energies on the graph under the key "u" + suffix.

    Parameters
    ----------
    g : graph object exposing `mol` and `nodes`
        # presumably an espaloma Graph - confirm against callers
    suffix : str, optional
        Key suffix; defaults to "_" + self.forcefield.

    Returns
    -------
    g
        The same graph, with the energy tensor attached.
    """
    if suffix is None:
        suffix = "_" + self.forcefield

    from openmmforcefields.generators import SystemGenerator

    generator = SystemGenerator(small_molecule_forcefield=self.forcefield)

    # create system
    molecule = g.mol
    system = generator.create_system(
        topology=molecule.to_topology().to_openmm(),
        molecules=molecule,
    )

    # create simulation on a topology derived from the same molecule
    simulation = Simulation(
        topology=g.mol.to_topology().to_openmm(),
        system=system,
        integrator=openmm.LangevinIntegrator(TEMPERATURE, COLLISION_RATE, STEP_SIZE),
    )

    # Coordinates in nanometers, with the first two axes swapped so that
    # iteration yields one coordinate set at a time
    # (assumes xyz is stored as (atom, snapshot, 3) - TODO confirm)
    coordinates = Quantity(
        g.nodes["n1"].data["xyz"].detach().numpy(),
        esp.units.DISTANCE_UNIT,
    )
    snapshots = coordinates.value_in_unit(unit.nanometer).transpose((1, 0, 2))

    energies = []
    for snapshot in snapshots:
        simulation.context.setPositions(snapshot)
        state = simulation.context.getState(getEnergy=True)
        energies.append(
            state.getPotentialEnergy().value_in_unit(esp.units.ENERGY_UNIT))

    # Leading axis of length 1 is added in front of the per-snapshot energies
    g.nodes["g"].data["u%s" % suffix] = torch.tensor(energies)[None, :]
    return g
def test_parameterize_molecules_specified_during_create_system(self):
    """Test that SystemGenerator can parameterize molecules specified during create_system.

    The generator is created without any molecules; the full molecule list of
    the test system is supplied on each `create_system` call instead.
    """
    from openmmforcefields.generators import SystemGenerator

    for name, testsystem in self.testsystems.items():
        molecules = testsystem['molecules']

        for small_molecule_forcefield in SystemGenerator.SMALL_MOLECULE_FORCEFIELDS:
            # Create a SystemGenerator for this force field
            generator = SystemGenerator(forcefields=self.amber_forcefields,
                                        small_molecule_forcefield=small_molecule_forcefield)

            # Parameterize molecules
            for molecule in molecules:
                openmm_topology = molecule.to_topology().to_openmm()
                # Specify molecules during system creation
                system = generator.create_system(openmm_topology, molecules=molecules)
                # Same sanity check the sibling parameterization tests make;
                # previously the result was never inspected.
                assert system.getNumParticles() == molecule.n_atoms
def simulation_from_graph(self, g):
    """Create simulation from molecule.

    Optionally assigns partial charges to `g.mol`, parameterizes it with the
    small molecule force field named by `self.forcefield`, switches nonbonded
    forces to NoCutoff, raises every particle epsilon below EPSILON_MIN up to
    EPSILON_MIN, and wraps the system in a Simulation using a Langevin
    integrator on the "Reference" platform.

    Returns
    -------
    Simulation
    """
    molecule = g.mol

    # assign partial charge
    if self.charge_method is not None:
        molecule.assign_partial_charges(self.charge_method)

    # parameterize topology
    topology = g.mol.to_topology().to_openmm()
    generator = SystemGenerator(
        small_molecule_forcefield=self.forcefield,
        molecules=[g.mol],
    )

    # create openmm system
    system = generator.create_system(topology)

    # clamp epsilon from below at EPSILON_MIN on every nonbonded force
    for force in system.getForces():
        if "Nonbonded" not in force.__class__.__name__:
            continue
        force.setNonbondedMethod(openmm.NonbondedForce.NoCutoff)
        for index in range(force.getNumParticles()):
            charge, sigma, epsilon = force.getParticleParameters(index)
            if epsilon < EPSILON_MIN:
                force.setParticleParameters(index, charge, sigma, EPSILON_MIN)

    # use langevin integrator
    integrator = openmm.LangevinIntegrator(
        self.temperature, self.collision_rate, self.step_size)

    # initialize simulation
    reference_platform = openmm.Platform.getPlatformByName("Reference")
    return Simulation(
        topology=topology,
        system=system,
        integrator=integrator,
        platform=reference_platform,
    )
def hmr_driver(mol, ff_name):
    """Given an OpenFF Molecule, run a short 4 fs HMR simulation.

    The molecule is parameterized with hydrogen mass 4 amu and HBonds
    constraints, one conformer is generated for the starting positions, and
    2500 Langevin steps are taken.

    This function is adapted from
    https://github.com/openforcefield/openforcefields/issues/19#issuecomment-689816995

    Raises
    ------
    NANEnergyError
        If the potential energy after the run is NaN.
    """
    print(
        f"Running HMR with force field {ff_name} and molecule with SMILES {mol.to_smiles()}"
    )

    system_generator = SystemGenerator(
        small_molecule_forcefield=ff_name,
        forcefield_kwargs={
            "constraints": app.HBonds,
            "rigidWater": True,
            "removeCMMotion": False,
            # Open question from the original author: does this also
            # subtract mass from heavy atoms?
            "hydrogenMass": 4 * unit.amu,
        },
        molecules=mol,
    )
    system = system_generator.create_system(mol.to_topology().to_openmm())

    integrator = openmm.LangevinIntegrator(
        300 * unit.kelvin,          # temperature
        1.0 / unit.picoseconds,     # collision rate
        4.0 * unit.femtoseconds,    # timestep
    )
    context = openmm.Context(system, integrator)

    mol.generate_conformers(n_conformers=1)
    context.setPositions(mol.conformers[0])

    # Run for 10 ps (2500 steps of 4 fs)
    integrator.step(2500)

    final_state = context.getState(getEnergy=True)
    potential = final_state.getPotentialEnergy()

    # OpenMM will silently "fail" if energies aren't explicitly checked
    if np.isnan(potential / potential.unit):
        raise NANEnergyError()
# Fragment: build a solvated receptor+ligand system. The names receptor_file,
# output_prefix, ligand_ndx, ligand, pressure, temperature, protein_forcefield,
# solvation_forcefield, small_molecule_forcefield, solvent_padding and
# ionic_strength are defined outside this fragment.

# Load receptor and ligand with ParmEd and combine them into one structure
receptor_structure = parmed.load_file(receptor_file)
ligand_structure = parmed.load_file(
    f'{output_prefix}/LIG{ligand_ndx}_h.pdb')
complex_structure = receptor_structure + ligand_structure

# Barostat and force field options handed to the SystemGenerator
barostat = openmm.MonteCarloBarostat(pressure, temperature)
forcefield_kwargs = {
    'removeCMMotion': False,
    'ewaldErrorTolerance': 5e-04,
    'nonbondedMethod': app.PME,
    'constraints': None,
    'rigidWater': False
}
system_generator = SystemGenerator(
    forcefields=[protein_forcefield, solvation_forcefield],
    barostat=barostat,
    forcefield_kwargs=forcefield_kwargs,
    molecules=[ligand],
    small_molecule_forcefield=small_molecule_forcefield)

# Solvate the complex using the generator's force field
modeller = app.Modeller(complex_structure.topology,
                        complex_structure.positions)
modeller.addSolvent(system_generator.forcefield,
                    model='tip3p',
                    padding=solvent_padding,
                    ionicStrength=ionic_strength)

# Parameterize the solvated topology and convert back to a ParmEd structure
system = system_generator.create_system(modeller.topology)
solvated_structure = parmed.openmm.load_topology(modeller.topology,
                                                 system,
                                                 xyz=modeller.positions)
def run():
    """Run an example of sampling over a small set of molecules with NCMC switching.

    Starting from the molecule with SMILES "CC", each of the 50 iterations
    proposes a new molecule from `smiles_list`, alchemically removes old atoms,
    proposes geometry for new atoms, alchemically inserts them, and accepts or
    rejects the move. When `propagate` is set, 1000 Langevin steps are then
    run on the current system. The number of accepted moves and the
    per-molecule visit counts are printed at the end.
    """
    # Create initial model system, topology, and positions.
    smiles_list = ["CC", "CCC", "CCCC"]
    initial_molecule = smiles_to_oemol("CC")
    molecules = [Molecule.from_openeye(initial_molecule)]
    system_generator = SystemGenerator(molecules=molecules)
    initial_sys, initial_pos, initial_top = OEMol_to_omm_ff(
        initial_molecule, system_generator)
    smiles = "CC"
    # Number of iterations that ended in each chemical state
    stats = {ms: 0 for ms in smiles_list}
    # Run parameters
    temperature = 300.0 * unit.kelvin  # temperature
    pressure = 1.0 * unit.atmospheres  # pressure (not used below)
    collision_rate = 5.0 / unit.picoseconds  # collision rate for Langevin dynamics
    # Create proposal metadata, such as the list of molecules to sample (SMILES here)
    # proposal_metadata = {"smiles_list": smiles_list}
    list_of_oemols = []
    for smile in smiles_list:
        oemol = smiles_to_oemol(smile)
        list_of_oemols.append(oemol)
    transformation = topology_proposal.SmallMoleculeSetProposalEngine(
        list_of_oemols=list_of_oemols, system_generator=system_generator)
    # transformation = topology_proposal.SingleSmallMolecule(proposal_metadata)
    # Initialize weight calculation engine, along with its metadata
    bias_calculator = bias_engine.MinimizedPotentialBias(smiles_list)
    # Initialize NCMC engines.
    switching_timestep = (1.0 * unit.femtosecond
                          )  # Timestep for NCMC velocity Verlet integrations
    switching_nsteps = 10  # Number of steps to use in NCMC integration
    switching_functions = {  # Functional schedules to use in terms of `lambda`, which is switched from 0->1 for creation and 1->0 for deletion
        "lambda_sterics": "lambda",
        "lambda_electrostatics": "lambda",
        "lambda_bonds": "lambda",
        "lambda_angles": "sqrt(lambda)",
        "lambda_torsions": "lambda",
    }
    ncmc_engine = ncmc_switching.NCMCEngine(
        temperature=temperature,
        timestep=switching_timestep,
        nsteps=switching_nsteps,
        functions=switching_functions,
    )
    # Initialize GeometryEngine
    geometry_metadata = {"data": 0}  # currently ignored
    geometry_engine = geometry.FFAllAngleGeometryEngine(geometry_metadata)
    # Run a number of iterations.
    niterations = 50
    system = initial_sys
    topology = initial_top
    positions = initial_pos
    current_log_weight = bias_calculator.g_k(smiles)
    n_accepted = 0
    propagate = True
    for i in range(niterations):
        # Store old (system, topology, positions).
        # Propose a transformation from one chemical species to another.
        state_metadata = {"molecule_smiles": smiles}
        top_proposal = transformation.propose(
            system, topology, positions,
            state_metadata)  # Get a new molecule
        # QUESTION: What about instead initializing StateWeight once, and then using
        # log_state_weight = state_weight.computeLogStateWeight(new_topology, new_system, new_metadata)?
        log_weight = bias_calculator.g_k(
            top_proposal.metadata["molecule_smiles"])
        # Perform alchemical transformation.
        # Alchemically eliminate atoms being removed.
        [ncmc_old_positions,
         ncmc_elimination_logp] = ncmc_engine.integrate(top_proposal,
                                                        positions,
                                                        direction="delete")
        # Generate coordinates for new atoms and compute probability ratio of old and new probabilities.
        # QUESTION: Again, maybe we want to have the geometry engine initialized once only?
        geometry_proposal = geometry_engine.propose(
            top_proposal.new_to_old_atom_map,
            top_proposal.new_system,
            system,
            ncmc_old_positions,
        )
        # Alchemically introduce new atoms.
        [ncmc_new_positions, ncmc_introduction_logp
         ] = ncmc_engine.integrate(top_proposal,
                                   geometry_proposal.new_positions,
                                   direction="insert")
        # Compute total log acceptance probability, including all components.
        # The two log weights are divided by their own unit before summing.
        logp_accept = (top_proposal.logp_proposal + geometry_proposal.logp +
                       ncmc_elimination_logp + ncmc_introduction_logp +
                       log_weight / log_weight.unit -
                       current_log_weight / current_log_weight.unit)
        # Accept or reject. A move whose new positions contain NaN is
        # always rejected.
        if ((logp_accept >= 0.0) or
            (np.random.uniform() < np.exp(logp_accept))) and not np.any(
                np.isnan(ncmc_new_positions)):
            # Accept.
            n_accepted += 1
            (system, topology, positions, current_log_weight, smiles) = (
                top_proposal.new_system,
                top_proposal.new_topology,
                ncmc_new_positions,
                log_weight,
                top_proposal.metadata["molecule_smiles"],
            )
        else:
            # Reject.
            logging.debug("reject")
        # Count the state occupied after this iteration (new or unchanged)
        stats[smiles] += 1
        print(positions)
        if propagate:
            # Propagate on a deep copy so the stored system is left untouched
            p_system = copy.deepcopy(system)
            integrator = openmm.LangevinIntegrator(temperature,
                                                   collision_rate,
                                                   switching_timestep)
            context = openmm.Context(p_system, integrator)
            context.setPositions(positions)
            print(context.getState(getEnergy=True).getPotentialEnergy())
            integrator.step(1000)
            state = context.getState(getPositions=True)
            positions = state.getPositions(asNumpy=True)
            del context, integrator, p_system
    print("The total number accepted was %d out of %d iterations" %
          (n_accepted, niterations))
    print(stats)
def __init__(self,
             protein_filename,
             mutation_chain_id,
             mutation_residue_id,
             proposed_residue,
             phase='complex',
             conduct_endstate_validation=True,
             ligand_input=None,
             ligand_index=0,
             water_model='tip3p',
             ionic_strength=0.15 * unit.molar,
             forcefield_files=['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml'],
             barostat=openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50),
             forcefield_kwargs={'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus},
             periodic_forcefield_kwargs={'nonbondedMethod': app.PME},
             nonperiodic_forcefield_kwargs=None,
             small_molecule_forcefields='gaff-2.11',
             complex_box_dimensions=None,
             apo_box_dimensions=None,
             flatten_torsions=False,
             flatten_exceptions=False,
             repartitioned_endstate=None,
             **kwargs):
    """
    Build the hybrid topology factory for a protein point mutation in the apo phase and,
    when a ligand is supplied, in the complex phase. The results are stored on
    ``self.apo_htf`` and ``self.complex_htf`` (``None`` when there is no ligand).

    NOTE: the list/dict/barostat defaults in the signature are created once at definition
    time and shared between calls; this method only passes them on and never mutates them.

    arguments
        protein_filename : str
            path to protein (to mutate); .pdb
        mutation_chain_id : str
            name of the chain to be mutated
        mutation_residue_id : str
            residue id to change
        proposed_residue : str
            three letter code of the residue to mutate to
        phase : str, default complex
            if phase == vacuum, then the complex will not be solvated with water; else, it will be solvated with tip3p
        conduct_endstate_validation : bool, default True
            whether to conduct an endstate validation of the HybridTopologyFactory. If using the
            RepartitionedHybridTopologyFactory, endstate validation cannot and will not be conducted.
        ligand_input : str or oechem.OEMol, default None
            path to ligand of interest (i.e. small molecule or protein); .sdf or .pdb,
            or an already-loaded OEMol object
        ligand_index : int, default 0
            which ligand to use
        water_model : str, default 'tip3p'
            solvent model to use for solvation
        ionic_strength : float * unit.molar, default 0.15 * unit.molar
            the total concentration of ions (both positive and negative) to add using Modeller.
            This does not include ions that are added to neutralize the system.
            Note that only monovalent ions are currently supported.
        forcefield_files : list of str, default ['amber14/protein.ff14SB.xml', 'amber14/tip3p.xml']
            forcefield files for proteins and solvent
        barostat : openmm.MonteCarloBarostat, default openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
            barostat to use; ``temperature`` is the module-level value at definition time
        forcefield_kwargs : dict, default {'removeCMMotion': False, 'ewaldErrorTolerance': 0.00025, 'constraints' : app.HBonds, 'hydrogenMass' : 4 * unit.amus}
            forcefield kwargs for system parametrization
        periodic_forcefield_kwargs : dict, default {'nonbondedMethod': app.PME}
            periodic forcefield kwargs for system parametrization
        nonperiodic_forcefield_kwargs : dict, default None
            non-periodic forcefield kwargs for system parametrization
        small_molecule_forcefields : str, default 'gaff-2.11'
            the forcefield string for small molecule parametrization
        complex_box_dimensions : Vec3, default None
            define box dimensions of complex phase;
            if None, padding is 1nm
        apo_box_dimensions : Vec3, default None
            define box dimensions of apo phase phase;
            if None, padding is 1nm
        flatten_torsions : bool, default False
            in the htf, flatten torsions involving unique new atoms at lambda = 0 and unique old atoms are lambda = 1
        flatten_exceptions : bool, default False
            in the htf, flatten exceptions involving unique new atoms at lambda = 0 and unique old atoms at lambda = 1
        repartitioned_endstate : int, default None
            the endstate (0 or 1) at which to build the RepartitionedHybridTopologyFactory. By default, this is None,
            meaning a vanilla HybridTopologyFactory will be built.
        **kwargs
            accepted for call-site compatibility; not read by this method

    raises
        ValueError
            if ``repartitioned_endstate`` is not None, 0, or 1

    TODO : allow argument for spectator ligands besides the 'ligand_input'
    """
    # Choose the factory up front so an unsupported endstate fails immediately with a
    # clear error instead of leaving `factory` unbound after the expensive setup below.
    if repartitioned_endstate is None:
        factory = HybridTopologyFactory
    elif repartitioned_endstate in [0, 1]:
        factory = RepartitionedHybridTopologyFactory
    else:
        raise ValueError(f"repartitioned_endstate must be None, 0, or 1; got {repartitioned_endstate!r}")

    # First thing to do is load the apo protein to mutate...
    with open(protein_filename, 'r') as protein_pdbfile:
        protein_pdb = app.PDBFile(protein_pdbfile)
    protein_positions = protein_pdb.positions
    protein_md_topology = md.Topology.from_openmm(protein_pdb.topology)
    # The OpenMM topology used downstream is regenerated from the mdtraj topology.
    protein_topology = protein_md_topology.to_openmm()
    protein_n_atoms = protein_md_topology.n_atoms

    # Load the ligand, if present
    molecules = []
    if ligand_input:
        ligand_oemol = None      # set when the ligand is a small molecule (OEMol)
        ligand_pdb_path = None   # set when the ligand is a protein given as a PDB path
        if isinstance(ligand_input, str):
            if ligand_input.endswith('.sdf'):  # small molecule
                ligand_oemol = createOEMolFromSDF(ligand_input, index=ligand_index)
            elif ligand_input.endswith('pdb'):  # protein
                ligand_pdb_path = ligand_input
        elif isinstance(ligand_input, oechem.OEMol):  # oemol object
            ligand_oemol = ligand_input

        if ligand_oemol is not None:
            molecules.append(Molecule.from_openeye(ligand_oemol, allow_undefined_stereo=False))
            ligand_positions = extractPositionsFromOEMol(ligand_oemol)
            ligand_topology = forcefield_generators.generateTopologyFromOEMol(ligand_oemol)
            ligand_md_topology = md.Topology.from_openmm(ligand_topology)
        elif ligand_pdb_path is not None:
            with open(ligand_pdb_path, 'r') as ligand_pdbfile:
                ligand_pdb = app.PDBFile(ligand_pdbfile)
            ligand_positions = ligand_pdb.positions
            ligand_md_topology = md.Topology.from_openmm(ligand_pdb.topology)
        else:
            # Covers both unsupported object types and string paths with an unknown
            # extension (the latter previously fell through to a NameError).
            _logger.warning(f'ligand filetype not recognised. Please provide a path to a .pdb or .sdf file')
            return
        ligand_n_atoms = ligand_md_topology.n_atoms

        # Now create a complex
        complex_md_topology = protein_md_topology.join(ligand_md_topology)
        complex_topology = complex_md_topology.to_openmm()
        complex_positions = unit.Quantity(np.zeros([protein_n_atoms + ligand_n_atoms, 3]), unit=unit.nanometers)
        complex_positions[:protein_n_atoms, :] = protein_positions
        complex_positions[protein_n_atoms:, :] = ligand_positions

    # Now for a system_generator
    self.system_generator = SystemGenerator(forcefields=forcefield_files,
                                            barostat=barostat,
                                            forcefield_kwargs=forcefield_kwargs,
                                            periodic_forcefield_kwargs=periodic_forcefield_kwargs,
                                            nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
                                            small_molecule_forcefield=small_molecule_forcefields,
                                            molecules=molecules,
                                            cache=None)

    # Solvate apo and complex...
    apo_input = list(self._solvate(protein_topology, protein_positions, water_model, phase, ionic_strength, apo_box_dimensions))
    inputs = [apo_input]
    if ligand_input:
        inputs.append(self._solvate(complex_topology, complex_positions, water_model, phase, ionic_strength, complex_box_dimensions))

    geometry_engine = FFAllAngleGeometryEngine(metadata=None,
                                               use_sterics=False,
                                               n_bond_divisions=100,
                                               n_angle_divisions=180,
                                               n_torsion_divisions=360,
                                               verbose=True,
                                               storage=None,
                                               bond_softening_constant=1.0,
                                               angle_softening_constant=1.0,
                                               neglect_angles=False,
                                               use_14_nonbondeds=True)

    # Run pipeline...
    htfs = []
    for (top, pos, system) in inputs:
        point_mutation_engine = PointMutationEngine(wildtype_topology=top,
                                                    system_generator=self.system_generator,
                                                    chain_id=mutation_chain_id,  # Denote the chain id allowed to mutate (it's always a string variable)
                                                    max_point_mutants=1,
                                                    residues_allowed_to_mutate=[mutation_residue_id],  # The residue ids allowed to mutate
                                                    allowed_mutations=[(mutation_residue_id, proposed_residue)],  # The residue ids allowed to mutate with the three-letter code allowed to change
                                                    aggregate=True)  # Always allow aggregation

        topology_proposal = point_mutation_engine.propose(system, top)

        # Only validate energy bookkeeping if the WT and proposed residues do not involve rings
        old_res = [res for res in top.residues() if res.id == mutation_residue_id][0]
        validate_bool = not (old_res.name in ring_amino_acids or proposed_residue in ring_amino_acids)
        new_positions, logp_proposal = geometry_engine.propose(topology_proposal, pos, beta, validate_energy_bookkeeping=validate_bool)
        # The return value is unused, but the reverse_* attributes of the geometry engine
        # are read below, so the call itself must stay.
        logp_reverse = geometry_engine.logp_reverse(topology_proposal, new_positions, pos, beta, validate_energy_bookkeeping=validate_bool)

        forward_htf = factory(topology_proposal=topology_proposal,
                              current_positions=pos,
                              new_positions=new_positions,
                              use_dispersion_correction=False,
                              functions=None,
                              softcore_alpha=None,
                              bond_softening_constant=1.0,
                              angle_softening_constant=1.0,
                              soften_only_new=False,
                              neglected_new_angle_terms=[],
                              neglected_old_angle_terms=[],
                              softcore_LJ_v2=True,
                              softcore_electrostatics=True,
                              softcore_LJ_v2_alpha=0.85,
                              softcore_electrostatics_alpha=0.3,
                              softcore_sigma_Q=1.0,
                              interpolate_old_and_new_14s=flatten_exceptions,
                              omitted_terms=None,
                              endstate=repartitioned_endstate,
                              flatten_torsions=flatten_torsions)

        if not topology_proposal.unique_new_atoms:
            assert geometry_engine.forward_final_context_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.forward_final_context_reduced_potential})"
            assert geometry_engine.forward_atoms_with_positions_reduced_potential is None, f"There are no unique new atoms but the geometry_engine's forward atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.forward_atoms_with_positions_reduced_potential})"
            # Nothing was added, so there is no valence energy contribution
            # (mirrors the unique-old-atoms branch below).
            added_valence_energy = 0.0
        else:
            added_valence_energy = geometry_engine.forward_final_context_reduced_potential - geometry_engine.forward_atoms_with_positions_reduced_potential

        if not topology_proposal.unique_old_atoms:
            assert geometry_engine.reverse_final_context_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's final context reduced potential is not None (i.e. {geometry_engine.reverse_final_context_reduced_potential})"
            assert geometry_engine.reverse_atoms_with_positions_reduced_potential is None, f"There are no unique old atoms but the geometry_engine's atoms-with-positions-reduced-potential in not None (i.e. {geometry_engine.reverse_atoms_with_positions_reduced_potential})"
            subtracted_valence_energy = 0.0
        else:
            subtracted_valence_energy = geometry_engine.reverse_final_context_reduced_potential - geometry_engine.reverse_atoms_with_positions_reduced_potential

        # Endstate validation is only run for the vanilla HybridTopologyFactory.
        if conduct_endstate_validation and repartitioned_endstate is None:
            zero_state_error, one_state_error = validate_endstate_energies(forward_htf._topology_proposal,
                                                                           forward_htf,
                                                                           added_valence_energy,
                                                                           subtracted_valence_energy,
                                                                           beta=beta,
                                                                           ENERGY_THRESHOLD=ENERGY_THRESHOLD)
            if zero_state_error > ENERGY_THRESHOLD:
                _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 0 state is above the threshold ({ENERGY_THRESHOLD}): {zero_state_error}")
            if one_state_error > ENERGY_THRESHOLD:
                _logger.warning(f"Reduced potential difference of the nonalchemical and alchemical Lambda = 1 state is above the threshold ({ENERGY_THRESHOLD}): {one_state_error}")

        htfs.append(forward_htf)

    self.apo_htf = htfs[0]
    self.complex_htf = htfs[1] if ligand_input else None
def create_systems(topologies_dict, positions_dict, output_directory, project_prefix, solvate=True):
    """
    Generate the systems ready for equilibrium simulations from a dictionary of topologies and positions

    For every SMILES key, one ``<output_directory>/<project_prefix>_<i>_initial.npy`` file is
    written containing ``(positions, mdtraj topology, system, smiles)``. The first entry fixes
    the number of added solvent residues; later entries are solvated with that same count.

    Parameters
    ----------
    topologies_dict : dict of str: topology
        A dictionary of the topologies to prepare, indexed by SMILES strings. Each topology
        must provide ``to_openmm()`` and ``n_residues`` (i.e. an mdtraj-style topology).
    positions_dict : dict of str: unit.Quantity array
        A dictionary of positions for the corresponding topologies, indexed by SMILES strings
    output_directory : str
        Location of output files
    project_prefix : str
        What to prepend to the names of files for this run
    solvate : bool, default True
        Whether to solvate the systems
    """
    barostat = openmm.MonteCarloBarostat(1.0 * unit.atmosphere, temperature, 50)
    system_generator = SystemGenerator(
        ['amber14/protein.ff14SB.xml', 'gaff.xml', 'amber14/tip3p.xml', 'MCL1_ligands.xml'],
        barostat=barostat,
        forcefield_kwargs={'constraints': app.HBonds, 'hydrogenMass': 4 * unit.amus},
        periodic_forcefield_kwargs={'nonbondedMethod': app.PME})

    list_of_smiles = list(topologies_dict.keys())
    initial_smiles = list_of_smiles[0]
    initial_topology = topologies_dict[initial_smiles]
    initial_positions = positions_dict[initial_smiles]

    if solvate:
        solvated_initial_positions, solvated_topology, solvated_system = solvate_system(
            initial_topology.to_openmm(), initial_positions, system_generator)
    else:
        solvated_initial_positions = initial_positions
        # create_system and md.Topology.from_openmm both need an OpenMM topology,
        # exactly as in the solvated branch.
        solvated_topology = initial_topology.to_openmm()
        solvated_system = system_generator.create_system(solvated_topology)

    md_topology = md.Topology.from_openmm(solvated_topology)

    if solvate:
        # Every later system is solvated with this same number of added residues.
        num_added = md_topology.n_residues - initial_topology.n_residues

    # makedirs also handles nested paths and a directory created concurrently.
    os.makedirs(output_directory, exist_ok=True)

    np.save("{}/{}_{}_initial.npy".format(output_directory, project_prefix, 0),
            (solvated_initial_positions, md_topology, solvated_system, initial_smiles))

    for i in tqdm.trange(1, len(list_of_smiles)):
        smiles = list_of_smiles[i]
        topology = topologies_dict[smiles]
        positions = positions_dict[smiles]

        if solvate:
            solvated_positions, solvated_topology, solvated_system = solvate_system(
                topology.to_openmm(), positions, system_generator, padding=None, num_added=num_added)
        else:
            # Use this molecule's own topology and positions; the previous code re-saved
            # the initial molecule's data under every SMILES key.
            solvated_positions = positions
            solvated_topology = topology.to_openmm()
            solvated_system = system_generator.create_system(solvated_topology)

        np.save("{}/{}_{}_initial.npy".format(output_directory, project_prefix, i),
                (solvated_positions, md.Topology.from_openmm(solvated_topology), solvated_system, smiles))
def _generate_openmm_system(self, molecule: "offtop.Molecule", method: str, keywords: Dict = None) -> "openmm.System":
    """
    Generate an OpenMM System object from the input molecule method and basis.

    The result is cached under a SHA-256 key built from the molecule's mapped SMILES,
    ``method`` and the string form of every value in ``keywords``; a cache hit returns
    the stored system without importing or running the system generator.

    Parameters
    ----------
    molecule
        Molecule to parameterize; must provide ``to_smiles`` and ``to_topology``.
    method
        Small molecule force field name handed to ``SystemGenerator``.
    keywords
        Optional options. Recognised keys are ``"constraints"``
        (``hbonds``/``allbonds``/``hangles``) and ``"nonbondedMethod"``
        (``ljpme``/``pme``/``ewald``/``nocutoff``/``cutoffnonperiodic``), matched
        case-insensitively. ``None`` is treated as no options.

    Raises
    ------
    ValueError
        If a constraint or nonbonded method name is not supported.
    """
    # The default is None; normalise once so the lookups below are safe.
    keywords = {} if keywords is None else keywords

    # create a hash based on the input options
    hashstring = molecule.to_smiles(isomeric=True, explicit_hydrogens=True, mapped=True) + method
    hashstring += "".join(str(value) for value in keywords.values())
    key = hashlib.sha256(hashstring.encode()).hexdigest()

    # now look for the system?
    if key in self._CACHE:
        return self._get_cache(key)

    # Heavy third-party imports are only needed when a system must actually be built.
    from openmmforcefields.generators import SystemGenerator
    from simtk.openmm import app

    # set up available options for openmm
    _constraint_types = {"hbonds": app.HBonds, "allbonds": app.AllBonds, "hangles": app.HAngles}
    _periodic_nonbond_types = {"ljpme": app.LJPME, "pme": app.PME, "ewald": app.Ewald}
    _non_periodic_nonbond_types = {"nocutoff": app.NoCutoff, "cutoffnonperiodic": app.CutoffNonPeriodic}

    forcefield_kwargs = None
    if "constraints" in keywords:
        constraints = keywords["constraints"]
        try:
            forcefield_kwargs = {"constraints": _constraint_types[constraints.lower()]}
        except (KeyError, AttributeError) as err:
            # AttributeError covers non-string values that have no .lower()
            raise ValueError(
                f"constraint '{constraints}' not supported, valid constraints are {_constraint_types.keys()}"
            ) from err

    periodic_forcefield_kwargs = None
    nonperiodic_forcefield_kwargs = None
    nonbondedmethod = keywords.get("nonbondedMethod", None)
    if nonbondedmethod is not None:
        method_name = nonbondedmethod.lower()
        if method_name in _periodic_nonbond_types:
            periodic_forcefield_kwargs = {"nonbondedMethod": _periodic_nonbond_types[method_name]}
        elif method_name in _non_periodic_nonbond_types:
            nonperiodic_forcefield_kwargs = {"nonbondedMethod": _non_periodic_nonbond_types[method_name]}
        else:
            raise ValueError(
                f"nonbondedmethod '{nonbondedmethod}' not supported, valid nonbonded methods are periodic: {_periodic_nonbond_types.keys()}"
                f" or non_periodic: {_non_periodic_nonbond_types.keys()}."
            )

    # now start the system generator
    system_generator = SystemGenerator(
        small_molecule_forcefield=method,
        forcefield_kwargs=forcefield_kwargs,
        nonperiodic_forcefield_kwargs=nonperiodic_forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_forcefield_kwargs,
    )
    topology = molecule.to_topology()
    system = system_generator.create_system(topology=topology.to_openmm(), molecules=[molecule])
    self._cache_it(key, system)
    return system
def setup_fah_run(destination_path, protein_pdb_filename, oemol=None, cache=None, restrain_rmsd=False):
    """
    Prepare simulation

    Builds a solvated system from the protein (plus the optional small molecule),
    minimizes and equilibrates it, and writes the serialized system, integrator and
    state (bz2-compressed XML) together with PDB snapshots into ``destination_path``.
    If the three XML files already exist the function returns without doing anything.

    Parameters
    ----------
    destination_path : str
        The path to the RUN to be created
    protein_pdb_filename : str
        Path to protein PDB file
    oemol : openeye.oechem.OEMol, optional, default=None
        The molecule to parameterize, with SDData attached
        If None, don't include the small molecule
    cache : optional, default=None
        Passed unchanged to ``SystemGenerator`` as its ``cache`` argument
    restrain_rmsd : bool, optional, default=False
        If True, restrain RMSD during first equilibration phase
    """
    import os

    # Parameters
    from simtk import unit, openmm
    protein_forcefield = 'amber14/protein.ff14SB.xml'
    solvent_forcefield = 'amber14/tip3p.xml'
    small_molecule_forcefield = 'openff-1.2.0'
    water_model = 'tip3p'
    solvent_padding = 10.0 * unit.angstrom
    ionic_strength = 70 * unit.millimolar  # assay buffer: 20 mM HEPES pH 7.3, 1 mM TCEP, 50 mM NaCl, 0.01% Tween-20, 10% glycerol
    pressure = 1.0 * unit.atmospheres
    collision_rate = 1.0 / unit.picoseconds
    temperature = 300.0 * unit.kelvin
    timestep = 4.0 * unit.femtoseconds
    iterations = 1000  # 1 ns equilibration
    nsteps_per_iteration = 250

    # Prepare phases
    system_xml_filename = os.path.join(destination_path, 'system.xml.bz2')
    integrator_xml_filename = os.path.join(destination_path, 'integrator.xml.bz2')
    state_xml_filename = os.path.join(destination_path, 'state.xml.bz2')

    # Check if we can skip setup
    openmm_files_exist = (os.path.exists(system_xml_filename)
                          and os.path.exists(state_xml_filename)
                          and os.path.exists(integrator_xml_filename))
    if openmm_files_exist:
        return

    # The remaining imports are deliberately placed after the skip check so that an
    # already-prepared RUN does not pay for (or depend on) them.
    import bz2
    import gzip
    import time

    import mdtraj as md
    import numpy as np
    from openmmforcefields.generators import SystemGenerator
    from openmmtools import integrators
    from rich.progress import track
    from simtk.openmm import app

    # Create barostat
    barostat = openmm.MonteCarloBarostat(pressure, temperature)

    # Create RUN directory if it does not yet exist
    os.makedirs(destination_path, exist_ok=True)

    # Load any molecule(s)
    molecule = None
    if oemol is not None:
        from openforcefield.topology import Molecule
        molecule = Molecule.from_openeye(oemol, allow_undefined_stereo=True)
        molecule.name = 'MOL'  # Ensure residue is MOL
        print([res for res in molecule.to_topology().to_openmm().residues()])

    # Create SystemGenerator
    forcefield_kwargs = {
        'removeCMMotion': False,
        'hydrogenMass': 3.0 * unit.amu,
        'constraints': app.HBonds,
        'rigidWater': True,
    }
    periodic_kwargs = {
        'nonbondedMethod': app.PME,
        'ewaldErrorTolerance': 2.5e-04,
    }
    forcefields = [protein_forcefield, solvent_forcefield]
    openmm_system_generator = SystemGenerator(
        forcefields=forcefields,
        molecules=molecule,
        small_molecule_forcefield=small_molecule_forcefield,
        cache=cache,
        barostat=barostat,
        forcefield_kwargs=forcefield_kwargs,
        periodic_forcefield_kwargs=periodic_kwargs)

    # Read protein
    print(f'Reading protein from {protein_pdb_filename}...')
    pdbfile = app.PDBFile(protein_pdb_filename)
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)

    if oemol is not None:
        # Add small molecule to the system
        modeller.add(molecule.to_topology().to_openmm(), molecule.conformers[0])
        # DEBUG : Check residue name
        with open(os.path.join(destination_path, 'initial-complex.pdb'), 'wt') as outfile:
            app.PDBFile.writeFile(modeller.topology, modeller.positions, outfile)

    # Add solvent
    print('Adding solvent...')
    kwargs = {'padding': solvent_padding}
    modeller.addSolvent(openmm_system_generator.forcefield, model=water_model, ionicStrength=ionic_strength, **kwargs)

    # Create an OpenMM system
    print('Creating OpenMM system...')
    system = openmm_system_generator.create_system(modeller.topology)

    # Add a virtual bond between protein and ligand to make sure they are not imaged separately
    if oemol is not None:
        mdtop = md.Topology.from_openmm(modeller.topology)
        for res in mdtop.residues:
            print(res)
        protein_atom_indices = mdtop.select('(protein and name CA)')  # protein CA atoms
        ligand_atom_indices = mdtop.select('((resname MOL) and (mass > 1))')  # ligand heavy atoms
        protein_atom_index = int(protein_atom_indices[0])
        ligand_atom_index = int(ligand_atom_indices[0])
        # Zero-energy bond: it contributes no force, only connectivity.
        force = openmm.CustomBondForce('0')
        force.addBond(protein_atom_index, ligand_atom_index, [])
        system.addForce(force)

    # Add RMSD restraints if requested
    if restrain_rmsd:
        print('Adding RMSD restraint...')
        kB = unit.AVOGADRO_CONSTANT_NA * unit.BOLTZMANN_CONSTANT_kB
        kT = kB * temperature
        # NOTE(review): this topology comes from the input PDB only, so the
        # 'resname MOL' part of the selection can only match if the ligand is
        # already present in that file - confirm this is intended.
        mdtop = md.Topology.from_openmm(pdbfile.topology)  # excludes solvent and ions
        #heavy_atom_indices = mdtop.select('mass > 1') # heavy solute atoms
        rmsd_atom_indices = mdtop.select(
            '(protein and (name CA)) or ((resname MOL) and (mass > 1))')  # CA atoms and ligand heavy atoms
        rmsd_atom_indices = [int(index) for index in rmsd_atom_indices]
        custom_cv_force = openmm.CustomCVForce('(K_RMSD/2)*RMSD^2')
        custom_cv_force.addGlobalParameter('K_RMSD', kT / unit.angstrom**2)
        rmsd_force = openmm.RMSDForce(modeller.positions, rmsd_atom_indices)
        custom_cv_force.addCollectiveVariable('RMSD', rmsd_force)
        force_index = system.addForce(custom_cv_force)

    # Create OpenM Context
    platform = openmm.Platform.getPlatformByName('OpenCL')
    platform.setPropertyDefaultValue('Precision', 'mixed')
    integrator = integrators.LangevinIntegrator(temperature, collision_rate, timestep)
    context = openmm.Context(system, integrator, platform)
    context.setPositions(modeller.positions)

    # Report initial potential energy
    state = context.getState(getEnergy=True)
    print(f'Initial potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

    # Store snapshots in MDTraj trajectory to examine RMSD
    mdtop = md.Topology.from_openmm(pdbfile.topology)
    atom_indices = mdtop.select('all')  # all solute atoms
    # The two selections below are only consumed by the disabled RMSD check further down.
    protein_atom_indices = mdtop.select('protein and (mass > 1)')  # heavy solute atoms
    if oemol is not None:
        ligand_atom_indices = mdtop.select('(resname MOL) and (mass > 1)')  # ligand heavy atoms
    trajectory = md.Trajectory(np.zeros([iterations + 1, len(atom_indices), 3], np.float32), mdtop)
    trajectory.xyz[0, :, :] = context.getState(getPositions=True).getPositions(asNumpy=True)[atom_indices] / unit.nanometers

    # Minimize
    print('Minimizing...')
    openmm.LocalEnergyMinimizer.minimize(context)

    # Equilibrate (with RMSD restraint if needed)
    initial_time = time.time()
    for iteration in track(range(iterations), 'Equilibrating...'):
        integrator.step(nsteps_per_iteration)
        trajectory.xyz[iteration + 1, :, :] = context.getState(getPositions=True).getPositions(asNumpy=True)[atom_indices] / unit.nanometers
    elapsed_time = (time.time() - initial_time) * unit.seconds
    ns_per_day = (context.getState().getTime() / elapsed_time) / (unit.nanoseconds / unit.day)
    print(f'Performance: {ns_per_day:8.3f} ns/day')

    if restrain_rmsd:
        # Disable RMSD restraint
        context.setParameter('K_RMSD', 0.0)

        print('Minimizing...')
        openmm.LocalEnergyMinimizer.minimize(context)

        for iteration in track(range(iterations), 'Equilibrating without RMSD restraint...'):
            integrator.step(nsteps_per_iteration)

    # Retrieve state
    state = context.getState(getPositions=True, getVelocities=True, getEnergy=True, getForces=True)
    system.setDefaultPeriodicBoxVectors(*state.getPeriodicBoxVectors())
    modeller.topology.setPeriodicBoxVectors(state.getPeriodicBoxVectors())
    print(f'Final potential energy is {state.getPotentialEnergy()/unit.kilocalories_per_mole:.3f} kcal/mol')

    # Drop the restraint force so it is not part of the serialized system
    if restrain_rmsd:
        print('Removing RMSD restraint from system...')
        system.removeForce(force_index)

    #if oemol is not None:
    #    # Check final RMSD
    #    print('checking RMSD...')
    #    trajectory.superpose(trajectory, atom_indices=protein_atom_indices)
    #    protein_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=protein_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_protein_rmsd', f'{protein_rmsd:.2f} A')
    #    ligand_rmsd = md.rmsd(trajectory, trajectory[-1], atom_indices=ligand_atom_indices)[-1] * 10 # Angstroms
    #    oechem.OESetSDData(oemol, 'equil_ligand_rmsd', f'{ligand_rmsd:.2f} A')
    #    print('RMSD after equilibration: protein {protein_rmsd:8.2f} A | ligand {ligand_rmsd:8.3f} A')

    # Save as OpenMM
    print('Exporting for OpenMM FAH simulation...')
    with bz2.open(integrator_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(integrator))
    with bz2.open(state_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(state))
    with bz2.open(system_xml_filename, 'wt') as f:
        f.write(openmm.XmlSerializer.serialize(system))
    # This file carries a .gz extension, so it must be gzip-compressed; it was
    # previously written with bz2 and could not be opened by gzip readers.
    with gzip.open(os.path.join(destination_path, 'equilibrated-all.pdb.gz'), 'wt') as f:
        app.PDBFile.writeFile(modeller.topology, state.getPositions(), f)
    with open(os.path.join(destination_path, 'equilibrated-solute.pdb'), 'wt') as f:
        mdtraj_topology = md.Topology.from_openmm(modeller.topology)
        mdtraj_trajectory = md.Trajectory([state.getPositions(asNumpy=True) / unit.nanometers], mdtraj_topology)
        selection = mdtraj_topology.select('not water')
        mdtraj_trajectory = mdtraj_trajectory.atom_slice(selection)
        app.PDBFile.writeFile(mdtraj_trajectory.topology.to_openmm(), mdtraj_trajectory.openmm_positions(0), f)
    if oemol is not None:
        # Write molecule as SDF, mol2, SMILES, and CSV
        # NOTE(review): `oechem` is not imported inside this function; it is assumed
        # to be available at module level - confirm.
        for extension in ['sdf', 'mol2', 'smi', 'csv']:
            filename = os.path.join(destination_path, f'molecule.{extension}')
            with oechem.oemolostream(filename) as ofs:
                oechem.OEWriteMolecule(ofs, oemol)

    # Clean up
    del context, integrator
platform = openmm.Platform.getPlatformByName('OpenCL')
platform.setPropertyDefaultValue('Precision', 'mixed')

# ---SYSTEM PREPARATION---
# setup AM1-BCC charges for the solute, add solvent, set non-bonded method etc
ligand_mol = Molecule.from_file('ethanol.sdf', file_format='sdf')

forcefield_kwargs = {
    'constraints': app.HBonds,
    'rigidWater': True,
    'removeCMMotion': True,
    'hydrogenMass': 4 * unit.amu,
}
# Nonbonded settings for the periodic (solvated) system. They are handed to the
# SystemGenerator so that create_system() applies them together with
# forcefield_kwargs.
periodic_forcefield_kwargs = {
    'nonbondedMethod': PME,
    'nonbondedCutoff': 9.0 * unit.angstroms,
}

system_generator = SystemGenerator(
    forcefields=['amber/ff14SB.xml', 'amber/tip4pew_standard.xml'],
    small_molecule_forcefield='gaff-2.11',
    molecules=[ligand_mol],
    forcefield_kwargs=forcefield_kwargs,
    periodic_forcefield_kwargs=periodic_forcefield_kwargs)

ligand_pdb = PDBFile('ethanol.pdb')

modeller = Modeller(ligand_pdb.topology, ligand_pdb.positions)
modeller.addSolvent(system_generator.forcefield, model='tip4pew', padding=12.0 * unit.angstroms)

# Build the system through the generator. Calling
# system_generator.forcefield.createSystem(...) directly bypassed forcefield_kwargs,
# so hydrogenMass, rigidWater and removeCMMotion above were silently ignored.
system = system_generator.create_system(modeller.topology)

# ---FINISHED SYSTEM PREPARATION---
checkpoint_filename = "equilibrated_checkpoint_5ns.chk" traj_output_filename = "equilibrated_traj_5ns.xtc" # Define the barostat for the system barostat = mm.MonteCarloBarostat(pressure, temperature) # Load and sort ligands molecules = Molecule.from_file(input_ligands_sdf) ligand_names = ["larotrectinib", "selitrectinib", "repotrectinib"] ligand_dict = dict(zip(ligand_names, molecules)) # Create dict for easy access later # Make the SystemGenerator system_generator = SystemGenerator( forcefields=[protein_forcefield, solvation_forcefield], barostat=barostat, periodic_forcefield_kwargs={"nonbondedMethod": app.PME}, small_molecule_forcefield=small_molecule_forcefield, molecules=ligand_dict[chosen_ligand], ) # Read in the PDB and create an OpenMM topology pdbfile = app.PDBFile(input_pdb) protein_topology, protein_positions = pdbfile.topology, pdbfile.positions # Add ligand to topology - credit to @hannahbrucemacdonald for help here print("--> Combining protein and ligand topologies") off_ligand_topology = Topology.from_molecules(ligand_dict[chosen_ligand]) ligand_topology = off_ligand_topology.to_openmm() ligand_positions = ligand_dict[chosen_ligand].conformers[0] md_protein_topology = md.Topology.from_openmm(
def test_create(self):
    """Check that a SystemGenerator can be constructed without any arguments"""
    # No force fields, cache, or molecules are supplied: the test passes as long as
    # building the empty generator does not raise.
    empty_generator = SystemGenerator()
pdb = app.PDBFile('bstate.pdb') molecule = Molecule.from_smiles('CCCCO') forcefield_kwargs = {'constraints': app.HBonds, 'removeCMMotion': False} periodic_forcefield_kwargs = { 'nonbondedMethod': app.LJPME, 'nonbondedCutoff': 1 * nanometer } membrane_barostat = MonteCarloMembraneBarostat( 1 * bar, 0.0 * bar * nanometer, 308 * kelvin, MonteCarloMembraneBarostat.XYIsotropic, MonteCarloMembraneBarostat.ZFree, 15) system_generator = SystemGenerator( forcefields=['amber/lipid17.xml', 'amber/tip3p_standard.xml'], small_molecule_forcefield='gaff-2.11', barostat=membrane_barostat, forcefield_kwargs=forcefield_kwargs, periodic_forcefield_kwargs=periodic_forcefield_kwargs) system = system_generator.create_system(pdb.topology, molecules=molecule) integrator = LangevinIntegrator(300 * kelvin, 1 / picosecond, 0.002 * picosecond) platform = Platform.getPlatformByName('CUDA') simulation = app.Simulation(pdb.topology, system, integrator, platform) simulation.context.setPositions(pdb.positions) simulation.loadState('parent.xml') simulation.reporters.append( StateDataReporter('seg.nfo',