def test_jacs_ligands(self):
    """Parameterize each ligand of the enabled JACS systems with the template generator.

    Every ligand is parameterized on its own with ``NoCutoff``.  Failures are
    printed and tallied per system; they do not make the test fail.
    """
    from openforcefield.topology import Molecule
    from simtk.openmm.app import ForceField, NoCutoff

    # Disabled systems stay listed (commented out) so they are easy to re-enable
    jacs_systems = {
        #'bace'     : { 'prefix' : 'Bace' },
        #'cdk2'     : { 'prefix' : 'CDK2' },
        'jnk1'     : { 'prefix' : 'Jnk1' },
        'mcl1'     : { 'prefix' : 'MCL1' },
        #'p38'      : { 'prefix' : 'p38' },
        'ptp1b'    : { 'prefix' : 'PTP1B' },
        'thrombin' : { 'prefix' : 'Thrombin' },
        #'tyk2'     : { 'prefix' : 'Tyk2' },
    }

    for system_name, settings in jacs_systems.items():
        system_dir = os.path.join('perses_jacs_systems', system_name)

        # Load molecules
        ligand_sdf_filename = get_data_filename(
            os.path.join(system_dir, settings['prefix'] + '_ligands.sdf'))
        print(f'Reading molecules from {ligand_sdf_filename} ...')
        molecules = Molecule.from_file(ligand_sdf_filename, allow_undefined_stereo=True)

        # A result that does not support len() is treated as a single molecule
        try:
            len(molecules)
        except TypeError:
            molecules = [molecules]
        print(f'Read {len(molecules)} molecules from {ligand_sdf_filename}')

        # Only a handful of ligands are kept when running on Travis
        max_molecules = 3 if 'TRAVIS' in os.environ else len(molecules)
        molecules = molecules[:max_molecules]
        print(f'{len(molecules)} molecules remain after filtering')

        # Template generator backed by a cache file in the system's data directory
        cache = os.path.join(get_data_filename(system_dir), 'cache.json')
        generator = self.TEMPLATE_GENERATOR(molecules=molecules, cache=cache)

        # Empty ForceField whose only source of templates is the generator
        forcefield = ForceField()
        forcefield.registerTemplateGenerator(generator.generator)

        # Parameterize all molecules, tallying the outcomes
        print(f'Caching all molecules for {system_name} at {cache} ...')
        n_success = 0
        n_failure = 0
        for ligand in molecules:
            ligand_topology = ligand.to_topology().to_openmm()
            try:
                forcefield.createSystem(ligand_topology, nonbondedMethod=NoCutoff)
            except Exception as e:
                n_failure += 1
                print(e)
            else:
                n_success += 1
        print(f'{n_failure}/{n_success+n_failure} ligands failed to parameterize for {system_name}')
def setUp(self):
    """Build ``self.testsystems`` from the enabled JACS protein:ligand systems.

    For each enabled system the protein PDB file and the ligand SDF file are
    loaded, the ligands are passed through ``self.filter_molecules``, and one
    ParmEd protein + ligand structure is created per retained ligand.  Systems
    with no ligands left after filtering are skipped.  Each stored entry is a
    dict with the keys ``name``, ``protein_pdbfile``, ``molecules`` and
    ``complex_structures``.
    """
    import logging

    import parmed
    from openforcefield.topology import Molecule
    from simtk.openmm.app import PDBFile

    self.testsystems = dict()
    for (system_name, prefix) in [
            # TODO: Uncomment these after we fix input files
            #('bace', 'Bace'),
            #('cdk2', 'CDK2'),
            ('jnk1', 'Jnk1'),
            #('mcl1', 'MCL1'),
            #('p38', 'p38'),
            #('ptp1b', 'PTP1B'),
            #('thrombin', 'Thrombin'),
            #('tyk2', 'Tyk2'),
        ]:
        # Load protein
        pdb_filename = get_data_filename(
            os.path.join('perses_jacs_systems', system_name, prefix + '_protein_fixed.pdb'))
        pdbfile = PDBFile(pdb_filename)

        # Load molecules
        sdf_filename = get_data_filename(
            os.path.join('perses_jacs_systems', system_name, prefix + '_ligands.sdf'))
        molecules = Molecule.from_file(sdf_filename, allow_undefined_stereo=True)

        # Molecule.from_file yields a bare Molecule (no len()) for a
        # single-record file, so normalise to a list before counting/filtering
        try:
            len(molecules)
        except TypeError:
            molecules = [molecules]
        print(f'Read {len(molecules)} molecules from {sdf_filename}')

        # Filter molecules as appropriate
        molecules = self.filter_molecules(molecules)
        n_molecules = len(molecules)
        print(f'{n_molecules} molecules remain after filtering')
        if n_molecules == 0:
            continue

        # Create structures
        protein_structure = parmed.load_file(pdb_filename)
        molecules_structure = parmed.load_file(sdf_filename)
        # NOTE(review): the ligand structures are taken by position from the
        # unfiltered SDF file; if filter_molecules drops anything other than
        # trailing entries, molecules[i] and complex_structures[i] may refer to
        # different ligands -- confirm against filter_molecules.
        complex_structures = [
            (protein_structure + molecules_structure[index])
            for index in range(n_molecules)
        ]

        # Store
        self.testsystems[system_name] = {
            'name' : system_name,
            'protein_pdbfile' : pdbfile,
            'molecules' : molecules,
            'complex_structures' : complex_structures,
        }

    # TODO: Create other test topologies
    # TODO: Protein-only
    # TODO: Protein-ligand topology
    # TODO: Solvated protein-ligand topology
    # TODO: Host-guest topology

    # Suppress DEBUG logging from various packages
    for name in ['parmed', 'matplotlib']:
        logging.getLogger(name).setLevel(logging.WARNING)
def test_complex(self):
    """Test parameterizing a protein:ligand complex in vacuum and after solvation.

    For every test system the first complex is parameterized as-is, then
    solvated with ``Modeller.addSolvent`` and parameterized again.  In both
    cases the particle count of the resulting System must match the number of
    atoms in the topology it was built from.  The solvated model is written to
    ``test.pdb`` in the current working directory.
    """
    from openmmforcefields.generators import SystemGenerator
    from simtk import unit
    from simtk.openmm import app

    for name, testsystem in self.testsystems.items():
        print(f'Testing parameterization of {name} in vacuum')
        molecules = testsystem['molecules']

        # Select a complex from the set
        ligand_index = 0
        complex_structure = testsystem['complex_structures'][ligand_index]
        # The topology is requested once and reused for both the vacuum
        # system and the Modeller below
        openmm_topology = complex_structure.topology

        cache = os.path.join(
            get_data_filename(os.path.join('perses_jacs_systems', name)), 'cache.json')

        # Create a system in vacuum
        generator = SystemGenerator(
            forcefields=self.amber_forcefields, molecules=molecules, cache=cache)
        system = generator.create_system(openmm_topology)
        assert system.getNumParticles() == len(complex_structure.atoms)

        # Create solvated structure
        modeller = app.Modeller(openmm_topology, complex_structure.positions)
        modeller.addSolvent(
            generator.forcefield,
            padding=0*unit.angstroms,
            ionicStrength=300*unit.millimolar)

        # Create a system with solvent and ions
        system = generator.create_system(modeller.topology)
        assert system.getNumParticles() == len(list(modeller.topology.atoms()))

        with open('test.pdb', 'w') as outfile:
            app.PDBFile.writeFile(modeller.topology, modeller.positions, outfile)
def test_barostat(self):
    """Check that a template barostat set on the generator ends up in the System.

    A ``MonteCarloBarostat`` is assigned to ``generator.barostat``; the System
    created for the BACE protein must then contain a ``MonteCarloBarostat``
    force reporting the same pressure, temperature and frequency.
    """
    import os
    from simtk import unit
    from simtk.openmm import MonteCarloBarostat, app
    from simtk.openmm.app import PDBFile

    # Protein-only SystemGenerator
    generator = SystemGenerator(forcefields=self.amber_forcefields)

    # Template barostat with deliberately non-default-looking settings
    pressure = 0.95 * unit.atmospheres
    temperature = 301.0 * unit.kelvin
    frequency = 23
    generator.barostat = MonteCarloBarostat(pressure, temperature, frequency)

    # Load a PDB file
    pdb_path = os.path.join('perses_jacs_systems', 'bace', 'Bace_protein_fixed.pdb')
    pdbfile = PDBFile(get_data_filename(pdb_path))

    # Delete hydrogens from terminal protein residues
    # TODO: Fix the input files so we don't need to do this
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)
    protein_residues = [
        residue for residue in modeller.topology.residues()
        if residue.name != 'UNL'
    ]
    termini_ids = (protein_residues[0].id, protein_residues[-1].id)
    terminal_hydrogens = [
        atom for atom in modeller.topology.atoms()
        if atom.element.symbol == 'H' and atom.residue.id in termini_ids
    ]
    modeller.delete(terminal_hydrogens)
    modeller.addHydrogens()

    # Create a System
    system = generator.create_system(modeller.topology)

    # Index the forces by class name and check the barostat is present
    forces = {}
    for force in system.getForces():
        forces[force.__class__.__name__] = force
    assert 'MonteCarloBarostat' in forces

    # Check barostat parameters
    barostat = forces['MonteCarloBarostat']
    assert barostat.getDefaultPressure() == pressure
    assert barostat.getDefaultTemperature() == temperature
    assert barostat.getFrequency() == frequency
def setUp(self):
    """Load the MiniDrugBank test molecules into ``self.molecules``.

    The molecules are read from the bundled SDF file and passed through
    ``self.filter_molecules`` before being stored.
    """
    # TODO: Harmonize with test_system_generator.py infrastructure
    import logging
    from openforcefield.topology import Molecule

    # Read test molecules
    sdf_path = get_data_filename("minidrugbank/MiniDrugBank-without-unspecified-stereochemistry.sdf")
    all_molecules = Molecule.from_file(sdf_path, allow_undefined_stereo=True)

    # Filter molecules as appropriate
    self.molecules = self.filter_molecules(all_molecules)

    # Suppress DEBUG logging from various packages
    for noisy_package in ('parmed', 'matplotlib'):
        logging.getLogger(noisy_package).setLevel(logging.WARNING)
def test_jacs_complexes(self):
    """Use template generator to parameterize the Schrodinger JACS set of complexes.

    For each enabled system the ligands and the protein are loaded, a limited
    number of protein + ligand complexes is built, and each complex is
    parameterized first as-is with ``NoCutoff`` and then, after
    ``Modeller.addSolvent``, with ``PME``.  Any exception raised during
    parameterization propagates and fails the test.
    """
    import parmed
    from openforcefield.topology import Molecule
    from simtk import unit
    from simtk.openmm import app
    from simtk.openmm.app import ForceField, NoCutoff, PDBFile, PME

    # TODO: Uncomment working systems when we have cleaned up the input files
    jacs_systems = {
        #'bace'     : { 'prefix' : 'Bace' },
        #'cdk2'     : { 'prefix' : 'CDK2' },
        #'jnk1'     : { 'prefix' : 'Jnk1' },
        'mcl1'     : { 'prefix' : 'MCL1' },
        #'p38'      : { 'prefix' : 'p38' },
        #'ptp1b'    : { 'prefix' : 'PTP1B' },
        #'thrombin' : { 'prefix' : 'Thrombin' },
        #'tyk2'     : { 'prefix' : 'Tyk2' },
    }

    for system_name in jacs_systems:
        prefix = jacs_systems[system_name]['prefix']

        # Read molecules
        ligand_sdf_filename = get_data_filename(
            os.path.join('perses_jacs_systems', system_name, prefix + '_ligands.sdf'))
        print(f'Reading molecules from {ligand_sdf_filename} ...')
        molecules = Molecule.from_file(ligand_sdf_filename, allow_undefined_stereo=True)
        # A result that does not support len() is treated as a single molecule
        try:
            len(molecules)
        except TypeError:
            molecules = [molecules]
        print(f'Read {len(molecules)} molecules from {ligand_sdf_filename}')

        # Read ParmEd Structures
        protein_pdb_filename = get_data_filename(
            os.path.join('perses_jacs_systems', system_name, prefix + '_protein.pdb'))
        print(f'Reading protein from {protein_pdb_filename} ...')
        #protein_structure = parmed.load_file(protein_pdb_filename) # NOTE: This mis-interprets distorted geometry and sequentially-numbered residues that span chain breaks
        pdbfile = PDBFile(protein_pdb_filename)
        protein_structure = parmed.openmm.load_topology(
            pdbfile.topology, xyz=pdbfile.positions.value_in_unit(unit.angstroms))
        ligand_structures = parmed.load_file(ligand_sdf_filename)
        # Same single-entry normalisation as for the molecules above
        try:
            len(ligand_structures)
        except TypeError:
            ligand_structures = [ligand_structures]
        assert len(ligand_structures) == len(molecules)

        # Filter molecules: fewer complexes are used when running on Travis
        MAX_MOLECULES = 3 if 'TRAVIS' in os.environ else 6
        molecules = molecules[:MAX_MOLECULES]
        ligand_structures = ligand_structures[:MAX_MOLECULES]
        print(f'{len(molecules)} molecules remain after filtering')

        # Create complexes
        complex_structures = [
            (protein_structure + ligand_structure)
            for ligand_structure in ligand_structures
        ]

        # Create template generator with local cache
        cache = os.path.join(
            get_data_filename(os.path.join('perses_jacs_systems', system_name)), 'cache.json')
        generator = self.TEMPLATE_GENERATOR(molecules=molecules, cache=cache)

        # Create a ForceField
        forcefield = ForceField(*self.amber_forcefields)
        # Register the template generator
        forcefield.registerTemplateGenerator(generator.generator)

        # Parameterize all complexes
        print(f'Caching all molecules for {system_name} at {cache} ...')
        # molecules and complex_structures were truncated with the same slice
        # after their lengths were asserted equal, so they pair up one-to-one
        for molecule, complex_structure in zip(molecules, complex_structures):
            smiles = molecule.to_smiles()

            # Delete hydrogens from terminal protein residues
            # TODO: Fix the input files so we don't need to do this
            modeller = app.Modeller(complex_structure.topology, complex_structure.positions)
            residues = [
                residue for residue in modeller.topology.residues()
                if residue.name != 'UNL'
            ]
            termini_ids = [residues[0].id, residues[-1].id]
            hs = [
                atom for atom in modeller.topology.atoms()
                if atom.element.symbol in ['H'] and atom.residue.id in termini_ids
            ]
            modeller.delete(hs)
            modeller.addHydrogens(forcefield)

            # Parameterize protein:ligand complex in vacuum
            print(f' Parameterizing {system_name} : {smiles} in vacuum...')
            forcefield.createSystem(modeller.topology, nonbondedMethod=NoCutoff)

            # Parameterize protein:ligand complex in solvent
            print(f' Parameterizing {system_name} : {smiles} in explicit solvent...')
            modeller.addSolvent(
                forcefield,
                padding=0*unit.angstroms,
                ionicStrength=300*unit.millimolar)
            forcefield.createSystem(modeller.topology, nonbondedMethod=PME)
def setUp(self):
    """Build ``self.testsystems`` from the enabled JACS systems (DEBUG variant).

    For each enabled system the protein PDB file and the shifted ligand SDF
    file are loaded and the ligand count is capped (10, or 2 when ``CI`` is
    truthy).  Each stored entry is a dict with the keys ``name``,
    ``protein_pdbfile``, ``molecules`` and ``complex_structures``.

    NOTE: ``complex_structures`` currently holds the ligand-only structures
    (DEBUG); the protein structure is loaded but not combined with them.
    As a debugging aid, the first structure and molecule of every test system
    are written to ``testsystem-*.pdb`` / ``testsystem-*.sdf`` files in the
    current working directory.
    """
    import logging

    import parmed
    from openforcefield.topology import Molecule
    from simtk.openmm import app
    from simtk.openmm.app import PDBFile

    self.testsystems = dict()
    for (system_name, prefix) in [
            # TODO: Uncomment these after we fix input files
            ('bace', 'Bace'),
            #('cdk2', 'CDK2'),
            #('jnk1', 'Jnk1'),
            #('mcl1', 'MCL1'),
            #('p38', 'p38'),
            #('ptp1b', 'PTP1B'),
            #('thrombin', 'Thrombin'),
            #('tyk2', 'Tyk2'),
        ]:
        # Load protein
        pdb_filename = get_data_filename(
            os.path.join('perses_jacs_systems', system_name, prefix + '_protein.pdb'))
        pdbfile = PDBFile(pdb_filename)

        # Load molecules
        sdf_filename = get_data_filename(
            os.path.join('perses_jacs_systems', system_name, prefix + '_ligands_shifted.sdf'))
        molecules = Molecule.from_file(sdf_filename, allow_undefined_stereo=True)
        # Molecule.from_file yields a bare Molecule (no len()) for a
        # single-record file, so normalise to a list before counting
        try:
            len(molecules)
        except TypeError:
            molecules = [molecules]
        print(f'Read {len(molecules)} molecules from {sdf_filename}')
        n_molecules = len(molecules)

        # Limit number of molecules for testing
        MAX_MOLECULES = 10 if not CI else 2
        if n_molecules > MAX_MOLECULES:
            print(f'Limiting to {MAX_MOLECULES} for testing...')
            n_molecules = MAX_MOLECULES
        molecules = list(molecules[:n_molecules])

        # Create structures
        # NOTE: This does not work because parmed does not correctly assign bonds for HID
        #protein_structure = parmed.load_file(pdb_filename)
        # NOTE: This is the workaround
        protein_structure = parmed.openmm.load_topology(
            pdbfile.topology, xyz=pdbfile.positions)
        molecules_structure = parmed.load_file(sdf_filename)
        molecules_structure = [
            molecules_structure[index] for index in range(n_molecules)
        ]
        # DEBUG: the "complexes" are the ligand-only structures.  The
        # ligand + protein combination used to be computed here and was
        # immediately overwritten by this list, so that dead work is dropped;
        # combine each entry with protein_structure to restore real complexes.
        complex_structures = list(molecules_structure)

        # Store
        self.testsystems[system_name] = {
            'name': system_name,
            'protein_pdbfile': pdbfile,
            'molecules': molecules,
            'complex_structures': complex_structures,
        }

    # DEBUG: dump the first structure/molecule of each test system to disk
    for name, testsystem in self.testsystems.items():
        filename = f'testsystem-{name}.pdb'
        print(filename)
        structure = testsystem['complex_structures'][0]
        #structure.save(filename, overwrite=True)
        with open(filename, 'w') as outfile:
            app.PDBFile.writeFile(structure.topology, structure.positions, outfile)
        testsystem['molecules'][0].to_file(
            f'testsystem-{name}-molecule.sdf', file_format="SDF")
        testsystem['molecules'][0].to_file(
            f'testsystem-{name}-molecule.pdb', file_format="PDB")

    # TODO: Create other test topologies
    # TODO: Protein-only
    # TODO: Protein-ligand topology
    # TODO: Solvated protein-ligand topology
    # TODO: Host-guest topology

    # Suppress DEBUG logging from various packages
    for name in ['parmed', 'matplotlib']:
        logging.getLogger(name).setLevel(logging.WARNING)