def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """
    Download a PDB entry, repair it with PDBFixer and write three PDB files.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: a string containing the pathway specifying how you want
            to organize the PDB files once written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: dictionary containing pdbs with chains to remove. It
            is read through the keys 'pdbid' and 'chain_to_remove', which are
            used as parallel sequences: entry i of 'chain_to_remove' is a
            whitespace-separated string of chain IDs for entry i of 'pdbid'.

    Returns:
        nothing, but it does write PDB files: ``<pdbid>_fixed_ph<ph>.pdb``,
        ``<pdbid>_fixed_ph<ph>_apo.pdb`` (after ``removeHeterogens(True)``) and
        ``<pdbid>_fixed_ph<ph>_apo_nowater.pdb`` (after
        ``removeHeterogens(False)``).
    """
    print(pdbid)

    # Download the topology from RCSB based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on hand curated .csv file
    if pdbid in chains_to_remove['pdbid']:
        row = chains_to_remove['pdbid'].index(pdbid)
        chains_list = chains_to_remove['chain_to_remove'][row].split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    chains = list(fixer.topology.chains())
    resindices = natsorted([residue.index for residue in chains[0].residues()])
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the N- and C-terminal fragments.
    # The mapping is re-checked before the second test because the first pop
    # may have removed the only entry.
    fixer.findMissingResidues()
    if fixer.missingResidues:
        first_key = min(fixer.missingResidues)
        if first_key[-1] <= first_resindex:
            fixer.missingResidues.pop(first_key)
    if fixer.missingResidues:
        last_key = max(fixer.missingResidues)
        if last_key[-1] >= last_resindex:
            fixer.missingResidues.pop(last_key)

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(suffix):
        # NOTE(review): keepNumbers is not defined in this function; it is
        # presumably a module-level flag - confirm.
        out_path = os.path.join(file_pathway, '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    _write('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('_apo_nowater')
def _geometry_forward(self, topology_proposal, old_sampler_state):
    """
    Ask the geometry engine for new positions and the forward log probability.

    Parameters
    ----------
    topology_proposal : TopologyProposal
        Contains old/new Topology and System objects and atom mappings.
    old_sampler_state : openmmtools.states.SamplerState
        Configurational properties of the old system atoms.

    Returns
    -------
    new_sampler_state : openmmtools.states.SamplerState
        Sampler state built from the proposed positions and the box vectors
        of ``old_sampler_state``.
    geometry_logp_propose : float
        The log probability of the forward-only proposal
    """
    if self.verbose:
        print("Geometry engine proposal...")

    # Time the proposal so the duration can be reported in verbose mode.
    started = time.time()
    proposal = self.geometry_engine.propose(
        topology_proposal,
        old_sampler_state.positions,
        self.sampler.thermodynamic_state.beta,
    )
    new_positions, geometry_logp_propose = proposal
    if self.verbose:
        elapsed = time.time() - started
        print('proposal took %.3f s' % elapsed)

    # Optionally dump the proposed geometry to the already-open PDB stream.
    pdb_stream = self.geometry_pdbfile
    if pdb_stream is not None:
        print("Writing proposed geometry...")
        from simtk.openmm.app import PDBFile
        PDBFile.writeFile(topology_proposal.new_topology, new_positions, file=pdb_stream)
        pdb_stream.flush()

    box = old_sampler_state.box_vectors
    return SamplerState(new_positions, box_vectors=box), geometry_logp_propose
def write_trajectory_dcd(netcdf_filename, topology, pdb_trajectory_filename, dcd_trajectory_filename):
    """
    Write the first frame as a reference PDB and all frames as a DCD trajectory.

    Parameters
    ----------
    netcdf_filename : str
        NetCDF filename.
    topology : Topology
        Topology object
    pdb_trajectory_filename : str
        PDB trajectory output filename
    dcd_trajectory_filename : str
        Output trajectory filename.

    """
    from simtk.openmm.app import PDBFile
    from mdtraj.formats import DCDTrajectoryFile

    ncfile = netCDF4.Dataset(netcdf_filename, 'r')
    try:
        # The values are unused; the lookup is retained so a file without a
        # two-dimensional 'logZ' variable still fails here as before.
        [nsamples, nstates] = ncfile.variables['logZ'].shape

        # Write reference.pdb file (stored positions are treated as nanometers)
        positions = unit.Quantity(ncfile.variables['positions'][0, :, :], unit.nanometers)
        with open(pdb_trajectory_filename, 'w') as outfile:
            PDBFile.writeFile(topology, positions, file=outfile)

        # TODO: Export as DCD trajectory with MDTraj
        with DCDTrajectoryFile(dcd_trajectory_filename, 'w') as f:
            f.write(ncfile.variables['positions'][:, :, :] * 10.0)  # angstroms
    finally:
        # Release the dataset even when one of the writers fails.
        ncfile.close()
def add_missing_atoms(session, m, minimization_steps = 0, keep_waters = False):
    """
    Run PDBFixer on the file behind model ``m`` and open the repaired copy.

    Parameters
    ----------
    session
        Object providing ``models.open`` and ``logger``.
    m
        Model whose ``filename`` is fixed; it is hidden afterwards.
    minimization_steps : int
        When positive, ``minimize(pf, minimization_steps)`` is called before
        writing.
    keep_waters : bool
        Passed to ``PDBFixer.removeHeterogens``.

    The result is written next to the input as ``<stem>-pdbfixer.pdb``.
    """
    from os.path import splitext
    from pdbfixer import PDBFixer
    from simtk.openmm.app import PDBFile

    fname = m.filename
    pf = PDBFixer(filename = fname)
    pf.findMissingResidues()
    pf.findNonstandardResidues()
    pf.replaceNonstandardResidues()
    pf.findMissingAtoms()
    pf.addMissingAtoms()
    pf.removeHeterogens(keep_waters)
    pf.addMissingHydrogens(7.0)

    if minimization_steps > 0:
        minimize(pf, minimization_steps)

    fout = splitext(fname)[0] + '-pdbfixer.pdb'
    # The context manager closes the file even if writing fails.
    with open(fout, 'w') as out:
        PDBFile.writeFile(pf.topology, pf.positions, out)

    # Show the fixed model as atoms and hide the original.
    mfix = session.models.open([fout])[0]
    mfix.atoms.displays = True
    mfix.residues.ribbon_displays = False
    m.display = False

    log = session.logger
    log.info('Wrote %s' % fout)
def add_hydrogens_to_mol(mol):
    """
    Add hydrogens to a molecule object

    The molecule is round-tripped through an in-memory PDB block so PDBFixer
    can add hydrogens at pH 7.4.

    TODO (LESWING) see if there are more flags to add here for default
    :param mol: Rdkit Mol
    :return: Rdkit Mol
    :raises MoleculeLoadException: when a ValueError occurs during the round trip
    """
    try:
        pdb_stringio = StringIO()
        pdb_stringio.write(Chem.MolToPDBBlock(mol))
        pdb_stringio.seek(0)

        fixer = PDBFixer(pdbfile=pdb_stringio)
        fixer.addMissingHydrogens(7.4)

        hydrogenated_io = StringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(hydrogenated_io.read(), sanitize=False, removeHs=False)
    except ValueError as e:
        # The %s placeholder is required: an argument without one makes
        # logging report a formatting error instead of the message.
        logging.warning("Unable to add hydrogens: %s", e)
        raise MoleculeLoadException(e)
def write_trajectory_dcd(netcdf_filename, topology, pdb_trajectory_filename, dcd_trajectory_filename):
    """
    Write the first frame as a reference PDB and all frames as a DCD trajectory.

    Parameters
    ----------
    netcdf_filename : str
        NetCDF filename.
    topology : Topology
        Topology object
    pdb_trajectory_filename : str
        PDB trajectory output filename
    dcd_trajectory_filename : str
        Output trajectory filename.

    """
    from simtk.openmm.app import PDBFile
    from mdtraj.formats import DCDTrajectoryFile

    ncfile = netCDF4.Dataset(netcdf_filename, 'r')
    try:
        # Unused values; the lookup is kept so a missing or non-2-D 'logZ'
        # variable still raises here as before.
        [nsamples, nstates] = ncfile.variables['logZ'].shape

        stored_positions = ncfile.variables['positions']

        # Write reference.pdb file (stored positions are treated as angstroms)
        first_frame = unit.Quantity(stored_positions[0, :, :], unit.angstroms)
        with open(pdb_trajectory_filename, 'w') as outfile:
            PDBFile.writeFile(topology, first_frame, file=outfile)

        # TODO: Export as DCD trajectory with MDTraj
        with DCDTrajectoryFile(dcd_trajectory_filename, 'w') as f:
            f.write(stored_positions[:, :, :])
    finally:
        # Always release the NetCDF handle.
        ncfile.close()
def add_hydrogens_to_mol(mol):
    """
    Add hydrogens to a molecule object

    Converts the molecule to a PDB block, lets PDBFixer add hydrogens at
    pH 7.4, and parses the result back without sanitizing or removing Hs.

    TODO (LESWING) see if there are more flags to add here for default
    :param mol: Rdkit Mol
    :return: Rdkit Mol
    :raises MoleculeLoadException: wraps any ValueError raised on the way
    """
    try:
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)

        fixer = PDBFixer(pdbfile=pdb_stringio)
        fixer.addMissingHydrogens(7.4)

        hydrogenated_io = StringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(
            hydrogenated_io.read(), sanitize=False, removeHs=False)
    except ValueError as e:
        # Pass the exception through a placeholder so logging can format it.
        logging.warning("Unable to add hydrogens: %s", e)
        raise MoleculeLoadException(e)
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """
    Strip unwanted chains and heterogens from a PDB file and add missing atoms.

    Chains of the first model whose ID is not returned by
    ``get_required_chains(pdb_group)`` are removed.

    Returns
    -------
    str
        Path of the written file, ``<pdb_id>.pdb`` in the directory of the
        input ``pdb_file``.
    """
    from os.path import join

    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [
        chain.get_id() for chain in first_model
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)
    fixer.removeChains(chainIds=chains_to_remove)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # join() keeps the output relative when the input has no directory part;
    # plain concatenation with '/' would have pointed at the filesystem root.
    pdb_file = join(dirname(pdb_file), pdb_id + '.pdb')

    # KeepIds flag is critical here, otherwise we lose all binding information
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)
    return pdb_file
def __init__(self, **kwargs):
    """
    Set up the alanine dipeptide explicit-solvent simulated tempering test.

    Builds the test system, writes ``initial.pdb`` to the working directory,
    adds a Monte Carlo barostat, creates 256 thermodynamic states with
    log-spaced temperatures from 270 K to 600 K, and chains the MCMC,
    expanded-ensemble and SAMS samplers.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the parent initializer.
    """
    super(AlanineDipeptideExplicitSimulatedTempering, self).__init__(**kwargs)
    self.description = 'Alanine dipeptide in explicit solvent simulated tempering simulation'

    # Create topology, positions, and system.
    from openmmtools.testsystems import AlanineDipeptideExplicit
    testsystem = AlanineDipeptideExplicit(
        nonbondedMethod=app.CutoffPeriodic)
    self.topology = testsystem.topology
    self.positions = testsystem.positions
    self.system = testsystem.system

    # DEBUG: Write PDB (the context manager closes the file even on failure)
    from simtk.openmm.app import PDBFile
    with open('initial.pdb', 'w') as outfile:
        PDBFile.writeFile(self.topology, self.positions, outfile)

    # Add a MonteCarloBarostat
    temperature = 270 * unit.kelvin  # will be replaced as thermodynamic state is updated
    pressure = 1.0 * unit.atmospheres
    barostat = openmm.MonteCarloBarostat(pressure, temperature)
    self.system.addForce(barostat)

    # Create thermodynamic states.
    Tmin = 270 * unit.kelvin
    Tmax = 600 * unit.kelvin
    ntemps = 256  # number of temperatures
    from sams import ThermodynamicState
    temperatures = unit.Quantity(
        np.logspace(np.log10(Tmin / unit.kelvin),
                    np.log10(Tmax / unit.kelvin), ntemps), unit.kelvin)
    self.thermodynamic_states = [
        ThermodynamicState(system=self.system,
                           temperature=temperature,
                           pressure=pressure)
        for temperature in temperatures
    ]

    # Create SAMS samplers
    from sams.samplers import SamplerState, MCMCSampler, ExpandedEnsembleSampler, SAMSSampler
    thermodynamic_state_index = 0  # initial thermodynamic state index
    thermodynamic_state = self.thermodynamic_states[
        thermodynamic_state_index]
    sampler_state = SamplerState(positions=self.positions)
    self.mcmc_sampler = MCMCSampler(
        sampler_state=sampler_state,
        thermodynamic_state=thermodynamic_state,
        ncfile=self.ncfile)
    #self.mcmc_sampler.pdbfile = open('output.pdb', 'w')
    self.mcmc_sampler.topology = self.topology
    self.mcmc_sampler.nsteps = 500
    self.mcmc_sampler.timestep = 2.0 * unit.femtoseconds
    self.mcmc_sampler.verbose = True
    self.exen_sampler = ExpandedEnsembleSampler(self.mcmc_sampler,
                                                self.thermodynamic_states)
    self.exen_sampler.verbose = True
    self.sams_sampler = SAMSSampler(self.exen_sampler)
    self.sams_sampler.verbose = True
def pdb2xyz(inputfile, outputPrefix, keepIntermediate=False):
    """pdb2xyz: Transform a pdb file to a goccs compatible xyz file with
    number of atoms, elements and coordinates into an output file named
    outputPrefix.xyz.

    If you set keepIntermediate to true then the pdb file written by
    PDBFixer (outputPrefix_fixed.pdb) will be kept in the output folder.
    """
    pdbfixedfilename = outputPrefix + "_fixed.pdb"
    xyzoutfilename = outputPrefix + ".xyz"

    fixer = pdbfixer.PDBFixer(inputfile)
    fixer.removeHeterogens(False)
    # Close the intermediate file before the parser re-reads it so every
    # buffered line is on disk.
    with open(pdbfixedfilename, 'w') as fixed_handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, fixed_handle)

    parser = PDB.PDBParser()
    # parser = PDB.MMCIFParser()  # in case it's a cif file
    structure = parser.get_structure("input", pdbfixedfilename)
    atoms = list(structure.get_atoms())

    with open(xyzoutfilename, "w") as outputhandle:
        # XYZ header: atom count on the first line, a comment on the second.
        outputhandle.write("%d\nempty line\n" % len(atoms))
        for atom in atoms:
            coords = atom.get_coord()
            outputhandle.write("%s %.3f %.3f %.3f\n" % (atom.element, coords[0], coords[1], coords[2]))

    if not keepIntermediate:
        os.remove(pdbfixedfilename)
def write_pdb(self, path):
    """
    Write the current topology and positions of the system to a PDB file.

    Parameters
    ----------
    path : str
        Destination file; opened in text write mode.

    Raises
    ------
    ValueError
        When both ``self.master`` and ``self.positions`` are ``None``.
    """
    # NOTE(review): the message mentions topology, but the check looks at
    # `master` and only fires when both attributes are None - confirm intent.
    if not (self.master is not None or self.positions is not None):
        raise ValueError('Topology and positions are needed to write output files.')
    with open(path, 'w') as pdb_out:
        PDBFile.writeFile(self.topology, self.positions, pdb_out)
def write_pdb(self, path):
    """
    Dump the system's topology and positions to ``path`` in PDB format.

    Raises ``ValueError`` if ``self.master`` and ``self.positions`` are both
    ``None``; otherwise the file is opened for writing and filled by
    ``PDBFile.writeFile``.
    """
    # NOTE(review): only the case where *both* attributes are None is
    # rejected, although the message speaks of topology - confirm intent.
    if self.master is None:
        if self.positions is None:
            message = 'Topology and positions are needed to write output files.'
            raise ValueError(message)

    with open(path, 'w') as handle:
        PDBFile.writeFile(self.topology, self.positions, handle)
def _create_chain_pdb(self, topology_pdb, positions):
    """
    Write the chain to ``data/<sequence>.pdb`` next to this module.

    The file name gets an ``_aa`` suffix when ``self.forceField_str`` is
    ``'OPLS-AA'``. Nothing is written when the file already exists, unless
    ``self.overwrite_pdb`` is set; the flag is reset to ``False`` whenever a
    file is written.
    """
    dirname = os.path.dirname(__file__)
    filename = "{}.pdb".format(self.sequence_str)
    if self.forceField_str == 'OPLS-AA':
        filename = "{}_aa.pdb".format(self.sequence_str)
    file_path = os.path.join(dirname, "data/{}".format(filename))

    if not os.path.isfile(file_path) or self.overwrite_pdb:
        self.overwrite_pdb = False
        # Close the handle deterministically instead of leaking it.
        with open(file_path, 'w') as handle:
            PDBFile.writeFile(topology_pdb, positions, handle)
def writePDBFixed(self):
    """Write the fixed (initial) structure to a pdb file.

    The output name is the title with its last eight characters replaced by
    ``fixed.pdb``; residue and chain IDs are kept (``keepIds=True``).
    """
    from simtk.openmm.app import PDBFile

    out_name = self.getTitle()[:-8] + 'fixed.pdb'
    # Context manager so the file is flushed and closed deterministically.
    with open(out_name, 'w') as handle:
        PDBFile.writeFile(self._topology, self._positions, handle, keepIds=True)
def fix_pdb(pdb_file):
    """
    Repair a PDB file in place with PDBFixer.

    Nonstandard residues are replaced, heterogens are removed (called with
    ``True``), missing atoms are added and hydrogens are added at pH 7.0.
    The input file is overwritten with the result.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    # The input was fully read by the PDBFixer constructor, so it is safe to
    # reopen the same path for writing; `with` guarantees it is closed.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def fix_pdb(self, infile, out=None, pH=7):
    """
    Add missing atoms and hydrogens to ``infile`` and write the result.

    Parameters
    ----------
    infile : str
        Path of the PDB file to read.
    out : str, optional
        Output path. Defaults to ``infile`` with ``_fixed`` inserted before
        the extension.
    pH : float, optional
        pH passed to ``addMissingHydrogens``.
    """
    with open(infile, 'r') as source:
        fixer = PDBFixer(pdbfile=source)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH=pH)

    if out is None:
        stem_and_ext = os.path.splitext(infile)
        out = '{0[0]}{1}{0[1]}'.format(stem_and_ext, '_fixed')

    with open(out, 'w') as target:
        PDBFile.writeFile(fixer.topology, fixer.positions, target)
def save_frames_pdb(pdb, start=0, stop=0, step=1, where='./', name='frame-%s'):
    """
    Write selected frames of a multi-frame PDB to individual PDB files.

    Parameters
    ----------
    pdb : PDBFile or str
        An already loaded ``PDBFile`` or a path handed to ``PDBFile``.
    start, stop, step : int
        Frame range, as for ``range``. A falsy ``stop`` means "all frames".
    where : str
        Output directory.
    name : str
        File name pattern without extension. If it has no ``%`` placeholder,
        ``%s`` is appended; the placeholder receives the frame index.
    """
    pdbfile = pdb if isinstance(pdb, PDBFile) else PDBFile(pdb)
    nformat = (name if '%' in name else name + '%s') + '.pdb'
    topology = pdbfile.getTopology()
    stop = stop if stop else pdbfile.getNumFrames()

    for idx in range(start, stop, step):
        # Use the loop index for both the frame and the file name; using
        # `stop`/`step` would pick an out-of-range frame and reuse one name.
        positions = pdbfile.getPositions(frame=idx)
        filepath = normpath(where + '/' + nformat % idx)
        # writeFile needs a writable file object, not a path.
        with open(filepath, 'w') as handle:
            PDBFile.writeFile(topology, positions, handle)
def minimize_energy(pdb: PDBFile, simulation: Simulation, args: ListOfArgs):
    """
    Minimize the simulation's energy and save the minimized structure.

    Does nothing unless ``args.MINIMIZE`` is truthy. The structure is written
    to ``args.MINIMIZED_FILE`` if set, otherwise to
    ``<stem of args.INITIAL_STRUCTURE_PATH>_min.pdb``.
    """
    if not args.MINIMIZE:
        return

    print('Energy minimizing...')
    simulation.minimizeEnergy(tolerance=0.01 * simtk.unit.kilojoules_per_mole)

    if not args.MINIMIZED_FILE:
        base, _ = os.path.splitext(args.INITIAL_STRUCTURE_PATH)
        minimized_file_name = f'{base}_min.pdb'
    else:
        minimized_file_name = args.MINIMIZED_FILE  # TODO: Nasty fix

    print(f' Saving minimized structure in {minimized_file_name}')
    state = simulation.context.getState(getPositions=True)
    # Close the output file deterministically rather than leaking the handle.
    with open(minimized_file_name, 'w') as handle:
        PDBFile.writeFile(pdb.topology, state.getPositions(), handle)
def pdbfixerTransform(filename, replace_nonstandard_residues, add_missing_residues, add_missing_atoms):
    """
    Adds missing residues and/or missing atoms to a PDB file.

    Parameters
    ----------
    filename : str
        Name of the input PDB file.
    replace_nonstandard_residues : bool
        Whether to replace nonstandard residues with their standard equivalents.
    add_missing_residues : bool
        Whether to add missing residues.
    add_missing_atoms : bool
        Whether to add missing atoms.

    Returns
    -------
    filename_output : str
        Absolute path of the input when all three flags are false; otherwise
        the value returned by ``fixPDBFixerPDB`` for the written
        ``<stem>_pdbfixer.pdb`` file.
    """
    # Nothing requested: hand back the input untouched.
    if not (replace_nonstandard_residues or add_missing_atoms or add_missing_residues):
        return _os.path.abspath(filename)

    fix = _pdbfix.PDBFixer(filename=filename)

    if replace_nonstandard_residues:
        fix.findNonstandardResidues()
        fix.replaceNonstandardResidues()

    # PDBFixer expects these attributes to exist before addMissingAtoms(), so
    # they are set to empty containers when the corresponding search is off.
    if add_missing_residues:
        fix.findMissingResidues()
    else:
        fix.missingResidues = []
    if add_missing_atoms:
        fix.findMissingAtoms()
    else:
        fix.missingAtoms = []
        fix.missingTerminals = []

    fix.addMissingAtoms()

    filename_output = _os.path.splitext(filename)[0] + "_pdbfixer.pdb"
    # Close (and flush) the file before the post-processing step reads it.
    with open(filename_output, "w") as handle:
        _PDBFile.writeFile(fix.topology, fix.positions, handle)

    return fixPDBFixerPDB(filename_output, filename, replace_nonstandard_residues,
                          add_missing_residues, add_missing_atoms, filename_output)
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """
    Clean a list of PDB files with PDBFixer and write them to ``toFolder``.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB IDs; the first four characters (lower-cased) name the file. A
        fifth character, if present, selects the chain when ``chain`` is None.
    chain : str or int, optional
        Chain ID(s) to keep. ``None`` means chain "A" unless the ID carries a
        fifth character; ``"-1"`` or ``-1`` keeps the chains returned by
        ``getAllChains``.
    fromFolder : str, optional
        Folder with the input files (default ``original_pdbs``). If it ends
        in ``.pdb`` it is used directly as the input file.
    toFolder : str
        Output folder; created if needed.
    """
    # makedirs avoids the shell: no quoting problems with spaces or
    # metacharacters in the folder name.
    os.makedirs(toFolder, exist_ok=True)

    for pdb_id in pdb_list:
        pdb = f"{pdb_id.lower()[:4]}"
        pdbFile = pdb + ".pdb"

        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # A direct file path was given (the extension is a suffix).
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)

        if chain is None:
            # None means default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain

        # clean pdb
        fixer = PDBFixer(filename=fromFile)

        # remove unwanted chains
        chains = list(fixer.topology.chains())
        chains_to_remove = [i for i, x in enumerate(chains) if x.id not in Chosen_chain]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the
        # start or end of the chain.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues.keys()):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)

        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def download_pdb(pdbid, file_pathway):
    """
    Download a PDB entry through PDBFixer and write it unchanged.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: a string containing the pathway specifying how you want
            to organize the PDB files once written (created if missing)

    Returns: nothing, but it does write the PDB file ``<pdbid>.pdb``

    ***Note: this function does NOT fix any mistakes with the PDB file
    """
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(file_pathway, exist_ok=True)

    fixer = PDBFixer(pdbid=pdbid)
    out_path = os.path.join(file_pathway, '%s.pdb' % pdbid)
    with open(out_path, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def __init__(self, **kwargs):
    """
    Configure the alanine dipeptide (explicit solvent) simulated tempering run.

    Steps performed: create the test system, dump it to ``initial.pdb`` in
    the working directory, attach a Monte Carlo barostat, build 256
    thermodynamic states on a logarithmic temperature ladder from 270 K to
    600 K, and stack the MCMC, expanded-ensemble and SAMS samplers.

    Parameters
    ----------
    **kwargs
        Passed through to the parent initializer.
    """
    super(AlanineDipeptideExplicitSimulatedTempering, self).__init__(**kwargs)
    self.description = 'Alanine dipeptide in explicit solvent simulated tempering simulation'

    # Create topology, positions, and system.
    from openmmtools.testsystems import AlanineDipeptideExplicit
    testsystem = AlanineDipeptideExplicit(nonbondedMethod=app.CutoffPeriodic)
    self.topology = testsystem.topology
    self.positions = testsystem.positions
    self.system = testsystem.system

    # DEBUG: Write PDB; `with` closes the file even if writing raises.
    from simtk.openmm.app import PDBFile
    with open('initial.pdb', 'w') as outfile:
        PDBFile.writeFile(self.topology, self.positions, outfile)

    # Add a MonteCarloBarostat
    temperature = 270 * unit.kelvin  # will be replaced as thermodynamic state is updated
    pressure = 1.0 * unit.atmospheres
    barostat = openmm.MonteCarloBarostat(pressure, temperature)
    self.system.addForce(barostat)

    # Create thermodynamic states.
    Tmin = 270 * unit.kelvin
    Tmax = 600 * unit.kelvin
    ntemps = 256  # number of temperatures
    from sams import ThermodynamicState
    temperatures = unit.Quantity(np.logspace(np.log10(Tmin / unit.kelvin), np.log10(Tmax / unit.kelvin), ntemps), unit.kelvin)
    self.thermodynamic_states = [
        ThermodynamicState(system=self.system, temperature=temperature, pressure=pressure)
        for temperature in temperatures
    ]

    # Create SAMS samplers
    from sams.samplers import SamplerState, MCMCSampler, ExpandedEnsembleSampler, SAMSSampler
    thermodynamic_state_index = 0  # initial thermodynamic state index
    thermodynamic_state = self.thermodynamic_states[thermodynamic_state_index]
    sampler_state = SamplerState(positions=self.positions)
    self.mcmc_sampler = MCMCSampler(sampler_state=sampler_state, thermodynamic_state=thermodynamic_state, ncfile=self.ncfile)
    #self.mcmc_sampler.pdbfile = open('output.pdb', 'w')
    self.mcmc_sampler.topology = self.topology
    self.mcmc_sampler.nsteps = 500
    self.mcmc_sampler.timestep = 2.0 * unit.femtoseconds
    self.mcmc_sampler.verbose = True
    self.exen_sampler = ExpandedEnsembleSampler(self.mcmc_sampler, self.thermodynamic_states)
    self.exen_sampler.verbose = True
    self.sams_sampler = SAMSSampler(self.exen_sampler)
    self.sams_sampler.verbose = True
def check_hydrogens(molecule, ID):
    """
    Make sure the structure contains hydrogens, adding them if necessary.

    If the selection ``"name == H"`` on ``molecule.top`` is empty, the file
    ``<ID>.pdb`` is loaded with OpenMM, hydrogens are added using the
    Amber99sb force field, the file is overwritten, and the structure is
    reloaded without solvent. Any failure only produces a warning.

    Returns
    -------
    The reloaded molecule when hydrogens were added, else ``molecule``.
    """
    # Check that Hydrogens are in structure
    if len(molecule.top.select("name == H")) != 0:
        return molecule

    # If absent, then add Hydrogens using the Amber99sb force-field
    pdb_path = ID + ".pdb"
    try:
        from simtk.openmm.app import PDBFile, Modeller, ForceField
        pdb = PDBFile(pdb_path)
        modeller = Modeller(pdb.topology, pdb.positions)
        forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
        modeller.addHydrogens(forcefield)
        # Close the file before it is read back by md.load.
        with open(pdb_path, 'w') as handle:
            PDBFile.writeFile(modeller.topology, modeller.positions, handle)
        molecule = md.load(pdb_path).remove_solvent()
    except Exception:
        # Best effort by design, but do not swallow KeyboardInterrupt or
        # SystemExit the way a bare `except:` would.
        warnings.warn("PDB topology missing Hydrogens. Either manually add "
                      "or install OpenMM through SIMTK to automatically correct.")
    return molecule
def test_add_molecules(self):
    """Test that molecules can be added to template generator after its creation"""
    from simtk.openmm.app import ForceField
    from simtk.openmm.app import NoCutoff

    # A generator that knows no molecules, registered on an empty ForceField
    generator = self.TEMPLATE_GENERATOR()
    forcefield = ForceField()
    forcefield.registerTemplateGenerator(generator.generator)

    # Parameterizing an unknown molecule is expected to fail
    molecule = self.molecules[0]
    try:
        openmm_topology = molecule.to_topology().to_openmm()
        system = forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff)
    except ValueError as e:
        # Exception 'No template found...' is expected
        message = str(e)
        assert message.startswith('No template found')

    # Once the molecule is registered, parameterization must succeed
    generator.add_molecules(molecule)
    openmm_topology = molecule.to_topology().to_openmm()
    try:
        system = forcefield.createSystem(openmm_topology, nonbondedMethod=NoCutoff)
    except Exception as e:
        # Dump debugging information before propagating the failure
        print(forcefield._atomTypes.keys())
        from simtk.openmm.app import PDBFile
        PDBFile.writeFile(openmm_topology, molecule.conformers[0])
        raise e
    assert system.getNumParticles() == molecule.n_atoms

    # Add multiple molecules, including repeats
    generator.add_molecules(self.molecules)

    # Every molecule must now be parameterizable
    for candidate in self.molecules:
        candidate_topology = candidate.to_topology().to_openmm()
        system = forcefield.createSystem(candidate_topology, nonbondedMethod=NoCutoff)
        assert system.getNumParticles() == candidate.n_atoms
def run_md_simulation(random_seed, simulation, pdb, args):
    """
    Run the MD simulation described by ``args`` and write the requested outputs.

    Does nothing unless ``args.SIM_RUN_SIMULATION`` is truthy. Otherwise it
    optionally sets initial velocities, prints a run summary, attaches the
    configured reporters, runs ``args.SIM_N_STEPS`` steps, and optionally
    writes the last frame as PDB and plots the state data.

    Parameters
    ----------
    random_seed : int
        Seed passed to ``setVelocitiesToTemperature`` and printed.
    simulation : Simulation
        Simulation to run; reporters are appended to it.
    pdb : PDBFile
        Supplies the topology for the last-frame PDB.
    args
        Settings object; the attributes read are the ``SIM_*``, ``REP_*`` and
        ``TRJ_*`` names used below.
    """
    if not args.SIM_RUN_SIMULATION:
        return

    print("Running simulation...")
    if args.SIM_SET_INITIAL_VELOCITIES:
        print(f" Setting up initial velocities at temperature {args.SIM_TEMP}")
        simulation.context.setVelocitiesToTemperature(args.SIM_TEMP, random_seed)

    # Reporting intervals in steps, never below one step.
    reporting_to_screen_freq = max(1, int(round(args.SIM_N_STEPS / args.REP_STATE_N_SCREEN)))
    reporting_to_file_freq = max(1, int(round(args.SIM_N_STEPS / args.REP_STATE_N_FILE)))
    trajectory_freq = max(1, int(round(args.SIM_N_STEPS / args.TRJ_FRAMES)))

    total_time = args.SIM_N_STEPS * args.SIM_TIME_STEP
    print(" Number of steps: {} steps".format(args.SIM_N_STEPS))
    print(" Time step: {}".format(args.SIM_TIME_STEP))
    print(" Temperature: {}".format(args.SIM_TEMP))
    print(" Total simulation time: {}".format(total_time.in_units_of(simtk.unit.nanoseconds)))
    print(" Number of state reads: {} reads".format(args.REP_STATE_N_SCREEN))
    print(" State reporting to screen every: {} step".format(reporting_to_screen_freq))
    print(" State reporting to file every: {} step".format(reporting_to_file_freq))
    print(" Number of trajectory frames: {} frames".format(args.TRJ_FRAMES))
    print(" Trajectory frame every: {} step".format(trajectory_freq))
    print(" Trajectory frame every: {}".format(trajectory_freq * args.SIM_TIME_STEP))
    print(' Random seed:', random_seed)
    print()

    if args.TRJ_FILENAME_PDB:
        simulation.reporters.append(PDBReporter(args.TRJ_FILENAME_PDB, trajectory_freq))
    if args.TRJ_FILENAME_DCD:
        simulation.reporters.append(DCDReporter(args.TRJ_FILENAME_DCD, trajectory_freq))
    simulation.reporters.append(StateDataReporter(sys.stdout, reporting_to_screen_freq,
                                                  step=True, progress=True, potentialEnergy=True,
                                                  totalSteps=args.SIM_N_STEPS))
    if args.REP_STATE_FILE_PATH:
        simulation.reporters.append(StateDataReporter(args.REP_STATE_FILE_PATH, reporting_to_file_freq,
                                                      step=True, potentialEnergy=True))
    if args.REP_STATE_FILE_H5_PATH:
        simulation.reporters.append(HDF5Reporter(args.REP_STATE_FILE_H5_PATH, reporting_to_file_freq,
                                                 velocities=True))

    print('Running simulation...')
    simulation.step(args.SIM_N_STEPS)

    if args.TRJ_LAST_FRAME_PDB:
        last_frame_file_name = args.TRJ_LAST_FRAME_PDB
        state = simulation.context.getState(getPositions=True)
        # Close the last-frame file deterministically instead of leaking it.
        with open(last_frame_file_name, 'w') as handle:
            PDBFile.writeFile(pdb.topology, state.getPositions(), handle)

    if args.REP_PLOT_FILE_NAME:
        plot_data(args.REP_STATE_FILE_PATH, args.REP_PLOT_FILE_NAME)
def pdbfix_protein(input_pdb_path, output_pdb_path, find_missing_residues=True, keep_water=False, ph=None):
    """Repair a PDB file with PDBFixer and save the result.

    Heterogen atoms are always removed.

    Parameters
    ----------
    input_pdb_path : str
        The PDB to fix.
    output_pdb_path : str
        The path to the output PDB file.
    find_missing_residues : bool, optional
        If True, PDBFixer will try to model the unresolved residues that
        appear in the amino acid sequence (default is True).
    keep_water : bool, optional
        If True, water molecules are not stripped (default is False).
    ph : float or None, optional
        If not None, hydrogen atoms will be added at this pH.

    """
    fixer = PDBFixer(filename=input_pdb_path)

    # Either look for unresolved residues or declare that there are none.
    if not find_missing_residues:
        fixer.missingResidues = {}
    else:
        fixer.findMissingResidues()

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(keep_water)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    wants_hydrogens = ph is not None
    if wants_hydrogens:
        fixer.addMissingHydrogens(ph)

    with open(output_pdb_path, 'w') as out_handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, out_handle)
def check_hydrogens(molecule, ID):
    """
    Return a structure that has hydrogens, adding them via OpenMM if absent.

    When ``molecule.top.select("name == H")`` is empty, ``<ID>.pdb`` is read,
    hydrogens are added with the Amber99sb force field, the file is
    rewritten, and the solvent-free reload is returned. On any failure a
    warning is emitted and the original ``molecule`` is returned.
    """
    # Check that Hydrogens are in structure
    if len(molecule.top.select("name == H")) == 0:
        # If absent, then add Hydrogens using the Amber99sb force-field
        try:
            from simtk.openmm.app import PDBFile, Modeller, ForceField
            pdb = PDBFile(ID + ".pdb")
            modeller = Modeller(pdb.topology, pdb.positions)
            forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
            modeller.addHydrogens(forcefield)
            # `with` flushes and closes the file before md.load reads it.
            with open(ID + ".pdb", 'w') as handle:
                PDBFile.writeFile(modeller.topology, modeller.positions, handle)
            molecule = md.load(ID + ".pdb").remove_solvent()
        except Exception:
            # Still best effort, but KeyboardInterrupt/SystemExit now
            # propagate instead of being silenced by a bare `except:`.
            warnings.warn(
                "PDB topology missing Hydrogens. Either manually add "
                "or install OpenMM through SIMTK to automatically correct.")
    return molecule
def _apply_pdbfix(molecule, pH=7.0, add_hydrogens=False):
    """
    Run PDBFixer to amend potential issues in PDB format.

    The molecule is written to an in-memory PDB, closed in Chimera, fixed,
    and the fixed PDB is opened again under the original name.

    Parameters
    ----------
    molecule : chimera.Molecule
        Chimera Molecule object to fix.
    pH : float, optional
        Target pH for adding missing hydrogens.
    add_hydrogens : bool, optional
        Whether to add missing hydrogens or not.

    Returns
    -------
    The first element of the list returned by ``chimera.openModels.open``
    for the fixed PDB; the opened models are passed to
    ``chimera.openModels.remove`` before returning.
    """
    # Serialize the molecule and release the original model.
    source = StringIO()
    chimera.pdbWrite([molecule], chimera.Xform(), source)
    chimera.openModels.close([molecule])
    source.seek(0)

    fixer = PDBFixer(pdbfile=source)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)
    source.close()

    # Write the fixed structure to a fresh buffer and load it back.
    fixed_pdb = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, fixed_pdb)
    fixed_pdb.seek(0)
    reopened = chimera.openModels.open(fixed_pdb, type="PDB", identifyAs=molecule.name)
    chimera.openModels.remove(reopened)
    fixed_pdb.close()
    return reopened[0]
def pdb_id_to_mol(pdb_id: str) -> Mol:
    """Transform PDB ID into rdkit Mol.

    The entry is fetched with PDBFixer, written to a uniquely named temporary
    PDB file, parsed by RDKit with sanitization, and the temporary file is
    removed again.

    Parameters
    ----------
    pdb_id: str
        PDB, e.g. '2244'

    Returns
    -------
    rdkit_mol: rdkit Mol
        rdkit Mol.
    """
    import tempfile

    fixer = PDBFixer(pdbid=pdb_id)

    # A unique temp file avoids clobbering (or racing on) a fixed 'tmp.pdb'
    # in the working directory.
    handle = tempfile.NamedTemporaryFile(mode='w', suffix='.pdb', delete=False)
    try:
        with handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
        # The file is closed (flushed) before RDKit reads it.
        rdkit_mol = Chem.MolFromPDBFile(handle.name, sanitize=True)
    finally:
        # Clean up even when writing or parsing fails.
        os.remove(handle.name)
    return rdkit_mol
def _via_helper_water(cls, **kwargs):
    """
    Helper function for via_rdkit or via_openeye

    Solvates ``cls.pdb_filename`` with PDBFixer, reloads the solvated PDB
    with ParmEd, and combines ``cls.ligand_pmd`` with one copy of
    ``cls.solvent_pmd`` per ``HOH`` residue.

    Parameters
    ----------
    **kwargs
        ``padding`` (optional): solvent padding, converted with ``float``;
        defaults to ``cls.default_padding``.

    Returns
    ------------------
    system_pmd : parmed.structure
        The parameterised system as parmed object

    Side effects: ``cls.pdb_filename`` is repointed to the (removed)
    temporary file, ``cls.system_pmd`` is set and ``cls.ligand_pmd`` is
    deleted.
    """
    from pdbfixer import PDBFixer  # for solvating

    fixer = PDBFixer(cls.pdb_filename)
    if "padding" in kwargs:
        fixer.addSolvent(padding=float(kwargs["padding"]))
    else:
        fixer.addSolvent(padding=cls.default_padding)

    tmp_dir = tempfile.mkdtemp()
    try:
        # NamedTemporaryFile creates the file atomically, unlike the
        # deprecated, race-prone tempfile.mktemp().
        with tempfile.NamedTemporaryFile(mode="w", suffix=".pdb", dir=tmp_dir, delete=False) as f:
            cls.pdb_filename = f.name
            PDBFile.writeFile(fixer.topology, fixer.positions, f)

        solvated = parmed.load_file(cls.pdb_filename)
        solvent = solvated["(:HOH)"]
        num_solvent = len(solvent.residues)

        solvent_pmd = cls.solvent_pmd * num_solvent
        solvent_pmd.positions = solvent.positions

        cls.system_pmd = cls.ligand_pmd + solvent_pmd
        cls.system_pmd.box_vectors = solvated.box_vectors
    finally:
        # Remove the scratch directory on success and on failure.
        shutil.rmtree(tmp_dir, ignore_errors=True)

    try:
        del cls.ligand_pmd
    except AttributeError:
        # Already removed; nothing to do.
        pass

    cls.system_pmd.title = cls.smiles
    return cls.system_pmd
def fixPDB(pdb, pdbname, add_hyds=True, pH=7.0):
    """
    Prepare the PDB structure for simulation/minimization using the OpenMM
    PDBFixer.

    Parameters
    ----------
    pdb : str
        Path of the input PDB file.
    pdbname : str
        Path of the fixed PDB file to write.
    add_hyds : bool, optional
        Whether to add missing hydrogens (default True, the previous
        hard-coded behaviour).
    pH : float, optional
        pH used when adding hydrogens (default 7.0, the previous hard-coded
        value).
    """
    fixer = pdbfixer.PDBFixer(filename=pdb)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    # Keep crystallographic water. The original passed a non-empty string
    # here, which acts as True.
    fixer.removeHeterogens(keepWater=True)
    if add_hyds:
        fixer.addMissingHydrogens(pH)  # only if we want protons!

    with open(pdbname, 'w') as outfile:
        PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
def add_solvent(pdb_filepath: str, ani_input: dict, pdb_output_filepath: str, box_length: unit.quantity.Quantity = (2.5 * unit.nanometer)):
    """
    Solvate a structure in a cubic water box and record the water atoms.

    Parameters
    ----------
    pdb_filepath : str
        Input PDB file.
    ani_input : dict
        Updated in place with 'solvent_atoms' (concatenated element symbols
        of all HOH atoms), 'solvent_coords' (their coordinates in angstrom)
        and 'box_length'.
    pdb_output_filepath : str
        Where the solvated system is written as PDB.
    box_length : unit.Quantity
        Edge length of the cubic box.
    """
    assert isinstance(box_length, unit.Quantity)

    pdb = PDBFixer(filename=pdb_filepath)

    # add water in a cubic box with the requested edge length
    # (centering the ligand and re-wrapping coordinates are not performed)
    edge = box_length.value_in_unit(unit.nanometer)
    pdb.addSolvent(boxVectors=(Vec3(edge, 0.0, 0.0),
                               Vec3(0.0, edge, 0.0),
                               Vec3(0.0, 0.0, edge)))

    from simtk.openmm.app import PDBFile
    with open(pdb_output_filepath, 'w') as handle:
        PDBFile.writeFile(pdb.topology, pdb.positions, handle)

    # Collect element symbols and coordinates of the water atoms only.
    atom_list = []
    coord_list = []
    for atom, coor in zip(pdb.topology.atoms(), pdb.positions):
        if atom.residue.name != 'HOH':
            continue
        atom_list.append(atom.element.symbol)
        coor = coor.value_in_unit(unit.angstrom)
        coord_list.append([coor[0], coor[1], coor[2]])

    ani_input['solvent_atoms'] = ''.join(atom_list)
    ani_input['solvent_coords'] = np.array(coord_list) * unit.angstrom
    ani_input['box_length'] = box_length
def pdbfix(receptor: Optional[str] = None, pdbid: Optional[str] = None, pH: float = 7.0, path: str = '.', **kwargs) -> str:
    """
    Repair a receptor structure with PDBFixer and write it as PDB.

    Parameters
    ----------
    receptor : str, optional
        Path of a local PDB file; used when ``pdbid`` is not given.
    pdbid : str, optional
        PDB ID to download; takes precedence over ``receptor`` as the source.
    pH : float
        pH used when adding hydrogens.
    path : str
        Directory for the output when no ``receptor`` path is given.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The output file: ``receptor`` (overwritten in place) when it is given,
    otherwise ``Path(path) / '<pdbid>.pdb'``.
    """
    # NOTE(review): the pdbid branch returns a pathlib.Path although the
    # annotation says str; left unchanged for callers - confirm.
    if pdbid:
        fixer = PDBFixer(pdbid=pdbid)
    else:
        fixer = PDBFixer(filename=receptor)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    if receptor:
        outfile = receptor
    else:
        outfile = Path(path)/f'{pdbid}.pdb'

    # Close the output deterministically instead of leaking the handle.
    with open(outfile, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
    return outfile
def report(self, simulation, state):
    """Generate a report.

    On the first call the temporary DCD writer is created, the molecules and
    their masses are recorded, and a temporary PDB file is written. Every
    call records the time (and step, if enabled) and appends a DCD frame.

    Parameters
    ----------
    simulation : Simulation
        The Simulation to generate a report for
    state : State
        The current state of the simulation
    """
    if self._tempDCD is None:
        # Create DCDFile object
        self._tempDCD = DCDFile(self._tempOut, simulation.topology,
                                simulation.integrator.getStepSize(),
                                simulation.currentStep, self._reportInterval, False)

        # Save molecules and masses
        for molecule in simulation.context.getMolecules():
            self._reportedMolecules.append(np.array(molecule))
            self._reportedMoleculesMass.append(
                self._getMoleculeMass(simulation.system, molecule))

        # Create temporary pdb file; `with` makes sure it is flushed and
        # closed before anything reads it back.
        with open(self._tempPDBFileName, 'w') as pdb_handle:
            PDBFile.writeFile(simulation.topology, state.getPositions(), pdb_handle)

    # Get time and step from simulation
    self._times.append(state.getTime().value_in_unit(unit.picosecond))
    if self._step:
        self._steps.append(simulation.currentStep)

    # Write to dcd file
    self._tempDCD.writeModel(
        state.getPositions(),
        periodicBoxVectors=state.getPeriodicBoxVectors())
def _fix(self, atoms):
    """Repair ``atoms`` with PDBFixer and store the result on the instance.

    The atoms are round-tripped through in-memory PDB text: written out,
    repaired by PDBFixer (nonstandard residues replaced, heterogens removed
    with the flag ``False``, missing atoms and hydrogens added at
    ``self._ph``), and parsed back. Sets ``self._atoms``,
    ``self._topology`` and ``self._positions``.

    Raises
    ------
    ImportError
        If PDBFixer or OpenMM cannot be imported.
    """
    try:
        from pdbfixer import PDBFixer
        from simtk.openmm.app import PDBFile
    except ImportError:
        raise ImportError(
            'Please install PDBFixer and OpenMM in order to use ClustENM.')

    # Serialise the input atoms to an in-memory PDB for PDBFixer to read.
    pdb_in = createStringIO()
    original_title = atoms.getTitle()
    writePDBStream(pdb_in, atoms)
    pdb_in.seek(0)
    fixer = PDBFixer(pdbfile=pdb_in)
    pdb_in.close()

    # An empty mapping means no missing residues are modelled.
    fixer.missingResidues = {}
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(self._ph)

    # Read the repaired structure back, keeping the original identifiers
    # and restoring the original title.
    pdb_out = createStringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out, keepIds=True)
    pdb_out.seek(0)
    self._atoms = parsePDBStream(pdb_out)
    self._atoms.setTitle(original_title)
    pdb_out.close()

    self._topology = fixer.topology
    self._positions = fixer.positions
# Download PDB entry 3UE4, drop chain B, add missing atoms, strip all
# heterogens (including water) and write the result to disk.
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

fixer = PDBFixer(pdbid='3UE4')
fixer.removeChains(chainIds=['B'])

# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throws an
# exception, and if fixer.findMissingResidues() is called instead, several
# terminal residues are added.
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.removeHeterogens(keepWater=False)
#fixer.addMissingHydrogens(7.0)

# Context manager ensures the output file is flushed and closed.
with open('../kinases/abl/3UE4-pdbfixer.pdb', 'w') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
def update_state(self):
    """
    Sample the thermodynamic state.

    Proposes a new chemical state (topology) and accepts or rejects it.
    Only the ``'ncmc-geometry-ncmc'`` scheme is implemented: atoms being
    removed are eliminated with NCMC, the geometry engine proposes
    coordinates for new atoms, and the new atoms are introduced with NCMC.
    On acceptance the sampler's system, topology, positions and state key
    are replaced and ``self.naccepted`` is incremented; otherwise
    ``self.nrejected`` is incremented. ``self.update_statistics()`` is
    called at the end in either case.

    Raises
    ------
    Exception
        If the topology and system atom counts differ, if the current or
        proposed topology cannot be built into a system, if the atom map
        holds out-of-range indices, if NCMC yields NaN positions, or if
        ``self.scheme`` is unsupported.
    """
    # Check that system and topology have same number of atoms.
    old_system = self.sampler.sampler_state.system
    old_topology = self.topology
    old_topology_natoms = sum([1 for atom in old_topology.atoms()]) # number of topology atoms
    old_system_natoms = old_system.getNumParticles()
    if old_topology_natoms != old_system_natoms:
        msg = 'ExpandedEnsembleSampler: topology has %d atoms, while system has %d atoms' % (old_topology_natoms, old_system_natoms)
        raise Exception(msg)

    if self.scheme == 'ncmc-geometry-ncmc':
        if self.verbose: print("Updating chemical state with ncmc-geometry-ncmc scheme...")

        # DEBUG: Check current topology can be built.
        try:
            self.proposal_engine._system_generator.build_system(self.topology)
        except Exception as e:
            # Re-raise with extra context appended to the original message.
            msg = str(e)
            msg += '\n'
            msg += 'ExpandedEnsembleSampler.update_sampler: self.topology before ProposalEngine call cannot be built into a system'
            raise Exception(msg)

        # Propose new chemical state.
        if self.verbose: print("Proposing new topology...")
        [system, topology, positions] = [self.sampler.thermodynamic_state.system, self.topology, self.sampler.sampler_state.positions]
        topology_proposal = self.proposal_engine.propose(system, topology)
        if self.verbose: print("Proposed transformation: %s => %s" % (topology_proposal.old_chemical_state_key, topology_proposal.new_chemical_state_key))

        # DEBUG: Check the proposed topology can be built.
        # NOTE(review): the message below misspells 'topology_proposal' and
        # says "before ProposalEngine call" although the proposal has
        # already been made at this point.
        if self.verbose: print("Generating new system...")
        try:
            self.proposal_engine._system_generator.build_system(topology_proposal.new_topology)
        except Exception as e:
            msg = str(e)
            msg += '\n'
            msg += 'ExpandedEnsembleSampler.update_sampler: toology_proposal.new_topology before ProposalEngine call cannot be built into a system'
            raise Exception(msg)

        # Check to make sure no out-of-bounds atoms are present in new_to_old_atom_map
        # NOTE(review): the checks use range(n), i.e. indices 0..n-1, while
        # the messages print "(1..n)"; the second message also says "old
        # atoms" where the new-atom span is meant.
        natoms_old = topology_proposal.old_system.getNumParticles()
        natoms_new = topology_proposal.new_system.getNumParticles()
        if not set(topology_proposal.new_to_old_atom_map.values()).issubset(range(natoms_old)):
            msg = "Some old atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_old
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)
        if not set(topology_proposal.new_to_old_atom_map.keys()).issubset(range(natoms_new)):
            msg = "Some new atoms in TopologyProposal.new_to_old_atom_map are not in span of old atoms (1..%d):\n" % natoms_new
            msg += str(topology_proposal.new_to_old_atom_map)
            raise Exception(msg)

        # Determine state keys
        old_state_key = self.state_key
        new_state_key = topology_proposal.new_chemical_state_key

        # Determine log weight
        old_log_weight = self.get_log_weight(old_state_key)
        new_log_weight = self.get_log_weight(new_state_key)

        if self.verbose: print("Performing NCMC annihilation")
        # Alchemically eliminate atoms being removed.
        [ncmc_old_positions, ncmc_elimination_logp, potential_delete] = self.ncmc_engine.integrate(topology_proposal, positions, direction='delete')
        # Check that positions are not NaN
        # NOTE(review): `switching_nsteps` is not defined anywhere in this
        # method; unless it exists at module level, reaching this raise
        # produces a NameError instead of the intended message.
        if np.any(np.isnan(ncmc_old_positions)):
            raise Exception("Positions are NaN after NCMC delete with %d steps" % switching_nsteps)

        if self.verbose: print("Geometry engine proposal...")
        # Generate coordinates for new atoms and compute probability ratio of old and new probabilities.
        geometry_old_positions = ncmc_old_positions
        geometry_new_positions, geometry_logp_propose = self.geometry_engine.propose(topology_proposal, geometry_old_positions, self.sampler.thermodynamic_state.beta)

        # Optionally append the proposed geometry to an already-open PDB file.
        if self.geometry_pdbfile is not None:
            print("Writing proposed geometry...")
            #self.geometry_pdbfile.write('MODEL %4d\n' % (self.iteration+1)) # PyMOL doesn't render connectivity correctly this way
            from simtk.openmm.app import PDBFile
            PDBFile.writeFile(topology_proposal.new_topology, geometry_new_positions, file=self.geometry_pdbfile)
            #self.geometry_pdbfile.write('ENDMDL\n')
            self.geometry_pdbfile.flush()

        geometry_logp_reverse = self.geometry_engine.logp_reverse(topology_proposal, geometry_new_positions, geometry_old_positions, self.sampler.thermodynamic_state.beta)
        geometry_logp = geometry_logp_reverse - geometry_logp_propose

        if self.verbose: print("Performing NCMC insertion")
        # Alchemically introduce new atoms.
        [ncmc_new_positions, ncmc_introduction_logp, potential_insert] = self.ncmc_engine.integrate(topology_proposal, geometry_new_positions, direction='insert')
        # Check that positions are not NaN
        # NOTE(review): same undefined `switching_nsteps` as in the delete check.
        if np.any(np.isnan(ncmc_new_positions)):
            raise Exception("Positions are NaN after NCMC insert with %d steps" % switching_nsteps)

        # Compute change in eliminated potential contribution.
        switch_logp = - (potential_insert - potential_delete)
        if self.verbose:
            print('potential before geometry : %12.3f kT' % potential_delete)
            print('potential after geometry : %12.3f kT' % potential_insert)
            print('---------------------------------------------------------')
            print('switch_logp : %12.3f' % switch_logp)
            print('geometry_logp_propose : %12.3f' % geometry_logp_propose)
            print('geometry_logp_reverse : %12.3f' % geometry_logp_reverse)

        # Compute total log acceptance probability, including all components.
        logp_accept = topology_proposal.logp_proposal + geometry_logp + switch_logp + ncmc_elimination_logp + ncmc_introduction_logp + new_log_weight - old_log_weight
        if self.verbose: print("logp_accept = %+10.4e [logp_proposal %+10.4e geometry_logp %+10.4e switch_logp %+10.4e ncmc_elimination_logp %+10.4e ncmc_introduction_logp %+10.4e old_log_weight %+10.4e new_log_weight %+10.4e]" % (logp_accept, topology_proposal.logp_proposal, geometry_logp, switch_logp, ncmc_elimination_logp, ncmc_introduction_logp, old_log_weight, new_log_weight))

        # Accept or reject. A NaN acceptance probability is always rejected;
        # otherwise accept_everything, when set, forces acceptance.
        if np.isnan(logp_accept):
            accept = False
            print('logp_accept = NaN')
        else:
            accept = ((logp_accept>=0.0) or (np.random.uniform() < np.exp(logp_accept)))
            if self.accept_everything:
                print('accept_everything option is turned on; accepting')
                accept = True

        if accept:
            # Commit the proposed system, topology, positions and state key.
            self.sampler.thermodynamic_state.system = topology_proposal.new_system
            self.sampler.sampler_state.system = topology_proposal.new_system
            self.topology = topology_proposal.new_topology
            self.sampler.sampler_state.positions = ncmc_new_positions
            self.state_key = topology_proposal.new_chemical_state_key
            self.naccepted += 1
            if self.verbose: print(" accepted")
        else:
            self.nrejected += 1
            if self.verbose: print(" rejected")

    else:
        raise Exception("Expanded ensemble state proposal scheme '%s' unsupported" % self.scheme)

    # Update statistics.
    self.update_statistics()
# Build the sampler stack: MCMC -> expanded ensemble -> SAMS.
mcmc_sampler = MCMCSampler(sampler_state=sampler_state, thermodynamic_state=thermodynamic_state, ncfile=ncfile, platform=platform)
mcmc_sampler.timestep = timestep
mcmc_sampler.nsteps = 500
#mcmc_sampler.pdbfile = open('output.pdb', 'w') # uncomment this if you want to write a PDB trajectory as you simulate; WARNING: LARGE!
mcmc_sampler.topology = topology
mcmc_sampler.verbose = True
exen_sampler = ExpandedEnsembleSampler(mcmc_sampler, thermodynamic_states)
exen_sampler.verbose = True
sams_sampler = SAMSSampler(exen_sampler)
sams_sampler.verbose = True

# DEBUG: Write PDB of initial frame
print("Writing initial frame to 'initial.pdb'...")
from simtk.openmm.app import PDBFile
# Context manager closes the file even if writeFile raises.
with open('initial.pdb', 'w') as outfile:
    PDBFile.writeFile(topology, positions, outfile)

# Run the simulation
print('Running simulation...')
#exen_sampler.update_scheme = 'restricted-range' # scheme for deciding which alchemical state to jump to
exen_sampler.update_scheme = 'global-jump' # scheme for deciding which alchemical state to jump to
#exen_sampler.locality = thermodynamic_state_neighbors # neighbors to examine for each state
sams_sampler.update_method = 'rao-blackwellized' # scheme for updating free energy estimates
niterations = 20000 # number of iterations to run
sams_sampler.run(niterations) # run sampler
ncfile.close()

# Analyze
from sams import analysis

# States
def __init__(self, alchemical_protocol='two-phase', nlambda=50, **kwargs):
    """
    Create an alchemical free energy calculation SAMS test system from the provided system.

    The instance must already provide ``topology``, ``system``,
    ``positions`` and ``alchemical_atoms`` after the parent initializer has
    run. ``temperature`` defaults to 300 K and ``pressure`` to ``None``
    when they are not already set. A ``MonteCarloBarostat`` is added when a
    pressure is set and the system has no barostat. Writes the initial
    frame to ``initial.pdb`` in the working directory.

    Parameters
    ----------
    alchemical_protocol : str, optional, default='two-phase'
        Alchemical protocol scheme to use. ['two-phase', 'fused']
        'fused' scales sterics and electrostatics together over ``nlambda``
        states. 'two-phase' first turns off electrostatics, then sterics,
        and produces ``nlambda + 1`` states in total.
    nlambda : int, optional, default=50
        Number of alchemical states.

    Raises
    ------
    Exception
        If a required attribute is missing or ``alchemical_protocol`` is
        not one of the supported schemes.

    """
    super(AlchemicalSAMSTestSystem, self).__init__(**kwargs)
    self.description = 'Alchemical SAMS test system'
    self.alchemical_protocol = alchemical_protocol

    if not (hasattr(self, 'topology') and hasattr(self, 'system') and hasattr(self, 'positions') and hasattr(self, 'alchemical_atoms')):
        raise Exception("%s: 'topology', 'system', 'positions', and 'alchemical_atoms' properties must be defined!" % self.__class__.__name__)
    # Fill in defaults (the original repeated the temperature check twice).
    if not hasattr(self, 'temperature'):
        self.temperature = 300 * unit.kelvin
    if not hasattr(self, 'pressure'):
        self.pressure = None

    # Add a MonteCarloBarostat if system does not have one
    barostat_names = ['MonteCarloBarostat', 'MonteCarloAnisotropicBarostat']
    has_barostat = any(force.__class__.__name__ in barostat_names
                       for force in self.system.getForces())
    if (self.pressure is not None) and (not has_barostat):
        barostat = openmm.MonteCarloBarostat(self.pressure, self.temperature)
        self.system.addForce(barostat)

    # Create alchemically-modified system and populate thermodynamic states.
    from alchemy import AbsoluteAlchemicalFactory
    from sams import ThermodynamicState
    self.thermodynamic_states = list()
    if alchemical_protocol == 'fused':
        factory = AbsoluteAlchemicalFactory(self.system, ligand_atoms=self.alchemical_atoms, annihilate_electrostatics=True, annihilate_sterics=False)
        self.system = factory.createPerturbedSystem()
        # Sterics and electrostatics share a single lambda going 1 -> 0.
        alchemical_lambdas = np.linspace(1.0, 0.0, nlambda)
        for alchemical_lambda in alchemical_lambdas:
            parameters = {'lambda_sterics' : alchemical_lambda, 'lambda_electrostatics' : alchemical_lambda}
            self.thermodynamic_states.append( ThermodynamicState(system=self.system, temperature=self.temperature, pressure=self.pressure, parameters=parameters) )
    elif alchemical_protocol == 'two-phase':
        factory = AbsoluteAlchemicalFactory(self.system, ligand_atoms=self.alchemical_atoms, annihilate_electrostatics=True, annihilate_sterics=False, softcore_beta=0.0) # turn off softcore electrostatics
        self.system = factory.createPerturbedSystem()
        nelec = int(nlambda/2.0)
        nvdw = nlambda - nelec
        # Phase 1: electrostatics 1 -> 0 with full sterics (nelec + 1 states).
        for state in range(nelec+1):
            parameters = {'lambda_sterics' : 1.0, 'lambda_electrostatics' : (1.0 - float(state)/float(nelec)) }
            self.thermodynamic_states.append( ThermodynamicState(system=self.system, temperature=self.temperature, pressure=self.pressure, parameters=parameters) )
        # Phase 2: sterics -> 0 with electrostatics off (nvdw states).
        for state in range(1,nvdw+1):
            parameters = {'lambda_sterics' : (1.0 - float(state)/float(nvdw)), 'lambda_electrostatics' : 0.0 }
            self.thermodynamic_states.append( ThermodynamicState(system=self.system, temperature=self.temperature, pressure=self.pressure, parameters=parameters) )
    else:
        raise Exception("'alchemical_protocol' must be one of ['two-phase', 'fused']; scheme '%s' unknown." % alchemical_protocol)

    # Create SAMS samplers
    print('Setting up samplers...')
    from sams.samplers import SamplerState, MCMCSampler, ExpandedEnsembleSampler, SAMSSampler
    thermodynamic_state_index = 0 # initial thermodynamic state index
    thermodynamic_state = self.thermodynamic_states[thermodynamic_state_index]
    sampler_state = SamplerState(positions=self.positions)
    # NOTE(review): self.ncfile is not set in this method; presumably the
    # parent initializer provides it -- confirm.
    self.mcmc_sampler = MCMCSampler(sampler_state=sampler_state, thermodynamic_state=thermodynamic_state, ncfile=self.ncfile)
    self.mcmc_sampler.timestep = 2.0 * unit.femtoseconds
    self.mcmc_sampler.nsteps = 500
    #self.mcmc_sampler.pdbfile = open('output.pdb', 'w')
    self.mcmc_sampler.topology = self.topology
    self.mcmc_sampler.verbose = True
    self.exen_sampler = ExpandedEnsembleSampler(self.mcmc_sampler, self.thermodynamic_states)
    self.exen_sampler.verbose = True
    self.sams_sampler = SAMSSampler(self.exen_sampler)
    self.sams_sampler.verbose = True

    # DEBUG: Write PDB of initial frame
    from simtk.openmm.app import PDBFile
    # Context manager closes the file even if writeFile raises.
    with open('initial.pdb', 'w') as outfile:
        PDBFile.writeFile(self.topology, self.positions, outfile)
# Load the local file 3UE4.pdb, drop chain B, add missing atoms, strip all
# heterogens (including water), add hydrogens at pH 7.0 and write the result.
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

fixer = PDBFixer(filename='3UE4.pdb')
fixer.removeChains(chainIds=['B'])

# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throws an
# exception, and if fixer.findMissingResidues() is called instead, several
# terminal residues are added.
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.removeHeterogens(keepWater=False)
fixer.addMissingHydrogens(7.0)

# Context manager ensures the output file is flushed and closed.
with open('3UE4-pdbfixer.pdb', 'w') as outfile:
    PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
def __init__(self, **kwargs):
    """Set up the mTOR alchemical loop-softening test system.

    Loads the packaged ``mtor_pdbfixer_apo.pdb`` structure, solvates it with
    TIP3P water, builds the OpenMM system, selects the atoms spanned by two
    residue ranges as the alchemical region, creates the
    alchemically-modified system with 51 thermodynamic states
    (electrostatics off first, then sterics), and builds the MCMC,
    expanded-ensemble and SAMS samplers. Writes ``initial.pdb`` and opens
    ``output.pdb`` in the working directory.

    Parameters
    ----------
    **kwargs
        Forwarded unchanged to the parent initializer.
    """
    super(LoopSoftening, self).__init__(**kwargs)
    self.description = 'Alchemical Loop Softening script'

    padding = 9.0*unit.angstrom
    explicit_solvent_model = 'tip3p'
    setup_path = 'data/mtor'

    # Create topology, positions, and system.
    from pkg_resources import resource_filename
    gaff_xml_filename = resource_filename('sams', 'data/gaff.xml')
    ffxmls = [gaff_xml_filename, 'amber99sbildn.xml', 'tip3p.xml']
    forcefield_kwargs={ 'nonbondedMethod' : app.CutoffPeriodic, 'nonbondedCutoff' : 9.0 * unit.angstrom, 'implicitSolvent' : None, 'constraints' : app.HBonds, 'rigidWater' : True }

    # Load topologies and positions for all components
    print('Creating mTOR test system...')
    forcefield = app.ForceField(*ffxmls)
    from simtk.openmm.app import PDBFile, Modeller
    pdb_filename = resource_filename('sams', os.path.join(setup_path, 'mtor_pdbfixer_apo.pdb'))
    pdbfile = PDBFile(pdb_filename)
    modeller = app.Modeller(pdbfile.topology, pdbfile.positions)
    print('Adding solvent...')
    modeller.addSolvent(forcefield, model=explicit_solvent_model, padding=padding)
    self.topology = modeller.getTopology()
    self.positions = modeller.getPositions()
    print('Creating system...')
    self.system = forcefield.createSystem(self.topology, **forcefield_kwargs)

    # DEBUG: Write PDB (context manager closes the file even on error).
    with open('initial.pdb', 'w') as outfile:
        PDBFile.writeFile(self.topology, self.positions, outfile)

    # Atom Selection using MDtraj
    res_pairs = [[403, 483], [1052, 1109]]
    t = md.load(pdb_filename)
    alchemical_atoms = set()
    for x in res_pairs:
        start = min(t.top.select('residue %s' % min(x)))
        end = max(t.top.select('residue %s' % max(x))) + 1
        # set.union() returns a new set and leaves the receiver untouched,
        # so the original left alchemical_atoms empty; update() mutates it.
        alchemical_atoms.update(range(start, end))

    # Create thermodynamic states.
    print('Creating alchemically-modified system...')
    temperature = 300 * unit.kelvin
    # NOTE(review): pressure is defined but never passed to the
    # thermodynamic states below -- confirm whether that is intended.
    pressure = 1.0 * unit.atmospheres

    from alchemy import AbsoluteAlchemicalFactory
    factory = AbsoluteAlchemicalFactory(self.system, ligand_atoms=alchemical_atoms, annihilate_electrostatics=True, alchemical_torsions=True, annihilate_sterics=True, softcore_beta=0.0) # turn off softcore electrostatics
    self.system = factory.createPerturbedSystem()
    print('Setting up alchemical intermediates...')
    from sams import ThermodynamicState
    self.thermodynamic_states = list()
    # Phase 1: electrostatics 1 -> 0 with full sterics (26 states).
    for state in range(26):
        parameters = {'lambda_sterics' : 1.0, 'lambda_electrostatics' : (1.0 - float(state)/25.0) }
        self.thermodynamic_states.append( ThermodynamicState(system=self.system, temperature=temperature, parameters=parameters) )
    # Phase 2: sterics -> 0 with electrostatics off (25 more states).
    for state in range(1,26):
        parameters = {'lambda_sterics' : (1.0 - float(state)/25.0), 'lambda_electrostatics' : 0.0 }
        self.thermodynamic_states.append( ThermodynamicState(system=self.system, temperature=temperature, parameters=parameters) )

    #minimize(self.system, self.positions)
    minimize(self.system)

    # Create SAMS samplers
    print('Setting up samplers...')
    from sams.samplers import SamplerState, MCMCSampler, ExpandedEnsembleSampler, SAMSSampler
    thermodynamic_state_index = 0 # initial thermodynamic state index
    thermodynamic_state = self.thermodynamic_states[thermodynamic_state_index]
    # NOTE(review): this reads positions from self.system rather than
    # self.positions; presumably minimize() attaches them -- confirm.
    sampler_state = SamplerState(positions=self.system.positions)
    self.mcmc_sampler = MCMCSampler(sampler_state=sampler_state, thermodynamic_state=thermodynamic_state, ncfile=self.ncfile)
    # The handle is stored on the sampler, so it is intentionally left open here.
    self.mcmc_sampler.pdbfile = open('output.pdb', 'w')
    self.mcmc_sampler.topology = self.topology
    self.mcmc_sampler.verbose = True
    self.exen_sampler = ExpandedEnsembleSampler(self.mcmc_sampler, self.thermodynamic_states)
    self.exen_sampler.verbose = True
    self.sams_sampler = SAMSSampler(self.exen_sampler)
    self.sams_sampler.verbose = True
def run(options):
    """Add hydrogens and a solvent box to a structure, then write it out.

    Parameters
    ----------
    options : mapping
        Must provide ``'pdb'`` (the input handed to ``PDBFixer``) and
        ``'outfile'`` (the path the solvated structure is written to).
    """
    fixer = PDBFixer(options['pdb'])
    fixer.addMissingHydrogens(7.0)
    # Fixed 2.62 nm cubic box; ionic strength is zero.
    fixer.addSolvent(boxSize=Vec3(2.62,2.62,2.62)*nanometers, padding=None,
                     positiveIon='Na+', negativeIon='Cl-',
                     ionicStrength=0.0*molar)
    # Context manager ensures the output file is flushed and closed.
    with open(options['outfile'], 'w') as outfile:
        PDBFile.writeFile(fixer.topology, fixer.positions, outfile)
fixer.findMissingResidues() # only add missing residues in the middle of the chain, do not add terminal ones chains = list(fixer.topology.chains()) keys = fixer.missingResidues.keys() missingResidues = dict() for key in keys: chain = chains[key[0]] if not (key[1] == 0 or key[1] == len(list(chain.residues()))): missingResidues[key] = fixer.missingResidues[key] fixer.missingResidues = missingResidues fixer.findMissingAtoms() fixer.addMissingAtoms() PDBFile.writeFile(fixer.topology, fixer.positions, open('4h12_fixed.pdb', 'w')) # keep only protein and zinc ions traj = md.load('4h12_fixed.pdb') traj = traj.atom_slice(traj.top.select('(protein and not resname SAH) or resname ZN')) # implement changes necessary for the use of the dummy atom Zn2+ model # change residue name of the zincs from ZN to ZNB, and atom names from ZN to Zn for residue in traj.top.chain(1).residues: residue.name = 'ZNB' for atom in traj.top.chain(1).atoms: atom.name = 'Zn' # change name of cysteines coordinating zincs to CYM (deprotonated cysteine) for residue in traj.top.chain(0).residues: if residue.index in [86, 92, 82, 69, 54, 52, 73, 184, 233, 238, 231]:
def build_pdb(sequence, filename, n_cap=None, c_cap=None, pH=7.0):
    """Build a PDB from a sequence and save to disk.

    The chain is built with pmx, written to a temporary PDB file, patched
    with mdtraj (element and name fixes for the cap atoms), repaired with
    PDBFixer and finally written to ``filename``.

    Parameters
    ----------
    sequence : str
        String representation of protein sequence as 1 letter codes.
    filename : str
        name of output filename
    n_cap : str, optional, default=None
        Either None or "ACE"
    c_cap : str, optional, default=None
        Either None, "NME", or "NH2"
    pH : float, optional, default=7.0
        pH to use when building amino acids.
    """
    chain = pmx.Chain().create(sequence)

    if c_cap is not None:
        chain.add_cterm_cap()

    if n_cap is not None:
        chain.add_nterm_cap()

    # The original wrote `temp_file.close` without parentheses, a no-op, so
    # the temporary file stayed open until garbage collection. The context
    # manager keeps it alive while it is used by name and then removes it.
    with tempfile.NamedTemporaryFile(suffix=".pdb") as temp_file:
        chain.write(temp_file.name)

        # Now fix errors in element entries in CAP atoms
        # Also convert
        traj = mdtraj.load(temp_file.name)
        top, bonds = traj.top.to_dataframe()

        # `.ix` no longer exists in current pandas and chained assignment
        # may write to a copy; `.loc[row, column]` assigns in place.
        if n_cap == "ACE":
            ind = np.where((top.name == "H3") & (top.resName == "ACE"))[0][0]
            top.loc[ind, "element"] = "H"

        if c_cap in ["NME", "NH2"]:
            ind = np.where((top.name == "H3") & (top.resName == "NME"))[0][0]
            top.loc[ind, "element"] = "H"

        if c_cap == "NH2":
            # Keep all atoms except the 3 NME methyl protons
            keep_ind = np.where((top.resName != "NME") | ((top.name != "H1") & (top.name != "H2") & (top.name != "H3")))[0]

            # Convert the NME carbon into a proton
            convert_ind = np.where((top.resName == "NME") & (top.name == "C"))[0][0]
            top.loc[convert_ind, "element"] = "H"
            top.loc[convert_ind, "name"] = "HN2"

            convert_ind = np.where((top.resName == "NME") & (top.name == "H"))[0][0]
            top.loc[convert_ind, "name"] = "HN1"

            # Rename the whole residue from NME to NH2.
            top.loc[np.where(top.resName == "NME")[0], "resName"] = "NH2"

            traj._topology = mdtraj.Topology.from_dataframe(top, bonds)
            traj.restrict_atoms(keep_ind)
            top, bonds = traj.top.to_dataframe()

        if n_cap or c_cap:
            traj._topology = mdtraj.Topology.from_dataframe(top, bonds)
            traj.save(temp_file.name)  # Save output with fixed element names in caps.

        # Now fix missing charged termini.
        #structure = pdbfixer.pdbfixer.PdbStructure(open(temp_file.name))
        fixer = pdbfixer.pdbfixer.PDBFixer(temp_file.name)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    # Context manager ensures the output file is flushed and closed.
    with open(filename, 'w') as outfile:
        PDBFile.writeFile(fixer.topology, fixer.positions, outfile)