# Write the PDBFixer-processed structure before any minimization.
output_filename = '%s-pdbfixer.pdb' % pdbid
print('Writing PDB file to "%s"...' % output_filename)
# A context manager guarantees the handle is flushed and closed.
with open(output_filename, 'w') as outfile:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)

# Create OpenMM System.
print('Creating OpenMM system...')
system = forcefield.createSystem(fixer.topology,
                                 nonbondedMethod=nonbondedMethod,
                                 constraints=constraints,
                                 rigidWater=True,
                                 removeCMMotion=False)

# Minimize to update positions.
print('Minimizing...')
integrator = openmm.VerletIntegrator(1.0 * unit.femtosecond)
context = openmm.Context(system, integrator)
context.setPositions(fixer.positions)
openmm.LocalEnergyMinimizer.minimize(context)
state = context.getState(getPositions=True)
fixer.positions = state.getPositions()

# Write final coordinates.
output_filename = '%s-minimized.pdb' % pdbid
print('Writing PDB file to "%s"...' % output_filename)
with open(output_filename, 'w') as outfile:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile)

# Serialize the system, the integrator and the final state to XML.
print('Serializing to XML...')
system_filename = 'system.xml'
integrator_filename = 'integrator.xml'
state_filename = 'state.xml'
write_file(system_filename, openmm.XmlSerializer.serialize(system))
write_file(integrator_filename, openmm.XmlSerializer.serialize(integrator))
# Re-query the state with everything requested so the XML is complete.
state = context.getState(getPositions=True,
                         getVelocities=True,
                         getForces=True,
                         getEnergy=True,
                         getParameters=True,
                         enforcePeriodicBox=True)
write_file(state_filename, openmm.XmlSerializer.serialize(state))
def add_droplet(
    self,
    topology: md.Topology,
    coordinates: unit.quantity.Quantity,
    diameter: unit.quantity.Quantity = (30.0 * unit.angstrom),
    restrain_hydrogen_bonds: bool = True,
    restrain_hydrogen_angles: bool = False,
    top_file: str = "",
) -> md.Trajectory:
    """
    Adding a droplet with a given diameter around a small molecule.

    Besides returning a trajectory, this method sets
    ``self._ligand_in_water_coordinates``, ``self.ligand_in_water_atoms``,
    ``self.ligand_in_water_topology`` and ``self.solvent_restraints``.

    Parameters
    ----------
    topology: md.Topology
        topology of the molecule
    coordinates: np.array, unit'd
        coordinates of the molecule
    diameter: float, unit'd
        diameter of the droplet; half of it is used as the cutoff radius
    restrain_hydrogen_bonds: bool
        if True, a BondFlatBottomRestraint is added for both O-H bonds of
        every water residue
    restrain_hydrogen_angles: bool
        if True, an AngleHarmonicRestraint is added for the H-O-H angle of
        every water residue
    top_file: str
        if top_file is provided the final droplet pdb is either kept and can be reused
        or if top_file already exists it will be used to create the same droplet.

    Returns
    ----------
    A mdtraj.Trajectory object with the ligand centered in the solvent for inspection.

    Raises
    ------
    RuntimeError
        if a water residue contains an atom that is neither O nor H
        (only checked when one of the hydrogen restraints is requested).
    """
    # isinstance (instead of an exact type comparison) also accepts subclasses.
    assert isinstance(diameter, unit.Quantity)
    assert isinstance(topology, md.Topology)
    assert isinstance(coordinates, unit.Quantity)

    if restrain_hydrogen_bonds:
        logger.debug("Hydrogen bonds are restraint.")

    if restrain_hydrogen_angles:
        logger.warning("HOH angles are restraint.")

    # get topology from mdtraj to PDBfixer via pdb file
    radius = diameter.value_in_unit(unit.angstrom) / 2
    center = np.array([radius, radius, radius])

    # if no solvated pdb file is provided generate one
    if top_file:
        # read in the file with the defined droplet
        pdb_filepath = top_file
    else:
        # generate a one time droplet
        pdb_filepath = f"tmp{random.randint(1,10000000)}.pdb"

    if not os.path.exists(pdb_filepath):
        logger.info(f"Generating droplet for {pdb_filepath}...")

        # mdtraj works with nanometer
        md.Trajectory(coordinates.value_in_unit(unit.nanometer),
                      topology).save_pdb(pdb_filepath)
        pdb = PDBFixer(filename=pdb_filepath)
        os.remove(pdb_filepath)

        # put the ligand in the center
        l_in_nanometer = diameter.value_in_unit(unit.nanometer)
        pdb.positions = np.array(
            pdb.positions.value_in_unit(
                unit.nanometer)) + (l_in_nanometer / 2)
        # add water
        pdb.addSolvent(boxVectors=(
            Vec3(l_in_nanometer, 0.0, 0.0),
            Vec3(0.0, l_in_nanometer, 0.0),
            Vec3(0.0, 0.0, l_in_nanometer),
        ))
        # get topology from PDBFixer to mdtraj
        # NOTE: a second tmpfile - not happy about this
        from simtk.openmm.app import PDBFile

        # Close the handle before parmed reads the file back, so the
        # content is guaranteed to be flushed to disk.
        with open(pdb_filepath, "w") as pdb_handle:
            PDBFile.writeFile(pdb.topology, pdb.positions, pdb_handle)

        # load pdb in parmed
        logger.debug("Load with parmed ...")
        structure = pm.load_file(pdb_filepath)
        os.remove(pdb_filepath)

        # search for residues that are outside of the cutoff and delete them
        to_delete = []
        logger.debug("Flag residues ...")

        for residue in structure.residues:
            for atom in residue:
                p1 = np.array([atom.xx, atom.xy, atom.xz])
                dist = np.sqrt(np.sum((p1 - center)**2, axis=0))
                # NOTE: distance must be greater than radius + 1 Angstrom
                if dist > radius + 1:
                    to_delete.append(residue)

        # only delete water molecules; a residue may have been flagged once
        # per atom, hence the de-duplication through set()
        for residue in list(set(to_delete)):
            if residue.name == "HOH":
                logger.debug(f"Remove: {residue}")
                structure.residues.remove(residue)
            else:
                logger.warning(
                    f"Residue {residue} reaches outside the droplet")
                print(f"Residue {residue} reaches outside the droplet")

        structure.write_pdb(pdb_filepath)

    # load pdb with mdtraj
    traj = md.load(pdb_filepath)
    if not top_file:
        # the droplet file was only a temporary one
        os.remove(pdb_filepath)

    # set coordinates #NOTE: note the xyz[0]
    self._ligand_in_water_coordinates = traj.xyz[0] * unit.nanometer

    # generate atom string: concatenated element symbols of all atoms
    self.ligand_in_water_atoms = "".join(
        atom.element.symbol for atom in traj.topology.atoms)

    # set mdtraj topology
    self.ligand_in_water_topology = traj.topology

    # set FlattBottomRestraintToCenter on each oxygen
    self.solvent_restraints = []
    for residue in traj.topology.residues:
        if residue.is_water:
            for atom in residue.atoms:
                if str(atom.element.symbol) == "O":
                    self.solvent_restraints.append(
                        CenterFlatBottomRestraint(
                            sigma=0.1 * unit.angstrom,
                            point=center * unit.angstrom,
                            radius=(diameter / 2),
                            atom_idx=atom.index,
                            active_at=-1,
                        ))
                    logger.debug("Adding restraint to center to {}".format(
                        atom.index))

    if restrain_hydrogen_bonds or restrain_hydrogen_angles:
        for residue in traj.topology.residues:
            if residue.is_water:
                oxygen_idx = -1
                hydrogen_idxs = []
                for atom in residue.atoms:
                    if str(atom.element.symbol) == "O":
                        oxygen_idx = atom.index
                    elif str(atom.element.symbol) == "H":
                        hydrogen_idxs.append(atom.index)
                    else:
                        raise RuntimeError(
                            "Water should only consist of O and H atoms.")

                if restrain_hydrogen_bonds:
                    # one restraint per O-H bond
                    self.solvent_restraints.append(
                        BondFlatBottomRestraint(
                            sigma=0.2 * unit.angstrom,
                            atom_i_idx=oxygen_idx,
                            atom_j_idx=hydrogen_idxs[0],
                            atoms=self.ligand_in_water_atoms,
                        ))
                    self.solvent_restraints.append(
                        BondFlatBottomRestraint(
                            sigma=0.2 * unit.angstrom,
                            atom_i_idx=oxygen_idx,
                            atom_j_idx=hydrogen_idxs[1],
                            atoms=self.ligand_in_water_atoms,
                        ))

                if restrain_hydrogen_angles:
                    self.solvent_restraints.append(
                        AngleHarmonicRestraint(
                            sigma=0.1 * unit.radian,
                            atom_i_idx=hydrogen_idxs[0],
                            atom_j_idx=oxygen_idx,
                            atom_k_idx=hydrogen_idxs[1],
                        ))

    # return a mdtraj object for visual check
    return md.Trajectory(
        self._ligand_in_water_coordinates.value_in_unit(unit.nanometer),
        self.ligand_in_water_topology,
    )
# This is basically the pdbfixer code, but without the amber lines.
# NOTE(review): in the flattened source it is ambiguous whether the next
# statement was commented out; it is kept live because `modeller` is used
# immediately below - confirm against the original file.
modeller = Modeller(fixer.topology, fixer.positions)
forcefield = ForceField('amber99sb.xml', 'tip5p.xml')
system = forcefield.createSystem(fixer.topology,
                                 nonbondedMethod=PME,
                                 nonbondedCutoff=0.05 * nanometer,
                                 constraints=HBonds)
modeller.addSolvent(forcefield,
                    padding=0.05 * nanometer,
                    boxSize=None,
                    boxVectors=None)
#modeller.addSolvent(forcefield, padding=0.4*nanometer, boxSize, boxVectors=boxVectors, model='tip5p')
# modeller.addSolvent(forcefield, padding=padding, boxSize=boxSize, boxVectors=boxVectors, positiveIon=positiveIon, negativeIon=negativeIon, ionicStrength=ionicStrength)
# Copy the solvated topology/positions back onto the fixer.
fixer.topology = modeller.topology
fixer.positions = modeller.positions
# Element symbols and raw coordinate values of every atom after solvation.
proatoms = [atom.element._symbol for atom in modeller.topology.atoms()]
procoords = np.array(
    [fixer.positions[atom.index]._value for atom in modeller.topology.atoms()])


def WriteXYZfile(atoms, coords, nm_="out.xyz"):
    """Write atoms and coordinates to an XYZ-style text file.

    The file starts with the atom count followed by an empty line, then one
    line per atom: the atom label and its first three coordinate components,
    separated by single spaces.

    Parameters
    ----------
    atoms : sequence of str
        Atom labels, one per atom.
    coords : sequence
        Per-atom coordinates; only indices 0, 1 and 2 of each entry are used.
        Must contain at least ``len(atoms)`` entries.
    nm_ : str
        Path of the output file (overwritten if it exists).
    """
    natom = len(atoms)
    # The context manager closes (and flushes) the file even if a write fails.
    with open(nm_, "w") as f:
        f.write(str(natom) + "\n" + "\n")
        for i in range(natom):
            f.write(atoms[i] + " " + str(coords[i][0]) + " " +
                    str(coords[i][1]) + " " + str(coords[i][2]) + "\n")
# Create OpenMM System. print('Creating OpenMM system...') system = forcefield.createSystem(fixer.topology, nonbondedMethod=nonbondedMethod, constraints=constraints, rigidWater=True, removeCMMotion=False) # Minimimze to update positions. print('Minimizing...') integrator = openmm.VerletIntegrator(1.0 * unit.femtosecond) context = openmm.Context(system, integrator) context.setPositions(fixer.positions) openmm.LocalEnergyMinimizer.minimize(context) state = context.getState(getPositions=True) fixer.positions = state.getPositions() # Write final coordinates. output_filename = '%s-minimized.pdb' % pdbid print('Writing PDB file to "%s"...' % output_filename) app.PDBFile.writeFile(fixer.topology, fixer.positions, open(output_filename, 'w')) # Serialize final coordinates. print('Serializing to XML...') system_filename = 'system.xml' integrator_filename = 'integrator.xml' state_filename = 'state.xml' write_file(system_filename, openmm.XmlSerializer.serialize(system)) write_file(integrator_filename, openmm.XmlSerializer.serialize(integrator)) state = context.getState(getPositions=True,
def hydrate(system, opt):
    """
    This function solvates the system by using PDBFixer

    The system is first translated so that the center of its bounding box
    sits at the center of a cube of edge
    ``2 * solvent_padding + largest bounding-box extent``.

    Parameters:
    -----------
    system: OEMol molecule
        The system to solvate
    opt: python dictionary
        The parameters used to solvate the system. The keys read here are
        'solvent_padding' (used as angstroms) and 'salt_concentration'
        (used as millimolar).

    Return:
    -------
    oe_mol: OEMol
        The solvated system: the added water/ions plus a translated copy of
        the input system, with the periodic box vectors stored under the
        'box_vectors' tag.
    """
    def BoundingBox(molecule):
        """
        This function calculates the Bounding Box of the passed
        molecule

        molecule: OEMol

        return: bb (numpy array)
            the calculated bounding box is returned as numpy array:
            [(xmin,ymin,zmin), (xmax,ymax,zmax)]
        """
        np_coords = np.array(list(molecule.GetCoords().values()))
        min_coord = np_coords.min(axis=0)
        max_coord = np_coords.max(axis=0)
        return np.array([min_coord, max_coord])

    # Create a system copy so the caller's molecule is not moved
    sol_system = system.CreateCopy()

    # Calculate system BoundingBox (Angstrom units)
    BB = BoundingBox(sol_system)

    # Estimation of the box cube length in A
    box_edge = 2.0 * opt['solvent_padding'] + np.max(BB[1] - BB[0])

    # BB center
    xc = (BB[0][0] + BB[1][0]) / 2.
    yc = (BB[0][1] + BB[1][1]) / 2.
    zc = (BB[0][2] + BB[1][2]) / 2.

    # Translation that moves the BB center onto the cube center
    delta = np.array([box_edge / 2., box_edge / 2., box_edge / 2.]) - \
        np.array([xc, yc, zc])

    sys_coord_dic = {k: (v + delta) for k, v in sol_system.GetCoords().items()}
    sol_system.SetCoords(sys_coord_dic)

    # Load a fake system to initialize PDBfixer
    filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
    fixer = PDBFixer(filename=filename)

    # Convert between OE and OpenMM topology
    omm_top, omm_pos = oeommutils.oemol_to_openmmTop(sol_system)

    # Chain ids of the solute; anything else after solvation was added
    chain_names = {chain.id for chain in omm_top.chains()}

    # Set the correct topology to the fake system
    fixer.topology = omm_top
    fixer.positions = omm_pos

    # Solvate the system
    fixer.addSolvent(padding=unit.Quantity(opt['solvent_padding'], unit.angstroms),
                     ionicStrength=unit.Quantity(opt['salt_concentration'], unit.millimolar))

    # The OpenMM topology produced by the solvation fixer has missing bond
    # orders and aromaticity. The following section is creating a new openmm
    # topology made of just water molecules and ions. The new topology is then
    # converted in an OEMol and added to the passed molecule to produce the
    # solvated system
    wat_ion_top = app.Topology()

    # Atom dictionary between the the PDBfixer topology and the water_ion topology
    fixer_atom_to_wat_ion_atom = {}

    for chain in fixer.topology.chains():
        if chain.id not in chain_names:
            n_chain = wat_ion_top.addChain(chain.id)
            for res in chain.residues():
                n_res = wat_ion_top.addResidue(res.name, n_chain)
                for at in res.atoms():
                    n_at = wat_ion_top.addAtom(at.name, at.element, n_res)
                    fixer_atom_to_wat_ion_atom[at] = n_at

    for bond in fixer.topology.bonds():
        at0 = bond[0]
        at1 = bond[1]
        # Bonds touching a solute atom have no counterpart in the water/ion
        # topology and are skipped; any other failure is no longer hidden.
        if (at0 in fixer_atom_to_wat_ion_atom
                and at1 in fixer_atom_to_wat_ion_atom):
            wat_ion_top.addBond(fixer_atom_to_wat_ion_atom[at0],
                                fixer_atom_to_wat_ion_atom[at1],
                                type=None, order=1)

    # Positions past the solute's are taken as the added water/ion atoms
    wat_ion_pos = fixer.positions[len(omm_pos):]

    oe_mol = oeommutils.openmmTop_to_oemol(wat_ion_top, wat_ion_pos)

    # Setting the box vectors
    omm_box_vectors = fixer.topology.getPeriodicBoxVectors()
    box_vectors = utils.PackageOEMol.encodePyObj(omm_box_vectors)
    oe_mol.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    oechem.OEAddMols(oe_mol, sol_system)

    return oe_mol
def process_pdb(path,
                corr_path,
                chain_id,
                max_atoms,
                gsd_file,
                embedding_dicts,
                NN,
                nlist_model,
                keep_residues=[-1, 1],
                debug=False,
                units=unit.nanometer,
                frame_number=3,
                model_index=0,
                log_file=None,
                shiftx_style=False):
    """Build per-residue fragment records from a PDB file and its peak data.

    For a random sample of frames the structure is cleaned with PDBFixer, a
    neighbor list is computed with ``nlist_model`` and, for every residue of
    the requested chain, a fixed-size fragment (the residue, its chain
    neighbors given by ``keep_residues`` and the spatial neighbors of its
    atoms) is assembled and passed to ``make_tfrecord``.

    NOTE(review): the nesting of this function was reconstructed from a
    whitespace-flattened source; confirm it against the original file.

    Parameters
    ----------
    path : str
        PDB file, read with ``app.PDBFile`` and ``PDBFixer``.
    corr_path : str
        File handed to ``process_corr`` to obtain the peak data.
    chain_id : str
        Chain to process; '_' selects the chain of the first residue.
    max_atoms : int
        Size of the per-fragment arrays.
    gsd_file : list-like or None
        If not None, a snapshot of every fragment is appended to it.
    embedding_dicts : dict
        Lookup tables under the keys 'name', 'atom', 'nlist' and 'class'.
        The 'name' table is extended in place with unseen atom names.
    NN : int
        Number of neighbors per atom read from the ``nlist_model`` output.
    nlist_model : callable
        Called with the (atoms, 3) position array; its result is indexed as
        ``[atom, neighbor, 0]`` (distance) and ``[atom, neighbor, 1]``
        (neighbor atom index).
    keep_residues : sequence of two ints
        Offsets (relative to the current residue index) of the first and
        last chain neighbor to include. Never modified here.
    debug : bool
        Print diagnostics and raise on some failures instead of skipping.
    units : unit
        Unit the positions are converted to.
    frame_number : int
        Maximum number of frames sampled (forced to 1 if ``shiftx_style``).
    model_index : int
        Stored in the record indices and in the log line.
    log_file : file-like or None
        If not None, one line per written record is appended.
    shiftx_style : bool
        Passed to ``process_corr``; also disables adding missing
        atoms/hydrogens.

    Returns
    -------
    tuple
        ``(result, fraction, len(result), peak_count)`` where ``result`` is
        the list of records and ``fraction`` is the number of peak ids
        matched in the last processed frame divided by ``len(peak_data)``.
    """
    global MA_LOST_FRAGS
    if shiftx_style:
        frame_number = 1
    # load pdb
    pdb = app.PDBFile(path)

    # load cs sets
    peak_data, sequence_map, peak_seq = process_corr(corr_path, debug,
                                                     shiftx_style)

    result = []
    # check for weird/null chain
    if chain_id == '_':
        chain_id = list(pdb.topology.residues())[0].chain.id[0]
    # sometimes chains have extra characters (why?)
    residues = list(
        filter(lambda r: r.chain.id[0] == chain_id, pdb.topology.residues()))
    if len(residues) == 0:
        if debug:
            raise ValueError('Failed to find requested chain ', chain_id)

    pdb_offset, seq_offset = None, None

    # from pdb residue index to our aligned residue index
    residue_lookup = {}
    peak_count = 0
    # Defined up front so the return statement works when no frame is processed.
    peak_successes = set()
    # select a random set of frames for generating data without replacement
    frame_choices = random.sample(range(0, pdb.getNumFrames()),
                                  k=min(pdb.getNumFrames(), frame_number))
    for fi in frame_choices:
        peak_successes = set()
        # clean up individual frame
        frame = pdb.getPositions(frame=fi)
        # have to fix at each frame since inserted atoms may change
        # fix missing residues/atoms
        fixer = PDBFixer(filename=path)
        # overwrite positions with frame positions
        fixer.positions = frame
        # we want to add missing atoms,
        # but not replace missing residue. We'd
        # rather just ignore those
        fixer.findMissingResidues()
        # remove the missing residues
        fixer.missingResidues = []
        # remove water!
        fixer.removeHeterogens(False)
        if not shiftx_style:
            fixer.findMissingAtoms()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)
        # get new positions
        frame = fixer.positions
        num_atoms = len(frame)
        # remake residue list each time so they have correct atom ids
        residues = list(
            filter(lambda r: r.chain.id[0] == chain_id,
                   fixer.topology.residues()))
        if num_atoms > 20000:
            MA_LOST_FRAGS += len(residues)
            if debug:
                print(
                    'Exceeded number of atoms for building nlist (change this if you have big GPU memory) in frame {} in pdb {}'
                    .format(fi, path))
            break
        # check alignment once
        if pdb_offset is None:
            # create sequence from residues
            pdb_seq = ['XXX'] * max([int(r.id) + 1 for r in residues])
            for r in residues:
                rid = int(r.id)
                if rid >= 0:
                    pdb_seq[int(r.id)] = r.name
            if debug:
                print('pdb_seq', pdb_seq)
                print('peak_seq', peak_seq)
            pdb_offset, seq_offset = align(pdb_seq, peak_seq, debug)
            #TOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOOODDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDOOOOOOOOOOOOOOOOOOOOOOO?????
            # Maybe it's ok
            pdb_offset = 0
            # NOTE(review): the alignment check below is assumed to be
            # debug-only (it prints unconditionally) - confirm the nesting.
            if debug:
                print('pdb_offset', pdb_offset)
                print('seq_offset', seq_offset)
                print(sequence_map)
                # now check alignment - rarely perfect
                saw_one = False
                aligned = 0
                for i in range(len(residues)):
                    segid = int(residues[i].id) + pdb_offset
                    saw_one = pdb_seq[segid] == residues[i].name
                    if not saw_one:
                        print('Mismatch (A) at position {} ({}). {} != {}'.
                              format(segid, residues[i].id, pdb_seq[segid],
                                     residues[i].name))
                        continue
                    if segid + seq_offset in sequence_map:
                        peakid = sequence_map[segid + seq_offset]
                        print(segid, segid + seq_offset, len(pdb_seq),
                              len(peak_seq))
                        saw_one = pdb_seq[segid] == peak_seq[segid +
                                                             seq_offset]
                        if not saw_one:
                            # arguments now match the labels and report the
                            # two values that were actually compared
                            print(
                                'Mismatch (B) at position {}. pdb seq: {}, peak seq: {}'
                                .format(segid, pdb_seq[segid],
                                        peak_seq[segid + seq_offset]))
                            continue
                        saw_one = peak_data[peakid]['name'] == residues[i].name
                        if not saw_one:
                            # a placeholder for the residue index was added so
                            # the five arguments line up with their labels
                            print(
                                'Mismatch (C) at position {} ({}). peak seq: {}, peak data: {}, residue: {}'
                                .format(segid, i,
                                        peak_seq[segid + seq_offset],
                                        peak_data[peakid]['name'],
                                        residues[i].name))
                            continue
                        aligned += 1
                if aligned < 5:
                    raise ValueError(
                        'Could not find more than 5 aligned residues, very unusual'
                    )

            # create resiud look-up from atom index
            for i, r in enumerate(residues):
                for a in r.atoms():
                    residue_lookup[a.index] = i
            # This alignment will be checked as we compare shifts against the pdb
        # get neighbor list for frame
        np_pos = np.array([v.value_in_unit(units) for v in frame])
        frame_nlist = nlist_model(np_pos)

        for ri in range(len(residues)):
            # we build up fragment by getting residues around us, both in chain
            # and those within a certain distance of us
            rmin = max(0, ri + keep_residues[0])
            # have to +1 here (and not in range) to get min to work :)
            rmax = min(len(residues), ri + keep_residues[1] + 1)
            # do we have any residues to consider?
            success = rmax - rmin > 0

            consider = set(range(rmin, rmax))

            # Used to indicate an atom should be included from a different residue
            marked = [False for _ in range(len(frame))]

            # now grab spatial neighbor residues
            # NOTE: I checked this by hand a lot
            # Believe this code.
            for a in residues[ri].atoms():
                for ni in range(NN):
                    j = int(frame_nlist[a.index, ni, 1])
                    try:
                        consider.add(residue_lookup[j])
                        marked[j] = True
                    except KeyError:
                        success = False
                        if debug:
                            print(
                                'Neighboring residue in different chain, skipping'
                            )
                        break
            atoms = np.zeros((max_atoms), dtype=np.int64)
            # we will put dummy atom at end to keep bond counts the same by bonding to it
            # Z-DISABLED
            #atoms[-1] = embedding_dicts['atom']['Z']
            # np.float64 replaces the np.float alias removed in NumPy 1.24
            mask = np.zeros((max_atoms), dtype=np.float64)
            bonds = np.zeros((max_atoms, max_atoms), dtype=np.int64)
            # nlist:
            # :,:,0 -> distance
            # :,:,1 -> neighbor index
            # :,:,2 -> bond count
            nlist = np.zeros((max_atoms, NEIGHBOR_NUMBER, 3), dtype=np.float64)
            positions = np.zeros((max_atoms, 3), dtype=np.float64)
            peaks = np.zeros((max_atoms), dtype=np.float64)
            names = np.zeros((max_atoms), dtype=np.int64)
            # going from pdb atom index to index in these data structures
            rmap = dict()
            index = 0
            # check our two conditions that could have made this false: there are residues and
            # we didn't have off-chain spatial neighboring residues
            if not success:
                continue
            for rj in consider:
                residue = residues[rj]
                # use the alignment result to get offset
                segid = int(residue.id) + pdb_offset
                if segid + seq_offset not in sequence_map:
                    if debug:
                        print('Could not find residue index', rj, ': ',
                              residue, 'in the sequence map. Its index is',
                              segid + seq_offset, 'ri: ', ri)
                        print('We are considering', consider)
                    success = False
                    break
                peak_id = sequence_map[segid + seq_offset]
                #peak_id = segid
                if peak_id >= len(peak_data):
                    success = False
                    if debug:
                        print('peakd id is outside of peak range')
                    break
                # only check for residue we actually care about
                if ri == rj and residue.name != peak_data[peak_id]['name']:
                    if debug:
                        print('Mismatch between residue ', ri, rj, peak_id,
                              residue, segid, peak_data[peak_id], path,
                              corr_path, chain_id)
                    success = False
                    break
                for atom in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[atom.index]:
                        continue
                    mask[index] = float(ri == rj)
                    atom_name = residue.name + '-' + atom.name
                    if atom_name not in embedding_dicts['name']:
                        embedding_dicts['name'][atom_name] = len(
                            embedding_dicts['name'])
                    names[index] = embedding_dicts['name'][atom_name]

                    if atom.element.symbol not in embedding_dicts['atom']:
                        if debug:
                            print('Could not identify atom',
                                  atom.element.symbol)
                        success = False
                        break
                    atoms[index] = embedding_dicts['atom'][atom.element.symbol]
                    positions[index] = np_pos[atom.index, :]
                    rmap[atom.index] = index
                    peaks[index] = 0
                    if mask[index]:
                        if atom.name[:3] in peak_data[peak_id]:
                            peaks[index] = peak_data[peak_id][atom.name[:3]]
                            peak_count += 1
                            peak_successes.add(peak_id)
                        else:
                            mask[index] = 0
                    index += 1
                    # Z-DISABLED
                    # -1 for dummy atom which is stored at end
                    if index == max_atoms - 1:  #2:
                        MA_LOST_FRAGS += 1
                        if debug:
                            print('Not enough space for all atoms in ri', ri)
                        success = False
                        break
                if ri == rj and sum(mask) == 0:
                    if debug:
                        print('Warning found no peaks for', ri, rj, residue,
                              peak_data[peak_id])
                    success = False
                if not success:
                    break
            if not success:
                continue
            # do this after so our reverse mapping is complete
            for rj in consider:
                residue = residues[rj]
                for b in residue.bonds():
                    # set bonds
                    try:
                        bonds[rmap[b.atom1.index], rmap[b.atom2.index]] = 1
                        bonds[rmap[b.atom2.index], rmap[b.atom1.index]] = 1
                    except KeyError:
                        # for bonds that cross residue
                        pass
            for rj in consider:
                residue = residues[rj]
                for a in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[a.index]:
                        continue
                    index = rmap[a.index]
                    # convert to local indices and filter neighbors
                    n_index = 0
                    for ni in range(NN):
                        if frame_nlist[a.index, ni, 0] > 50.0:
                            # large distances are sentinels for things
                            # like self neighbors
                            continue
                        try:
                            j = rmap[int(frame_nlist[a.index, ni, 1])]
                        except KeyError:
                            # either we couldn't find a neighbor on the root residue (which is bad)
                            # or just one of the neighbors is not on a considered residue.
                            if rj == ri:
                                success = False
                                if debug:
                                    print('Could not find all neighbors',
                                          int(frame_nlist[a.index, ni, 1]),
                                          consider)
                                break
                            # Z-DISABLED
                            #j = max_atoms - 1 # point to dummy atom
                            continue
                        # mark as not a neighbor if out of molecule (only for non-subject nlists)
                        if False and j == max_atoms - 1:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist']['none']
                            n_index += 1
                        # a 0 -> non-bonded
                        elif bonds[index, j] == 0:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist']['nonbonded']
                            n_index += 1
                        # single bonded
                        else:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                                   0]
                            #set type
                            nlist[index, n_index,
                                  2] = embedding_dicts['nlist'][1]
                            n_index += 1
                        if n_index == NEIGHBOR_NUMBER:
                            break
                    # how did we do on peaks
                    # (disabled diagnostic: the condition is always False)
                    if False and (peaks[index] > 0 and peaks[index] < 25):
                        nonbonded_count = np.sum(
                            nlist[index, :, 2] ==
                            embedding_dicts['nlist']['nonbonded'])
                        bonded_count = np.sum(
                            nlist[index, :, 2] == embedding_dicts['nlist'][1])
                        print(
                            'neighbor summary: non-bonded: {}, bonded: {}, total: {}'
                            .format(nonbonded_count, bonded_count,
                                    NEIGHBOR_NUMBER))
                        print(nlist[index, :, :])
                        exit()
            if not success:
                if debug:
                    raise RuntimeError()
                continue
            if gsd_file is not None:
                snapshot = write_record_traj(
                    positions, atoms, mask, nlist, peaks,
                    embedding_dicts['class'][residues[ri].name], names,
                    embedding_dicts)
                snapshot.configuration.step = len(gsd_file)
                gsd_file.append(snapshot)
            result.append(
                make_tfrecord(atoms,
                              mask,
                              nlist,
                              peaks,
                              embedding_dicts['class'][residues[ri].name],
                              names,
                              indices=np.array(
                                  [model_index, fi,
                                   int(residues[ri].id)],
                                  dtype=np.int64)))
            if log_file is not None:
                # gsd_file is optional; log 0 snapshots instead of failing
                # on len(None)
                gsd_count = len(gsd_file) if gsd_file is not None else 0
                log_file.write('{} {} {} {} {} {} {} {}\n'.format(
                    path.split('/')[-1],
                    corr_path.split('/')[-1], chain_id, len(peak_successes),
                    gsd_count, model_index, fi, residues[ri].id))
    return result, len(peak_successes) / len(peak_data), len(
        result), peak_count
def solvate(system, opt):
    """
    This function solvates the system by using PDBFixer

    Parameters:
    -----------
    system: OEMol molecule
        The system to solvate
    opt: python dictionary
        The parameters used to solvate the system. The keys read here are
        'solvent_padding' (used as angstroms) and 'salt_concentration'
        (used as millimolar).

    Return:
    -------
    oe_mol: OEMol
        The solvated system: the added water/ions plus the input system,
        with the periodic box vectors stored under the 'box_vectors' tag.
    """
    # Load a fake system to initialize PDBfixer
    filename = resource_filename('pdbfixer', 'tests/data/test.pdb')
    fixer = PDBFixer(filename=filename)

    # Convert between OE and OpenMM topology
    omm_top, omm_pos = oeommutils.oemol_to_openmmTop(system)

    # Chain ids of the solute; anything else after solvation was added
    chain_names = {chain.id for chain in omm_top.chains()}

    # Set the correct topology to the fake system
    fixer.topology = omm_top
    fixer.positions = omm_pos

    # Solvate the system
    fixer.addSolvent(padding=unit.Quantity(opt['solvent_padding'], unit.angstroms),
                     ionicStrength=unit.Quantity(opt['salt_concentration'], unit.millimolar))

    # The OpenMM topology produced by the solvation fixer has missing bond
    # orders and aromaticity. The following section is creating a new openmm
    # topology made of just water molecules and ions. The new topology is then
    # converted in an OEMol and added to the passed molecule to produce the
    # solvated system
    wat_ion_top = app.Topology()

    # Atom dictionary between the the PDBfixer topology and the water_ion topology
    fixer_atom_to_wat_ion_atom = {}

    for chain in fixer.topology.chains():
        if chain.id not in chain_names:
            n_chain = wat_ion_top.addChain(chain.id)
            for res in chain.residues():
                n_res = wat_ion_top.addResidue(res.name, n_chain)
                for at in res.atoms():
                    n_at = wat_ion_top.addAtom(at.name, at.element, n_res)
                    fixer_atom_to_wat_ion_atom[at] = n_at

    for bond in fixer.topology.bonds():
        at0 = bond[0]
        at1 = bond[1]
        # Bonds touching a solute atom have no counterpart in the water/ion
        # topology and are skipped; any other failure is no longer hidden.
        if (at0 in fixer_atom_to_wat_ion_atom
                and at1 in fixer_atom_to_wat_ion_atom):
            wat_ion_top.addBond(fixer_atom_to_wat_ion_atom[at0],
                                fixer_atom_to_wat_ion_atom[at1],
                                type=None, order=1)

    # Positions past the solute's are taken as the added water/ion atoms
    wat_ion_pos = fixer.positions[len(omm_pos):]

    oe_mol = oeommutils.openmmTop_to_oemol(wat_ion_top, wat_ion_pos)

    # Setting the box vectors
    omm_box_vectors = fixer.topology.getPeriodicBoxVectors()
    box_vectors = utils.PackageOEMol.encodePyObj(omm_box_vectors)
    oe_mol.SetData(oechem.OEGetTag('box_vectors'), box_vectors)

    oechem.OEAddMols(oe_mol, system)

    return oe_mol
def prepare_pdb(pdb, chains='A', ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7, pad=10 * unit.angstroms, nbonded=app.PME,
                constraints=app.HBonds, crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Writes ``<pdb>-pdbfixer.pdb`` (fixed structure) and
    ``<pdb>-minimized.pdb`` (after energy minimization) to the current
    directory, then hands the context, system and integrator to
    ``serialize_system``.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions.  Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is only added for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.
    """
    # Load forcefield.
    logger.info('Retrieving %s from PDB...', pdb)
    ff = app.ForceField(*ff)

    # Retrieve structure from PDB.
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file.
    logger.info('Writing PDB file to "%s"...', '%s-pdbfixer.pdb' % pdb)
    # Context manager so the handle is closed, as for the minimized file below.
    with open('%s-pdbfixer.pdb' % pdb, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = ff.createSystem(fixer.topology,
                             nonbondedMethod=nbonded,
                             constraints=constraints,
                             rigidWater=True,
                             removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)  # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    logger.info('Writing PDB file to "%s"...', '%s-minimized.pdb' % pdb)
    with open('%s-minimized.pdb' % pdb, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)