def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """Download a PDB entry, repair it with PDBFixer and write three PDB files.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: a string containing the directory the PDB files are
            written to
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: dictionary with the parallel sequences ``'pdbid'``
            and ``'chain_to_remove'``; the latter holds whitespace-separated
            chain ids for the PDB ID at the same position

    Returns:
        nothing, but it does write three PDB files into ``file_pathway``:
        ``<pdbid>_fixed_ph<ph>.pdb``, ``..._apo.pdb`` and
        ``..._apo_nowater.pdb``
    """
    print(pdbid)

    # Download the topology from RCSB based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on hand curated .csv file
    if pdbid in chains_to_remove['pdbid']:
        row = chains_to_remove['pdbid'].index(pdbid)
        chains_list = chains_to_remove['chain_to_remove'][row].split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    chains = list(fixer.topology.chains())
    resindices = natsorted([residue.index for residue in chains[0].residues()])
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the N- and C-terminal fragments so they
    # are not modelled. Emptiness is re-checked before the second test because
    # the first pop may have removed the only entry.
    fixer.findMissingResidues()
    if fixer.missingResidues:
        first_key = min(fixer.missingResidues)
        if first_key[-1] <= first_resindex:
            fixer.missingResidues.pop(first_key)
    if fixer.missingResidues:
        last_key = max(fixer.missingResidues)
        if last_key[-1] >= last_resindex:
            fixer.missingResidues.pop(last_key)

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(template):
        # keepNumbers is not defined in this function; it is expected to be a
        # module-level flag.
        out_path = os.path.join(file_pathway, template % (pdbid, ph))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('%s_fixed_ph%s.pdb')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    _write('%s_fixed_ph%s_apo.pdb')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('%s_fixed_ph%s_apo_nowater.pdb')
def read_and_repair(self, path_pdb: str):
    '''
    Load a structure with PDBFixer, repair it and check its residues.

    params:
        path_pdb (str) path to an existing structure file
    return:
        repaired (PDBFixer object) fixer holding the repaired topology
        invalid (list[residues]) whatever self._check_residues reports for
            the repaired topology (residues to remove)
    '''
    assert os.path.isfile(path_pdb)

    repaired = PDBFixer(filename=path_pdb)

    # Replace non-standard residues, strip all heterogens (water included),
    # then rebuild missing heavy atoms and hydrogens at pH 7.
    repaired.findMissingResidues()
    repaired.findNonstandardResidues()
    repaired.replaceNonstandardResidues()
    repaired.removeHeterogens(False)
    repaired.findMissingAtoms()
    repaired.addMissingAtoms()
    repaired.addMissingHydrogens(7.0)

    invalid = self._check_residues(repaired.topology)
    return repaired, invalid
def add_missing_atoms(session, m, minimization_steps = 0, keep_waters = False):
    """Repair the file behind model ``m`` with PDBFixer and open the result.

    The fixed structure is written next to the input as
    ``<input-stem>-pdbfixer.pdb``, opened in ``session`` and shown as atoms
    (ribbons off), while the original model ``m`` is hidden.

    Parameters
    ----------
    session : object providing ``models.open`` and ``logger``
    m : model whose ``filename`` attribute is the structure file to fix
    minimization_steps : int, optional
        When greater than 0, ``minimize(pf, minimization_steps)`` is called on
        the fixer before writing.
    keep_waters : bool, optional
        Passed to ``PDBFixer.removeHeterogens`` (keep water when True).
    """
    from os.path import splitext
    from pdbfixer import PDBFixer
    from simtk.openmm.app import PDBFile

    fname = m.filename
    pf = PDBFixer(filename = fname)
    pf.findMissingResidues()
    pf.findNonstandardResidues()
    pf.replaceNonstandardResidues()
    pf.findMissingAtoms()
    pf.addMissingAtoms()
    pf.removeHeterogens(keep_waters)
    pf.addMissingHydrogens(7.0)

    if minimization_steps > 0:
        minimize(pf, minimization_steps)

    fout = splitext(fname)[0] + '-pdbfixer.pdb'
    # Context manager so the handle is closed even if writing fails.
    with open(fout, 'w') as out:
        PDBFile.writeFile(pf.topology, pf.positions, out)

    mfix = session.models.open([fout])[0]
    mfix.atoms.displays = True
    mfix.residues.ribbon_displays = False
    m.display = False

    log = session.logger
    log.info('Wrote %s' % fout)
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Strip unwanted chains and heterogens from a PDB file and rewrite it.

    Chains of the first model whose id is not returned by
    ``get_required_chains(pdb_group)`` are removed, missing atoms are added,
    heterogens are removed (water is kept) and the result is written as
    ``<pdb_id>.pdb`` in the directory of ``pdb_file``.

    Args:
        pdb_id: identifier used for parsing and for the output file name
        pdb_file: path of the input PDB file
        pdb_group: argument forwarded to ``get_required_chains``

    Returns:
        Path of the written PDB file.
    """
    from os.path import join

    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [
        chain.get_id() for chain in first_model
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)
    fixer.removeChains(chainIds=chains_to_remove)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # join() instead of string concatenation: with a bare file name dirname()
    # is '' and "'' + '/' + name" would point at the filesystem root.
    pdb_file = join(dirname(pdb_file), pdb_id + '.pdb')

    # KeepIds flag is critical here, otherwise we lose all information binding
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)
    return pdb_file
def _fix(self, atoms):
    """Round-trip ``atoms`` through PDBFixer and store the repaired system.

    The atoms are serialised to an in-memory PDB stream, repaired
    (non-standard residues replaced, all heterogens removed, missing atoms
    and hydrogens added at ``self._ph``; missing residues are not modelled)
    and parsed back. Sets ``self._atoms`` (with the original title),
    ``self._topology`` and ``self._positions``.
    """
    try:
        from pdbfixer import PDBFixer
        from openmm.app import PDBFile
    except ImportError:
        raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

    original_title = atoms.getTitle()

    pdb_in = createStringIO()
    writePDBStream(pdb_in, atoms)
    pdb_in.seek(0)
    fixer = PDBFixer(pdbfile=pdb_in)
    pdb_in.close()

    # An empty mapping means no missing residues will be built.
    fixer.missingResidues = {}
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(self._ph)

    pdb_out = createStringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out, keepIds=True)
    pdb_out.seek(0)
    self._atoms = parsePDBStream(pdb_out)
    self._atoms.setTitle(original_title)
    pdb_out.close()

    self._topology = fixer.topology
    self._positions = fixer.positions
def pdb_clean_sim(args):
    """
    Top-level function to be executed in parallel to clean and generate features.

    The PDB is round-tripped through ParmEd into a temporary file, repaired
    with PDBFixer, turned into an OpenMM system and passed to
    ``generate_features``; the "U_LJ", "U_el" and "D_mat" feature matrices are
    stacked and saved as ``<output_dir>/<basename>.npy``.

    :param args: tuple ``(input_dir, output_dir, fname)``: input and output
        directories and the pdb file name. The paths are built by plain string
        concatenation, so ``input_dir`` presumably has to end with a path
        separator.
    :return: ``(0, 'S;<fname>;')`` on success or when the existence check
        skips the work, ``(1, 'E;<fname>;<exception>')`` when the simulation
        part raises.
    """
    input_dir, output_dir, fname = args
    # print(input_dir, output_dir, fname)
    # NOTE(review): this checks `output_dir + fname` (no separator, pdb name)
    # while the result is saved as `output_dir + '/' + basename + '.npy'`;
    # confirm the skip condition is the intended one.
    if not Path(output_dir + fname).exists():
        # clean PDB
        pdb = pmd.load_file(input_dir + fname)
        # Fixed temporary location; the file is deleted right after PDBFixer
        # has read it.
        pdb.save('/tmp/' + fname, overwrite=True)
        fixer = PDBFixer(filename='/tmp/' + fname)
        Path('/tmp/' + fname).unlink()
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        # print(f'number of non-standard residues in {fname}: {len(fixer.nonstandardResidues)}')
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        # fixer.addSolvent(fixer.topology.getUnitCellDimensions())

        # Run simulation
        try:
            forcefield = so.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
            system = forcefield.createSystem(fixer.topology, nonbondedMethod=so.app.NoCutoff)
            param = pmd.openmm.load_topology(fixer.topology, system=system, xyz=fixer.positions)
            # file name without its last extension
            basename = '.'.join(fname.split('.')[:-1])
            # get indices of atoms for the 2 interacting subunits
            sub_unit_chains = pdb_parser(basename)
            # print(param.to_dataframe()['chain'])
            # Unpacking into two names requires sub_unit_chains to yield
            # exactly two chain-id collections.
            ids0, ids1 = (np.where(param.to_dataframe()['chain'].isin(cids))[
                0] for cids in sub_unit_chains)
            # print(sub_unit_chains,fname,ids0,ids1)
            features = generate_features(ids0, ids1, forcefield, system, param)
            print(f'done simulating: {fname}')
            # stack 3 matrices into 1
            combined_mat = np.stack((features["U_LJ"], features["U_el"], features["D_mat"]))
            np.save(output_dir + '/' + basename + '.npy', combined_mat)
            print(f'saved features: {fname}')
        except Exception as e:
            # Worker boundary: report the failure to the caller instead of raising.
            print(f'could not simulate: {fname} Exception: {e}')
            return 1, f'E;{fname};{e}'
    return 0, f'S;{fname};'
def fix_pdb(pdb_file):
    """Repair ``pdb_file`` with PDBFixer and overwrite it in place.

    Non-standard residues are replaced, heterogens are removed (water is
    kept), and missing atoms and hydrogens (pH 7.0) are added.

    Args:
        pdb_file: path of the PDB file to read and then overwrite.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    # The input was fully read by the PDBFixer constructor, so it is safe to
    # reopen the same path for writing; `with` guarantees flush and close.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def fix_pdb(self, infile, out=None, pH=7):
    """Add missing atoms and hydrogens to ``infile`` and write the result.

    Args:
        infile: path of the PDB file to read.
        out: output path; when None, ``_fixed`` is inserted before the
            extension of ``infile``.
        pH: pH used when adding hydrogens.
    """
    with open(infile, 'r') as source:
        fixer = PDBFixer(pdbfile=source)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH=pH)

    target = out
    if target is None:
        target = '{0[0]}{1}{0[1]}'.format(os.path.splitext(infile), '_fixed')

    with open(target, 'w') as sink:
        PDBFile.writeFile(fixer.topology, fixer.positions, sink)
def add_hydrogens_by_openmm(self):
    """Add missing atoms and hydrogens to ``self.name`` and drop the water.

    The structure is repaired with PDBFixer (hydrogens at pH 7.0), passed
    through an OpenMM ``Modeller`` that adds hydrogens with the
    amber99sb/tip3p force field and deletes water, and written to
    ``<self.shotname>_h.pdb``.
    """
    from simtk.openmm.app import ForceField, Modeller, PDBFile
    from pdbfixer import PDBFixer

    fixer = PDBFixer(self.name)
    field = ForceField('amber99sb.xml', 'tip3p.xml')
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    modeller = Modeller(fixer.topology, fixer.positions)
    modeller.addHydrogens(forcefield=field)
    modeller.deleteWater()

    # NOTE(review): writeModel (not writeFile) is what the original code
    # calls; confirm that a model-only PDB is what downstream tools expect.
    # `with` makes sure the output is flushed and closed.
    with open(self.shotname+'_h.pdb', 'w') as handle:
        PDBFile.writeModel(modeller.topology, modeller.positions, handle)
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """Clean every PDB in ``pdb_list`` with PDBFixer and write it to ``toFolder``.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB identifiers. Only the first four lower-cased characters are used
        for the file name; a fifth character selects the chain when ``chain``
        is None.
    chain : str or int, optional
        Chain id(s) to keep. None means chain "A" unless the id carries a
        fifth character; ``"-1"`` or ``-1`` keeps everything returned by
        ``getAllChains``.
    fromFolder : str, optional
        Folder holding ``<pdb>.pdb``, or the path of a single ``.pdb`` file.
        Defaults to ``original_pdbs``.
    toFolder : str, optional
        Output folder; created when missing.
    """
    # Portable and not subject to shell quoting problems, unlike `mkdir -p`.
    os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        pdb = f"{pdb_id.lower()[:4]}"
        pdbFile = pdb + ".pdb"
        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # fromFolder is itself a PDB file (the extension is a suffix; the
            # previous prefix test could never match a real file name).
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)

        if chain is None:
            # None means default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain

        # clean pdb
        fixer = PDBFixer(filename=fromFile)
        # remove unwanted chains
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the
        # start or end of the chain.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def add_membrane(pdb_path, membrane_lipid_type='POPC', out_as=None):
    """
    Make a lipid bilayer for your protein easily.

    The repaired protein is written to ``fixed.pdb`` and the membrane system
    to ``fixed_membrane.pdb`` in the current working directory; additional
    formats are written as ``fixed_membrane.<ext>``.

    Parameters
    ----------
    pdb_path: Whole path of your pdb file.
    membrane_lipid_type : Lipid type passed to ``PDBFixer.addMembrane``
        (e.g. POPC or POPE).
    out_as: Optional list of extensions like ['psf', 'crd', 'gro'].
        A format that fails to save is reported and skipped.

    Example
    ----------
    add_membrane('protein.pdb', 'POPC', ['crd', 'gro'])
    """
    fixer = PDBFixer(filename=pdb_path)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)

    print('\nAdding membrane:', membrane_lipid_type)
    with open("fixed.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    fixer.addMembrane(lipidType=membrane_lipid_type,
                      membraneCenterZ=0 * unit.nanometer,
                      minimumPadding=1 * unit.nanometer,
                      positiveIon="Na+",
                      negativeIon="Cl-",
                      ionicStrength=0.0 * unit.molar)
    with open("fixed_membrane.pdb", 'w') as handle:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    if out_as is not None:
        struct = pmd.load_file('fixed_membrane.pdb')
        for i in out_as:
            try:
                print("Savind *.%s extension File" % i)
                struct.save('fixed_membrane.%s' % i)
            except Exception as err:
                # Still best-effort (one bad format must not stop the others),
                # but no longer silent and no longer catching KeyboardInterrupt.
                print("Could not save *.%s extension file: %s" % (i, err))
def __init__(self, config_: Config):
    """Read the ligand, optionally repair the receptor and build the complex file.

    Reads the ligand from ``config.ligand_file_name`` with OpenEye, stores its
    SMILES and a ``Molecule`` built from it, loads the receptor with PDBFixer
    (repairing it when ``config.use_pdbfixer`` is set), writes it to
    ``<tempdir>/inital_fixed.pdb`` and then uses ``cmd`` to load receptor and
    ligand together and save them back to that same path.

    Side effect: ``config.pdb_file_name`` is overwritten with the new path.

    NOTE(review): the original indentation was lost; the write/merge steps
    are assumed to run regardless of ``use_pdbfixer`` because the fixer is
    constructed unconditionally. Confirm against the upstream file.
    """
    self.config = config_
    self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)
    with self.logger("__init__") as logger:
        self.boxvec = None
        self.explicit = self.config.explicit
        self.system = None

        # Read the ligand once and keep both its SMILES and a Molecule.
        ofs = oechem.oemolistream(self.config.ligand_file_name)
        oemol = oechem.OEMol()
        oechem.OEReadMolecule(ofs, oemol)
        ofs.close()
        self.inital_ligand_smiles = oechem.OEMolToSmiles(oemol)
        self.params_written = 0
        self.mol = Molecule.from_openeye(oemol, allow_undefined_stereo=True)

        fixer = PDBFixer(self.config.pdb_file_name)

        if self.config.use_pdbfixer:
            logger.log("Fixing with PDBFixer")

            fixer.findMissingResidues()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.removeHeterogens(keepWater=False)
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)

            # These attributes are only read here, after the corresponding
            # find* calls above.
            logger.log("Found missing residues: ", fixer.missingResidues)
            logger.log("Found missing terminals residues: ", fixer.missingTerminals)
            logger.log("Found missing atoms:", fixer.missingAtoms)
            logger.log("Found nonstandard residues:", fixer.nonstandardResidues)

        # From here on the config points at the rewritten receptor file.
        self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
        with open(self.config.pdb_file_name, 'w') as f:
            app.PDBFile.writeFile(fixer.topology, fixer.positions, f)

        # Merge receptor and ligand; the ligand's residue name is set to UNL
        # and the result is saved over the receptor file.
        cmd.reinitialize()
        cmd.load(self.config.pdb_file_name)
        cmd.load(self.config.ligand_file_name, "UNL")
        cmd.alter("UNL", "resn='UNL'")
        cmd.save("{}".format(self.config.pdb_file_name))
def pdbfix_protein(input_pdb_path, output_pdb_path, find_missing_residues=True, keep_water=False, ph=None):
    """Repair a PDB file with PDBFixer and write the result.

    Non-standard residues are replaced, heterogens are always removed and
    missing atoms are added.

    Parameters
    ----------
    input_pdb_path : str
        Path of the PDB file to repair.
    output_pdb_path : str
        Path the repaired PDB file is written to.
    find_missing_residues : bool, optional
        When True (default) PDBFixer looks for unresolved residues so they
        can be modelled; when False the missing-residue table is left empty.
    keep_water : bool, optional
        When True water molecules survive the heterogen removal
        (default is False).
    ph : float or None, optional
        pH at which hydrogens are added; no hydrogens are added when None.

    """
    fixer = PDBFixer(filename=input_pdb_path)

    if not find_missing_residues:
        fixer.missingResidues = {}
    else:
        fixer.findMissingResidues()

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(keep_water)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    wants_hydrogens = ph is not None
    if wants_hydrogens:
        fixer.addMissingHydrogens(ph)

    with open(output_pdb_path, 'w') as out_file:
        PDBFile.writeFile(fixer.topology, fixer.positions, out_file)
def _apply_pdbfix(molecule, pH=7.0, add_hydrogens=False):
    """
    Run PDBFixer to amend potential issues in PDB format.

    The molecule is written to an in-memory PDB, closed in Chimera, repaired
    (non-standard residues replaced, missing atoms added, heterogens other
    than water removed) and re-opened from the repaired PDB text.

    Parameters
    ----------
    molecule : chimera.Molecule
        Chimera Molecule object to fix. It is closed by this function.
    pH : float, optional
        Target pH for adding missing hydrogens.
    add_hydrogens : bool, optional
        Whether to add missing hydrogens or not.

    Returns
    -------
    The first model returned by ``chimera.openModels.open`` for the repaired
    PDB, after those models have been passed to
    ``chimera.openModels.remove``.
    """
    original_name = molecule.name

    pdb_in = StringIO()
    chimera.pdbWrite([molecule], chimera.Xform(), pdb_in)
    chimera.openModels.close([molecule])
    pdb_in.seek(0)

    fixer = PDBFixer(pdbfile=pdb_in)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)
    pdb_in.close()

    pdb_out = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)
    pdb_out.seek(0)
    reopened = chimera.openModels.open(pdb_out, type="PDB", identifyAs=original_name)
    chimera.openModels.remove(reopened)
    pdb_out.close()
    return reopened[0]
def fix_peptide(pdb_file, seq_dict, pH=7.4, remove_water=True, remove_small_mols=True):
    """Rebuild a peptide structure against user-supplied sequences.

    The SEQRES information of the fixer is replaced by the residues actually
    present in each chain, then overridden per chain by ``seq_dict`` so that
    PDBFixer models the residues that are missing relative to that sequence.

    Parameters
    ----------
    pdb_file : str
        Path of the PDB file to repair.
    seq_dict : dict
        Maps chain index to a one-letter sequence (any iterable of one-letter
        codes found in ``ONE_THREE_CODE``). Chains mapped to an empty or
        ``None`` sequence are removed. The dictionary is not modified.
    pH : float, optional
        pH used when adding hydrogens.
    remove_water : bool, optional
        Only used when ``remove_small_mols`` is True: water is removed along
        with the other heterogens.
    remove_small_mols : bool, optional
        Remove heterogens before modelling.

    Returns
    -------
    The repaired structure as loaded by ``mdtraj.load``. Detected
    cis-peptide bonds and chirality problems are printed, not fixed.
    """
    fixer = PDBFixer(filename=pdb_file)

    # Start from the sequence that is really in the file, chain by chain.
    fixer.sequences.clear()
    for chain in fixer.topology.chains():
        seq = pdbfixer.pdbfixer.Sequence(chain.id, [r.name for r in chain.residues()])
        fixer.sequences.append(seq)

    if remove_small_mols:
        fixer.removeHeterogens(not remove_water)

    # Convert single AA codes to three letter code. Work on a copy: the
    # caller's dictionary used to be overwritten, which broke a second call
    # with the same dictionary.
    delete_chains = []
    three_letter_seqs = dict(seq_dict)
    for key, value in seq_dict.items():
        if not value:
            delete_chains.append(key)
        else:
            three_letter_seqs[key] = [ONE_THREE_CODE[item] for item in value]

    for chain in fixer.topology.chains():
        new_residues = three_letter_seqs.get(chain.index)
        if new_residues is not None:
            fixer.sequences[chain.index].residues = new_residues

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)
    fixer.removeChains(delete_chains)

    # Write through the temporary file's own handle and flush before reading
    # it back; the file is deleted when the block exits.
    with tempfile.NamedTemporaryFile(mode='w', suffix=".pdb") as dummy:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, dummy)
        dummy.flush()
        product = mdtraj.load(dummy.name)

    problem_cis = ChiralityCheck.check_cispeptide_bond(product)
    problem_chiral = ChiralityCheck.check_chirality(product)
    print("The following problems have been detected:")
    print(problem_cis)
    print(problem_chiral)
    print("Either rerun or find a tool to solve. Perhaps VMD?")
    return product
def __init__(self, constraints=app.HBonds, hydrogenMass=None, pH=7.0, **kwargs):
    """Build the bundled 1l2y test system in OBC1 implicit solvent.

    Parameters
    ----------
    constraints : optional
        Constraint setting passed to ``ForceField.createSystem``.
    hydrogenMass : optional
        Hydrogen mass passed to ``ForceField.createSystem``.
    pH : float, optional
        pH used by ``Modeller.addHydrogens``.
    **kwargs
        Forwarded to ``TestSystem.__init__``.

    Sets ``self.topology``, ``self.positions`` and ``self.system``.
    """
    TestSystem.__init__(self, **kwargs)

    pdb_path = os.path.dirname(__file__)+"/pdbs/1l2y.pdb"
    fixer = PDBFixer(pdb_path)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    ff = app.ForceField('amber96.xml', 'amber96_obc.xml')
    model = app.Modeller(fixer.topology, fixer.positions)
    # The return value of addHydrogens is not used further.
    _variants = model.addHydrogens(ff, pH=pH)

    self.topology = model.getTopology()
    self.positions = model.getPositions()
    # Store the raw coordinates as a numpy array inside the Quantity.
    self.positions._value = np.array(self.positions._value)

    self.system = ff.createSystem(
        model.topology,
        implicitSolvent=app.OBC1,
        constraints=constraints,
        nonbondedMethod=app.NoCutoff,
        hydrogenMass=hydrogenMass,
    )
def fix_pdb(pdb_id):
    """Repair a local PDB file with PDBFixer and write ``<stem>_fixed_pH_7.pdb``.

    Missing residues at chain termini are not modelled, non-standard
    residues are replaced, heterogens are removed (water is kept) and
    hydrogens are added at pH 7. The output is written to the current
    working directory with original residue ids preserved.

    Args:
        pdb_id: name/path of the PDB file; everything before the first '.'
            is used as the stem of the output name.

    Returns:
        The output file name (relative, without the directory).

    NOTE(review): nothing is done and None is returned when ``pdb_id`` is
    exactly four characters long (a bare PDB code). The original indentation
    was lost, so confirm that the whole body belongs to that condition.
    """
    path = os.getcwd()
    if len(pdb_id) != 4:
        print("Creating PDBFixer...")
        fixer = PDBFixer(pdb_id)

        print("Finding missing residues...")
        fixer.findMissingResidues()
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues):
            chain = chains[key[0]]
            # Index 0 / index == chain length mark N- and C-terminal gaps.
            if key[1] == 0 or key[1] == len(list(chain.residues())):
                print("ok")
                del fixer.missingResidues[key]

        print("Finding nonstandard residues...")
        fixer.findNonstandardResidues()
        print("Replacing nonstandard residues...")
        fixer.replaceNonstandardResidues()
        print("Removing heterogens...")
        fixer.removeHeterogens(keepWater=True)
        print("Finding missing atoms...")
        fixer.findMissingAtoms()
        print("Adding missing atoms...")
        fixer.addMissingAtoms()
        print("Adding missing hydrogens...")
        fixer.addMissingHydrogens(7)

        print("Writing PDB file...")
        out_name = "%s_fixed_pH_%s.pdb" % (pdb_id.split('.')[0], 7)
        with open(os.path.join(path, out_name), "w") as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)
        return out_name
def pdbfix(receptor: Optional[str] = None, pdbid: Optional[str] = None,
           pH: float = 7.0, path: str = '.', **kwargs) -> str:
    """Repair a receptor structure with PDBFixer and write it to disk.

    Parameters
    ----------
    receptor : Optional[str]
        Path of a local PDB file. When given, the repaired structure is
        written back to this path.
    pdbid : Optional[str]
        PDB ID to fetch. Takes precedence over ``receptor`` as the input
        source when both are given.
    pH : float
        pH used when adding hydrogens.
    path : str
        Directory for ``<pdbid>.pdb`` when no ``receptor`` is given.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The output path: ``receptor`` unchanged when it was given, otherwise the
    ``Path`` object ``path/<pdbid>.pdb``.
    """
    if pdbid:
        fixer = PDBFixer(pdbid=pdbid)
    else:
        fixer = PDBFixer(filename=receptor)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    if receptor:
        outfile = receptor
    else:
        outfile = Path(path)/f'{pdbid}.pdb'

    # `with` so the file is flushed and closed before the path is returned.
    with open(outfile, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    return outfile
def process_pdb(path, corr_path, chain_id, max_atoms, gsd_file, embedding_dicts,
                NN, nlist_model, keep_residues=[-1, 1], debug=False,
                units=unit.nanometer, frame_number=3, model_index=0,
                log_file=None, shiftx_style=False):
    """Turn one PDB file plus its peak data into per-residue fragment records.

    For up to ``frame_number`` randomly chosen frames the file is re-read
    with PDBFixer, heterogens (including water) are removed and, unless
    ``shiftx_style`` is set, missing atoms and hydrogens are added. For every
    residue of the requested chain a fixed-size fragment (atoms, mask,
    neighbor list, peaks, names) is assembled from the residue, its sequence
    neighbors (``keep_residues``) and its spatial neighbors, and converted
    with ``make_tfrecord``.

    Parameters
    ----------
    path : str
        PDB file to process.
    corr_path : str
        File handed to ``process_corr`` to obtain peak data, the sequence
        map and the peak sequence.
    chain_id : str
        Chain to use (compared against the first character of the chain
        id); ``'_'`` selects the chain of the first residue.
    max_atoms : int
        Size of the per-fragment arrays.
    gsd_file : sequence-like or None
        When not None, a snapshot from ``write_record_traj`` is appended
        for every record.
    embedding_dicts : dict
        Lookups with the keys ``'name'``, ``'atom'``, ``'nlist'`` and
        ``'class'``. The ``'name'`` entry is extended in place with unseen
        ``RES-ATOM`` names.
    NN : int
        Number of neighbors per atom read from the neighbor list.
    nlist_model : callable
        Called with the (atoms, 3) position array; its result is indexed as
        ``[atom, neighbor, 0]`` (distance) and ``[atom, neighbor, 1]``
        (neighbor index).
    keep_residues : sequence of two ints
        Offsets (first, last) of sequence neighbors to include.
    debug : bool
        Print diagnostics and raise on conditions that are otherwise skipped.
    units : unit
        Unit the positions are converted to.
    frame_number : int
        Maximum number of frames sampled (forced to 1 for ``shiftx_style``).
    model_index : int
        Stored in each record's indices.
    log_file : file-like or None
        When not None, one line is written per record.
        NOTE(review): that line uses ``len(gsd_file)``, so ``gsd_file`` must
        not be None when ``log_file`` is given.
    shiftx_style : bool
        Forwarded to ``process_corr``; also disables atom/hydrogen repair.

    Returns
    -------
    tuple
        ``(records, fraction, len(records), peak_count)`` where ``fraction``
        is the number of peak ids matched in the last processed frame
        divided by ``len(peak_data)``.

    Raises
    ------
    ValueError
        When fewer than 5 residues can be aligned, or (debug only) when the
        requested chain is not found.
    RuntimeError
        Debug only: when neighbor-list assembly fails for a fragment.
    """
    global MA_LOST_FRAGS
    if shiftx_style:
        frame_number = 1
    # load pdb
    pdb = app.PDBFile(path)

    # load cs sets
    peak_data, sequence_map, peak_seq = process_corr(corr_path, debug, shiftx_style)

    result = []
    # check for weird/null chain
    if chain_id == '_':
        chain_id = list(pdb.topology.residues())[0].chain.id[0]
    # sometimes chains have extra characters (why?)
    residues = list(
        filter(lambda r: r.chain.id[0] == chain_id, pdb.topology.residues()))
    if len(residues) == 0:
        if debug:
            raise ValueError('Failed to find requested chain ', chain_id)

    pdb_offset, seq_offset = None, None

    # from pdb residue index to our aligned residue index
    residue_lookup = {}
    # bonded neighbor mask
    nlist_mask = None
    peak_count = 0
    # select a random set of frames for generating data without replacement
    frame_choices = random.sample(range(0, pdb.getNumFrames()),
                                  k=min(pdb.getNumFrames(), frame_number))
    for fi in frame_choices:
        peak_successes = set()
        # clean up individual frame
        frame = pdb.getPositions(frame=fi)
        # have to fix at each frame since inserted atoms may change
        # fix missing residues/atoms
        fixer = PDBFixer(filename=path)
        # overwrite positions with frame positions
        fixer.positions = frame
        # we want to add missing atoms,
        # but not replace missing residue. We'd
        # rather just ignore those
        fixer.findMissingResidues()
        # remove the missing residues
        fixer.missingResidues = []
        # remove water!
        fixer.removeHeterogens(False)
        if not shiftx_style:
            fixer.findMissingAtoms()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)
        # get new positions
        frame = fixer.positions
        num_atoms = len(frame)
        # remake residue list each time so they have correct atom ids
        residues = list(
            filter(lambda r: r.chain.id[0] == chain_id,
                   fixer.topology.residues()))
        if num_atoms > 20000:
            MA_LOST_FRAGS += len(residues)
            if debug:
                print(
                    'Exceeded number of atoms for building nlist (change this if you have big GPU memory) in frame {} in pdb {}'
                    .format(fi, path))
            break
        # check alignment once
        if pdb_offset is None:
            # create sequence from residues
            pdb_seq = ['XXX'] * max([int(r.id) + 1 for r in residues])
            for r in residues:
                rid = int(r.id)
                if rid >= 0:
                    pdb_seq[int(r.id)] = r.name
            if debug:
                print('pdb_seq', pdb_seq)
                print('peak_seq', peak_seq)
            pdb_offset, seq_offset = align(pdb_seq, peak_seq, debug)
            # TODO: the pdb_offset found by align() is discarded and forced
            # to 0 (original author's note: "Maybe it's ok").
            pdb_offset = 0
            if debug:
                print('pdb_offset', pdb_offset)
                print('seq_offset', seq_offset)
                print(sequence_map)
            # now check alignment - rarely perfect
            saw_one = False
            aligned = 0
            for i in range(len(residues)):
                segid = int(residues[i].id) + pdb_offset
                saw_one = pdb_seq[segid] == residues[i].name
                if not saw_one:
                    print('Mismatch (A) at position {} ({}). {} != {}'.
                          format(segid, residues[i].id, pdb_seq[segid],
                                 residues[i].name))
                    continue
                if segid + seq_offset in sequence_map:
                    peakid = sequence_map[segid + seq_offset]
                    print(segid, segid + seq_offset, len(pdb_seq), len(peak_seq))
                    saw_one = pdb_seq[segid] == peak_seq[segid + seq_offset]
                    if not saw_one:
                        print(
                            'Mismatch (B) at position {}. pdb seq: {}, peak seq: {}'
                            .format(segid, peak_seq[segid + seq_offset],
                                    pdb_seq[peakid]))
                        continue
                    saw_one = peak_data[peakid]['name'] == residues[i].name
                    if not saw_one:
                        print(
                            'Mismatch (C) at position {}. peak seq: {}, peak data: {}, residue: {}'
                            .format(segid, i, peak_seq[segid + seq_offset],
                                    peak_data[peakid]['name'],
                                    residues[i].name))
                        continue
                    aligned += 1
            if aligned < 5:
                raise ValueError(
                    'Could not find more than 5 aligned residues, very unusual'
                )

        # create residue look-up from atom index
        for i, r in enumerate(residues):
            for a in r.atoms():
                residue_lookup[a.index] = i
        # This alignment will be checked as we compare shifts against the pdb
        # get neighbor list for frame
        np_pos = np.array([v.value_in_unit(units) for v in frame])
        frame_nlist = nlist_model(np_pos)

        for ri in range(len(residues)):
            # we build up fragment by getting residues around us, both in chain
            # and those within a certain distance of us
            rmin = max(0, ri + keep_residues[0])
            # have to +1 here (and not in range) to get min to work :)
            rmax = min(len(residues), ri + keep_residues[1] + 1)
            # do we have any residues to consider?
            success = rmax - rmin > 0

            consider = set(range(rmin, rmax))

            # Used to indicate an atom should be included from a different residue
            marked = [False for _ in range(len(frame))]

            # now grab spatial neighbor residues
            # NOTE: I checked this by hand a lot
            # Believe this code.
            for a in residues[ri].atoms():
                for ni in range(NN):
                    j = int(frame_nlist[a.index, ni, 1])
                    try:
                        consider.add(residue_lookup[j])
                        marked[j] = True
                    except KeyError as e:
                        success = False
                        if debug:
                            print(
                                'Neighboring residue in different chain, skipping'
                            )
                        break
            atoms = np.zeros((max_atoms), dtype=np.int64)
            # we will put dummy atom at end to keep bond counts the same by bonding to it
            # Z-DISABLED
            #atoms[-1] = embedding_dicts['atom']['Z']
            # `float` instead of `np.float`: the alias was removed from NumPy
            # and always meant the builtin float.
            mask = np.zeros((max_atoms), dtype=float)
            bonds = np.zeros((max_atoms, max_atoms), dtype=np.int64)
            # nlist:
            # :,:,0 -> distance
            # :,:,1 -> neighbor index
            # :,:,2 -> bond count
            nlist = np.zeros((max_atoms, NEIGHBOR_NUMBER, 3), dtype=float)
            positions = np.zeros((max_atoms, 3), dtype=float)
            peaks = np.zeros((max_atoms), dtype=float)
            names = np.zeros((max_atoms), dtype=np.int64)
            # going from pdb atom index to index in these data structures
            rmap = dict()
            index = 0
            # check our two conditions that could have made this false: there are residues and
            # we didn't have off-chain spatial neighboring residues
            if not success:
                continue
            for rj in consider:
                residue = residues[rj]
                # use the alignment result to get offset
                segid = int(residue.id) + pdb_offset
                if segid + seq_offset not in sequence_map:
                    if debug:
                        print('Could not find residue index', rj, ': ',
                              residue, 'in the sequence map. Its index is',
                              segid + seq_offset, 'ri: ', ri)
                        print('We are considering', consider)
                    success = False
                    break
                peak_id = sequence_map[segid + seq_offset]
                #peak_id = segid
                if peak_id >= len(peak_data):
                    success = False
                    if debug:
                        print('peakd id is outside of peak range')
                    break
                # only check for residue we actually care about
                if ri == rj and residue.name != peak_data[peak_id]['name']:
                    if debug:
                        print('Mismatch between residue ', ri, rj, peak_id,
                              residue, segid, peak_data[peak_id], path,
                              corr_path, chain_id)
                    success = False
                    break
                for atom in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[atom.index]:
                        continue
                    mask[index] = float(ri == rj)
                    atom_name = residue.name + '-' + atom.name
                    if atom_name not in embedding_dicts['name']:
                        embedding_dicts['name'][atom_name] = len(
                            embedding_dicts['name'])
                    names[index] = embedding_dicts['name'][atom_name]

                    if atom.element.symbol not in embedding_dicts['atom']:
                        if debug:
                            print('Could not identify atom',
                                  atom.element.symbol)
                        success = False
                        break
                    atoms[index] = embedding_dicts['atom'][atom.element.symbol]
                    positions[index] = np_pos[atom.index, :]
                    rmap[atom.index] = index
                    peaks[index] = 0
                    if mask[index]:
                        if atom.name[:3] in peak_data[peak_id]:
                            peaks[index] = peak_data[peak_id][atom.name[:3]]
                            peak_count += 1
                            peak_successes.add(peak_id)
                        else:
                            mask[index] = 0
                    index += 1
                    # Z-DISABLED
                    # -1 for dummy atom which is stored at end
                    if index == max_atoms - 1:  #2:
                        MA_LOST_FRAGS += 1
                        if debug:
                            print('Not enough space for all atoms in ri', ri)
                        success = False
                        break
                if ri == rj and sum(mask) == 0:
                    if debug:
                        print('Warning found no peaks for', ri, rj, residue,
                              peak_data[peak_id])
                    success = False
                if not success:
                    break
            if not success:
                continue
            # do this after so our reverse mapping is complete
            for rj in consider:
                residue = residues[rj]
                for b in residue.bonds():
                    # set bonds
                    try:
                        bonds[rmap[b.atom1.index], rmap[b.atom2.index]] = 1
                        bonds[rmap[b.atom2.index], rmap[b.atom1.index]] = 1
                    except KeyError:
                        # for bonds that cross residue
                        pass
            for rj in consider:
                residue = residues[rj]
                for a in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[a.index]:
                        continue
                    index = rmap[a.index]
                    # convert to local indices and filter neighbors
                    n_index = 0
                    for ni in range(NN):
                        if frame_nlist[a.index, ni, 0] > 50.0:
                            # large distances are sentinels for things
                            # like self neighbors
                            continue
                        try:
                            j = rmap[int(frame_nlist[a.index, ni, 1])]
                        except KeyError:
                            # either we couldn't find a neighbor on the root residue (which is bad)
                            # or just one of the neighbors is not on a considered residue.
                            if rj == ri:
                                success = False
                                if debug:
                                    print('Could not find all neighbors',
                                          int(frame_nlist[a.index, ni, 1]),
                                          consider)
                                break
                            # Z-DISABLED
                            #j = max_atoms - 1 # point to dummy atom
                            continue
                        # mark as not a neighbor if out of molecule (only for non-subject nlists)
                        if False and j == max_atoms - 1:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni, 0]
                            #set type
                            nlist[index, n_index, 2] = embedding_dicts['nlist']['none']
                            n_index += 1
                        # a 0 -> non-bonded
                        elif bonds[index, j] == 0:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni, 0]
                            #set type
                            nlist[index, n_index, 2] = embedding_dicts['nlist']['nonbonded']
                            n_index += 1
                        # single bonded
                        else:
                            #set index
                            nlist[index, n_index, 1] = j
                            # set distance
                            nlist[index, n_index, 0] = frame_nlist[a.index, ni, 0]
                            #set type
                            nlist[index, n_index, 2] = embedding_dicts['nlist'][1]
                            n_index += 1
                        if n_index == NEIGHBOR_NUMBER:
                            break
                    # how did we do on peaks
                    if False and (peaks[index] > 0 and peaks[index] < 25):
                        nonbonded_count = np.sum(
                            nlist[index, :, 2] == embedding_dicts['nlist']['nonbonded'])
                        bonded_count = np.sum(
                            nlist[index, :, 2] == embedding_dicts['nlist'][1])
                        print(
                            'neighbor summary: non-bonded: {}, bonded: {}, total: {}'
                            .format(nonbonded_count, bonded_count,
                                    NEIGHBOR_NUMBER))
                        print(nlist[index, :, :])
                        exit()
            if not success:
                if debug:
                    raise RuntimeError()
                continue
            if gsd_file is not None:
                snapshot = write_record_traj(
                    positions, atoms, mask, nlist, peaks,
                    embedding_dicts['class'][residues[ri].name], names,
                    embedding_dicts)
                snapshot.configuration.step = len(gsd_file)
                gsd_file.append(snapshot)
            result.append(
                make_tfrecord(atoms, mask, nlist, peaks,
                              embedding_dicts['class'][residues[ri].name],
                              names,
                              indices=np.array(
                                  [model_index, fi, int(residues[ri].id)],
                                  dtype=np.int64)))
            if log_file is not None:
                log_file.write('{} {} {} {} {} {} {} {}\n'.format(
                    path.split('/')[-1],
                    corr_path.split('/')[-1], chain_id, len(peak_successes),
                    len(gsd_file), model_index, fi, residues[ri].id))
    return result, len(peak_successes) / len(peak_data), len(
        result), peak_count
def apply_pdbfixer(mol, add_missing=True, hydrogenate=True, pH=7.4, remove_heterogens=True, is_protein=True):
    """
    Apply PDBFixer to a molecule to try to clean it up.

    Parameters
    ----------
    mol: Rdkit Mol
        Molecule to clean up.
    add_missing: bool, optional
        If true, add in missing residues and atoms
    hydrogenate: bool, optional
        If true, add hydrogens at specified pH
    pH: float, optional
        The pH at which hydrogens will be added if `hydrogenate==True`. Set to 7.4 by default.
    remove_heterogens: bool, optional
        Often times, PDB files come with extra waters and salts attached.
        If this field is set, remove these heterogens.
    is_protein: bool, optional
        If false, then don't remove heterogens (since this molecule is itself
        a heterogen).

    Returns
    -------
    Rdkit Mol
        Parsed from the fixed PDB text with ``sanitize=False`` and
        ``removeHs=False``.

    Raises
    ------
    ImportError
        If pdbfixer or openmm cannot be imported.
    MoleculeLoadException
        If a ``ValueError`` is raised while fixing the molecule.

    Note
    ----
    This function requires RDKit and PDBFixer to be installed.
    """
    try:
        from pdbfixer import PDBFixer
    except ModuleNotFoundError:
        raise ImportError("This function requires pdbfixer")

    # Import PDBFile explicitly: a bare `import simtk` does not itself load
    # the `simtk.openmm.app` submodule. Try the current `openmm` namespace
    # first and fall back to the legacy `simtk` one.
    try:
        from openmm.app import PDBFile
    except ImportError:
        try:
            from simtk.openmm.app import PDBFile
        except ImportError:
            raise ImportError("This function requires openmm")

    try:
        from rdkit import Chem
        pdbblock = Chem.MolToPDBBlock(mol)
        pdb_stringio = StringIO()
        pdb_stringio.write(pdbblock)
        pdb_stringio.seek(0)

        fixer = PDBFixer(pdbfile=pdb_stringio)
        if add_missing:
            fixer.findMissingResidues()
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
        if hydrogenate:
            fixer.addMissingHydrogens(pH)
        if is_protein and remove_heterogens:
            # False here specifies that water is to be removed
            fixer.removeHeterogens(False)

        hydrogenated_io = StringIO()
        PDBFile.writeFile(fixer.topology, fixer.positions, hydrogenated_io)
        hydrogenated_io.seek(0)
        return Chem.MolFromPDBBlock(hydrogenated_io.read(),
                                    sanitize=False,
                                    removeHs=False)
    except ValueError as e:
        logger.warning("Unable to add hydrogens %s", e)
        raise MoleculeLoadException(e)
def pdb2omm(self,
            input_pdb=None,
            solvate=True,
            protonate=True,
            fix_pdb=True,
            inspect=False,
            extra_input_pdb=[],
            ff_files=[],
            extra_ff_files=[],
            extra_names=[],
            other_ff_instance=False,
            pH_protein=7.0,
            residue_variants={},
            other_omm=False,
            input_sdf_file=None,
            box_size=9.0,
            name='NoName'):
    """Prepare an OpenMM system from a PDB file and force field definitions.

    Stores the resulting topology, positions and system on the instance,
    writes the prepared structure through ``self.writePDB`` and returns
    ``self`` so that further methods can be chained.

    Parameters
    ----------
    input_pdb : optional
        Input structure handed to ``PDBFixer`` (``fix_pdb=True``) or
        ``app.PDBFile`` (``fix_pdb=False``). The default is None.
    solvate : bool, optional
        If true, pass the model through ``self.solvate``. The default is True.
    protonate : bool, optional
        If true, add hydrogens to the model at ``pH_protein``.
        The default is True.
    fix_pdb : bool, optional
        If true, find missing residues and add missing atoms with PDBFixer.
        The default is True.
    inspect : optional
        Not used by this method.
    extra_input_pdb : list, optional
        When non-empty, forwarded to ``self.addExtraMolecules_PDB`` together
        with the model. The default is [].
    ff_files : list, optional
        Forwarded to ``self.setForceFields``. The original documentation
        states that default AMBER force fields are used when none are given
        (TODO confirm against ``setForceFields``). The default is [].
    extra_ff_files : list, optional
        Not used by this method.
    extra_names : list, optional
        Not used by this method.
    other_ff_instance : optional
        Not used by this method.
    pH_protein : float, optional
        pH passed to ``addHydrogens``. The default is 7.0.
    residue_variants : dict, optional
        When non-empty, passed as ``protonation_dict`` to
        ``self.setProtonationState`` to build the ``variants`` argument of
        ``addHydrogens``. The default is {}.
    other_omm : bool, optional
        If true, build the system with ``self.omm_system`` instead of
        ``forcefield.createSystem``. The default is False.
    input_sdf_file : optional
        Forwarded to ``self.omm_system`` when ``other_omm`` is true.
    box_size : float, optional
        Forwarded to ``self.solvate``. The default is 9.0.
    name : str, optional
        Not used by this method.

    Returns
    -------
    self
    """
    # TODO: include "extra_input_pdb" methods to build boxes on the fly.
    self.structures = {}
    self.input_pdb = input_pdb
    self.structures['input_pdb'] = input_pdb

    # Load the structure, optionally repairing it with PDBFixer first.
    if not fix_pdb:
        pdb = app.PDBFile(self.input_pdb)
    else:
        pdb = PDBFixer(self.input_pdb)
        pdb.findMissingResidues()
        pdb.findMissingAtoms()
        pdb.addMissingAtoms()

    # The Modeller instance is what gets populated into the final system.
    pre_system = app.Modeller(pdb.topology, pdb.positions)

    # Add ligand structures to the model.
    n_extra = len(extra_input_pdb)
    if n_extra > 0:
        pre_system, self.extra_molecules = self.addExtraMolecules_PDB(
            pre_system, extra_input_pdb)

    # Build the ForceField instance from the provided XMLs.
    forcefield, ff_paths = self.setForceFields(ff_files=ff_files)

    if protonate:
        hydrogen_options = {'pH': pH_protein}
        if residue_variants:
            # Explicit protonation states requested for selected residues.
            hydrogen_options['variants'] = self.setProtonationState(
                pre_system.topology.chains(),
                protonation_dict=residue_variants)
        pre_system.addHydrogens(forcefield, **hydrogen_options)

    # TODO: For empty box, add waters, remove then
    if solvate:
        pre_system = self.solvate(pre_system, forcefield, box_size=box_size)

    self.topology = pre_system.topology
    self.positions = pre_system.positions

    # Define the system, either through the alternative builder or directly
    # from the force field.
    if other_omm:
        # The second returned value (a force field) is not used here.
        system, _ = self.omm_system(input_sdf_file,
                                    pre_system,
                                    forcefield,
                                    self.def_input_struct,
                                    ff_files=ff_paths,
                                    template_ff='gaff-2.11')
    else:
        system = forcefield.createSystem(pre_system.topology,
                                         nonbondedMethod=app.PME,
                                         nonbondedCutoff=1.0*nanometers,
                                         ewaldErrorTolerance=0.0005,
                                         constraints='HBonds',
                                         rigidWater=True)

    self.system = system

    # TODO: A lot. Link to Visualization
    self.structures['system'] = self.writePDB(pre_system.topology,
                                              pre_system.positions,
                                              name='system')

    print(f"System is now converted to openMM type: \n\tFile: {self.structures['system']}, \n\tTopology: {self.topology}")

    return self
def pdb2omm(self,
            input_pdbs=None,
            solvate=True,
            protonate=True,
            fix_pdb=True,
            inspect=False,
            extra_input_pdb=[],
            ff_files=[],
            extra_ff_files=[],
            extra_names=[],
            other_ff_instance=False,
            pH=7.0):
    """Build an OpenMM system from a PDB file located in ``self.workdir``.

    Stores the input path, system, topology and positions on the instance,
    writes the prepared structure through ``self.writePDB`` (kept in
    ``self.system_pdb``) and returns ``self``.

    Parameters
    ----------
    input_pdbs : optional
        File name appended to ``self.workdir`` to locate the input PDB.
        The default is None.
    solvate : bool, optional
        If true, pass the model through ``self.solvate``. The default is True.
    protonate : bool, optional
        If true, add hydrogens at ``pH``. The residue variants are currently
        hard-coded to ``('A', 187) -> 'ASP'`` and ``('A', 224) -> 'HID'``.
        The default is True.
    fix_pdb : bool, optional
        If true, find missing residues and add missing atoms with PDBFixer;
        otherwise load the file with ``app.PDBFile``. The default is True.
    inspect : optional
        Not used by this method.
    extra_input_pdb : list, optional
        PDB files whose topology and positions are added to the model,
        e.g. ``['SAM_H3K36.pdb', 'ZNB_H3K36.pdb']``. The default is [].
    ff_files : list, optional
        Forwarded to ``self.setForceFields``,
        e.g. ``['amber14-all.xml', 'amber14/tip4pew.xml', 'gaff.xml']``.
        The default is [].
    extra_ff_files : list, optional
        Forwarded to ``self.setForceFields``, e.g. ``['SAM.xml', 'ZNB.xml']``.
        The default is [].
    extra_names : list, optional
        Not used by this method, e.g. ``['SAM', 'ZNB']``. The default is [].
    other_ff_instance : optional
        Not used by this method.
    pH : float, optional
        pH passed to ``addHydrogens``. The default is 7.0.

    Returns
    -------
    self
    """
    tools.Functions.fileHandler(self.workdir)

    input_pdb = f'{self.workdir}/{input_pdbs}'

    # Load the structure, optionally repairing it with PDBFixer first.
    if not fix_pdb:
        pdb = app.PDBFile(input_pdb)
    else:
        pdb = PDBFixer(input_pdb)
        pdb.findMissingResidues()
        pdb.findMissingAtoms()
        pdb.addMissingAtoms()

    pre_system = app.Modeller(pdb.topology, pdb.positions)

    forcefield = self.setForceFields(ff_files=ff_files,
                                     extra_ff_files=extra_ff_files,
                                     omm_ff=False)

    if protonate:
        variants = self.setProtonationState(
            pre_system.topology.chains(),
            protonation_dict={('A',187): 'ASP', ('A',224): 'HID'})
        pre_system.addHydrogens(forcefield, pH=pH, variants=variants)

    # Add ligand structures to the model.
    for ligand_path in extra_input_pdb:
        ligand = app.PDBFile(ligand_path)
        pre_system.add(ligand.topology, ligand.positions)

    if solvate:
        pre_system = self.solvate(pre_system, forcefield)

    system = forcefield.createSystem(pre_system.topology,
                                     nonbondedMethod=app.PME,
                                     nonbondedCutoff=1.0*nanometers,
                                     ewaldErrorTolerance=0.0005,
                                     constraints='HBonds',
                                     rigidWater=True)

    # Publish the results on the instance.
    self.input_pdb = input_pdb
    self.system = system
    self.topology = pre_system.topology
    self.positions = pre_system.positions

    # TODO: A lot. Link to Visualization
    self.system_pdb = self.writePDB(pre_system.topology,
                                    pre_system.positions,
                                    name='system')

    return self
def _remap_numeric_chain_ids(structure):
    """Rename numeric chain IDs to unused letters; return the old->new map."""
    taken_ids = {chain.get_id() for chain in structure.get_chains()}
    available_ids = list(
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")

    chain_map = {}
    for chain in structure.get_chains():
        cid = chain.get_id()
        if not cid.isnumeric():
            chain_map[cid] = cid
            continue

        # Pop candidates from the end of the pool until an unused one appears.
        new_id = None
        while available_ids:
            candidate = available_ids.pop()
            if candidate not in taken_ids:
                new_id = candidate
                break
        if new_id is None:
            # Previously this silently reused a taken/stale ID (or hit an
            # unbound local); fail loudly instead of corrupting chain IDs.
            raise ValueError(
                "no unused single-letter chain ID left to replace numerical "
                "chain ID %r" % cid)

        chain_map[cid] = new_id
        chain.id = new_id
    return chain_map


def _clean_geobind(structure, mutator, regexes, quiet):
    """Remove or replace non-standard residues in place using the mutator."""
    if regexes is None:
        regexes = data.regexes
    if mutator is None:
        mutator = ResidueMutator(data.tripeptides, data.chem_components)

    for chain in structure.get_chains():
        replace = []
        remove = []
        # Classify first, modify afterwards, so the chain is not mutated
        # while it is being iterated.
        for residue in chain:
            resn = residue.get_resname().strip()
            resid = residue.get_id()
            too_many_missing = (
                resn in data.chem_components
                and heavyAtomCount(residue) /
                (data.chem_components[resn]['heavy_atom_count'] - 1) < 0.6)
            if too_many_missing:
                # too many missing atoms - replace residue
                replace.append(resid)
            elif mutator.standard(resn):
                if resid[0] != ' ':
                    remove.append(
                        (resid, "removed HETATM standard residue: %s"))
            elif resn == 'HOH' or resn == 'WAT':
                # Waters are removed without a log message.
                remove.append((resid, None))
            elif regexes["SOLVENT_COMPONENTS"].search(resn):
                continue
            elif mutator.modified(resn):
                replace.append(resid)
            else:
                remove.append((resid, "removed unrecognized residue: %s"))

        for rid, reason in remove:
            if reason is not None and not quiet:
                logging.info(reason, chain[rid].get_resname())
            chain.detach_child(rid)

        for rid in replace:
            replacement = mutator.mutate(chain[rid])
            if replacement:
                if not quiet:
                    logging.info("replacing residue %s with %s",
                                 chain[rid].get_resname(),
                                 replacement.get_resname())
                replacement.id = rid
                idx = chain.child_list.index(chain[rid])
                chain.child_list[idx] = replacement
            else:
                if not quiet:
                    logging.info(
                        "could not perform replacement on %s, removing",
                        chain[rid].get_resname())
                chain.detach_child(rid)


def _clean_pdbfixer(structure, prefix):
    """Round-trip the structure through PDBFixer; return the reloaded one."""
    try:
        from pdbfixer import PDBFixer
        from openmm.app import PDBFile
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            "The dependencies 'pdbfixer' and 'openmm' are required with option 'method=\"pdbfixer\"'"
        )

    tmp_files = []
    try:
        tmpFile1 = tempFileName(prefix, 'pdb')
        tmp_files.append(tmpFile1)
        structure.save(tmpFile1)

        fixer = PDBFixer(filename=tmpFile1)
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()

        tmpFile2 = tempFileName(prefix, 'pdb')
        tmp_files.append(tmpFile2)
        # Close (and therefore flush) the file before it is read back below.
        with open(tmpFile2, 'w') as handle:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              handle,
                              keepIds=True)

        return StructureData(tmpFile2, name=prefix)
    finally:
        # Clean up even when a step above failed; a file that was never
        # created is not an error here.
        for tmp_path in tmp_files:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass


def cleanProtein(structure,
                 mutator=None,
                 regexes=None,
                 hydrogens=True,
                 run_pdb2pqr=True,
                 quiet=False,
                 remove_numerical_chain_id=False,
                 method="geobind",
                 **kwargs):
    """Perform any operations needed to modify the structure or sequence of
    a protein chain.

    Parameters
    ----------
    structure
        Structure to clean. Must expose ``name`` and ``get_chains()``; it is
        modified in place by the "geobind" method and replaced by a reloaded
        structure by the "pdbfixer" method.
    mutator : optional
        Residue mutator for the "geobind" method. Defaults to a
        ``ResidueMutator`` built from ``data.tripeptides`` and
        ``data.chem_components``.
    regexes : optional
        Mapping providing a ``"SOLVENT_COMPONENTS"`` pattern for the
        "geobind" method. Defaults to ``data.regexes``.
    hydrogens : bool, optional
        If false, strip hydrogens from the final structure.
    run_pdb2pqr : bool, optional
        If true, run PDB2PQR on the cleaned structure (``**kwargs`` are
        forwarded to ``runPDB2PQR``).
    quiet : bool, optional
        If true, suppress the per-residue log messages of "geobind".
    remove_numerical_chain_id : bool, optional
        If true, replace numeric chain IDs with unused letters. APBS and
        TABI-PB do not process numerical chain IDs correctly; this is a
        work-around.
    method : str, optional
        "geobind" or "pdbfixer"; any other value skips residue cleaning.

    Returns
    -------
    tuple
        ``(structure,)``, followed by the PQR file when ``run_pdb2pqr`` is
        true, followed by the chain ID map when
        ``remove_numerical_chain_id`` is true.

    Raises
    ------
    ValueError
        If a numeric chain ID must be replaced but no unused letter is left.
    ModuleNotFoundError
        If ``method="pdbfixer"`` and pdbfixer/openmm are not installed.
    """
    prefix = structure.name  # used for file names

    if remove_numerical_chain_id:
        chain_map = _remap_numeric_chain_ids(structure)

    if method == "geobind":
        _clean_geobind(structure, mutator, regexes, quiet)
    elif method == "pdbfixer":
        structure = _clean_pdbfixer(structure, prefix)

    # run PDB2PQR if requested
    if run_pdb2pqr:
        structure, pqrFile = runPDB2PQR(structure, **kwargs)

    # remove hydrogens if requested
    if not hydrogens:
        stripHydrogens(structure)

    # decide what to return
    rargs = [structure]
    if run_pdb2pqr:
        rargs.append(pqrFile)
    if remove_numerical_chain_id:
        rargs.append(chain_map)

    return tuple(rargs)
""" import argparse from pdbfixer import PDBFixer from simtk.openmm.app import PDBxFile ap = argparse.ArgumentParser() ap.add_argument('structure') ap.add_argument('--output', default=None, help='Output name for fixed structure') cmd = ap.parse_args() print('Reading PDB structure: {}'.format(cmd.structure)) fixer = PDBFixer(cmd.structure) print('Finding and adding missing heavy atoms') fixer.findMissingResidues() fixer.findMissingAtoms() fixer.addMissingAtoms() if cmd.output is None: ofname = cmd.structure[:-4] + '.cif' else: ofname = cmd.output print('Writing completed structure in CIF format: {}'.format(ofname)) with open('{}'.format(ofname), 'w') as handle: PDBxFile.writeFile(fixer.topology, fixer.positions, handle)
chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains] chain_ids_to_remove = set(chain_id_list) - set(chain_ids_to_keep) fixer.removeChains(chainIds=chain_ids_to_remove) # Find missing residues. print('Finding missing residues...') fixer.findMissingResidues() # Replace nonstandard residues. print('Replacing nonstandard residues...') fixer.findNonstandardResidues() fixer.replaceNonstandardResidues() # Add missing atoms. print('Adding missing atoms...') fixer.findMissingAtoms() fixer.addMissingAtoms() # Remove heterogens. print('Removing heterogens...') fixer.removeHeterogens(keepWater=keepWater) # Add missing hydrogens. print('Adding missing hydrogens appropriate for pH %s' % pH) fixer.addMissingHydrogens(pH) if nonbondedMethod in [app.PME, app.CutoffPeriodic, app.Ewald]: # Add solvent. print('Adding solvent...') fixer.addSolvent(padding=padding)
ligand_off_molecule.to_topology()) ligand_structure = parmed.openmm.load_topology( ligand_pdbfile.topology, ligand_system, xyz=ligand_pdbfile.positions) if 1: # DO PROTEIN THINGS receptor_file = 'receptor.pdb' fixed_receptor_file = f'{path}/fixed_receptor.pdb' omm_forcefield = app.ForceField('amber14-all.xml') fixer = PDBFixer(receptor_file) #filename='receptor.pdb') missingresidues = fixer.findMissingResidues() rezez = fixer.findNonstandardResidues() fixer.replaceNonstandardResidues() fixer.removeHeterogens(keepWater=False) missingatoms = fixer.findMissingAtoms() fixer.addMissingAtoms() fixer.addMissingHydrogens(7.0) PDBFile.writeFile(fixer.topology, fixer.positions, open(fixed_receptor_file, 'w')) fixed_receptor = PDBFile(fixed_receptor_file) receptor_system = omm_forcefield.createSystem(fixed_receptor.topology) receptor_structure = parmed.openmm.load_topology( fixed_receptor.topology, receptor_system, xyz=fixed_receptor.positions) complex_structure = receptor_structure + ligand_structure complex_system = complex_structure.createSystem( nonbondedMethod=NoCutoff, nonbondedCutoff=9.0 * unit.angstrom, constraints=HBonds,
def cleanPdb(pdb_list,
             chain=None,
             source=None,
             toFolder="cleaned_pdbs",
             formatName=False,
             removeDNAchains=True,
             verbose=False,
             removeTwoEndsMissingResidues=True,
             addMissingResidues=True,
             removeHeterogens=True,
             keepIds=False):
    """Clean PDB files with PDBFixer and write the results to ``toFolder``.

    For every entry of ``pdb_list`` the structure is loaded, unwanted chains
    are removed, missing residues/atoms are handled, hydrogens are added at
    pH 7.0 and the result is written to ``<toFolder>/<name>.pdb``. Entries
    that cannot be loaded, or whose missing atoms cannot be added, are
    reported on stdout and skipped.

    Parameters
    ----------
    pdb_list : iterable of str
        Identifiers to process; each one determines the ``<name>.pdb`` file.
    chain : optional
        Chain selection. ``None``: the fifth character of the identifier if
        it has at least five characters, otherwise "A" (the identifier must
        then be exactly four characters long). ``"-1"`` or ``-1``: the result
        of ``getAllChains``. ``"first"``: the first chain of the file.
        Anything else is used as given. A chain is kept when its id is
        ``in`` the selected value.
    source : str, optional
        ``None`` reads from "original_pdbs"; a value ending in ".pdb" is
        used as the input file itself; any other value is used as the input
        directory.
    toFolder : str, optional
        Output directory; created if it does not exist.
    formatName : bool, optional
        If true, use the lower-cased first four characters of the identifier
        as the file name stem.
    removeDNAchains : bool, optional
        Forwarded to ``getAllChains``.
    verbose : bool, optional
        If true, print the removed chain indices and missing-residue keys.
    removeTwoEndsMissingResidues : bool, optional
        If true, do not rebuild missing residues located at the start or the
        end of a chain.
    addMissingResidues : bool, optional
        If false, do not rebuild any missing residues.
    removeHeterogens : bool, optional
        If true, remove heterogens including water.
    keepIds : bool, optional
        Forwarded to ``PDBFile.writeFile``.

    Returns
    -------
    None
    """
    # Create the folder directly instead of interpolating the name into a
    # shell command (breaks on spaces/metacharacters and is not portable).
    os.makedirs(toFolder, exist_ok=True)

    for pdb_id in pdb_list:
        print(pdb_id)
        pdb = pdb_id.lower()[:4] if formatName else pdb_id
        pdbFile = pdb + ".pdb"

        if source is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif source[-4:] == ".pdb":
            fromFile = source
        else:
            fromFile = os.path.join(source, pdbFile)

        # clean pdb
        try:
            fixer = PDBFixer(filename=fromFile)
        except Exception as inst:
            print(inst)
            print(f"{fromFile} not found. skipped")
            continue

        # remove unwanted chains
        chains = list(fixer.topology.chains())
        print(chains)
        if chain is None:
            # 'None' means the default is chain A unless specified.
            if len(pdb_id) >= 5:
                chosen_chain = pdb_id[4]
            else:
                assert (len(pdb_id) == 4)
                chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            chosen_chain = getAllChains(fromFile,
                                        removeDNAchains=removeDNAchains)
            print(f"Chains: {chosen_chain}")
        elif chain == "first":
            chosen_chain = chains[0].id
        else:
            chosen_chain = chain

        # removeChains is given chain indices here, not chain ids.
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the
        # start or end of the chain.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        if verbose:
            print("chains to remove", chains_to_remove)
            print("missing residues: ", keys)
        if not addMissingResidues:
            for key in list(keys):
                del fixer.missingResidues[key]
        elif removeTwoEndsMissingResidues:
            for key in list(keys):
                chain_tmp = chains[key[0]]
                n_residues = len(list(chain_tmp.residues()))
                if key[1] == 0 or key[1] == n_residues:
                    del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        if removeHeterogens:
            fixer.removeHeterogens(keepWater=False)

        fixer.findMissingAtoms()
        try:
            fixer.addMissingAtoms()
        except Exception:
            # Best effort: skip this structure, but no longer swallow
            # KeyboardInterrupt/SystemExit as the bare except did.
            print("Unable to add missing atoms")
            continue
        fixer.addMissingHydrogens(7.0)

        # Close the output file deterministically.
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              handle,
                              keepIds=keepIds)
def prepare_pdb(pdb,
                chains='A',
                ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7,
                pad=10 * unit.angstroms,
                nbonded=app.PME,
                constraints=app.HBonds,
                crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Writes ``<pdb>-pdbfixer.pdb`` (fixed structure) and
    ``<pdb>-minimized.pdb`` (after local energy minimization) to the current
    directory, then hands the context, system and integrator to
    ``serialize_system``.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions. Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is only added for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.

    Returns
    -------
    None
    """
    # Load forcefield.
    logger.info('Retrieving %s from PDB...', pdb)
    ff = app.ForceField(*ff)

    # Retrieve structure from PDB.
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file. Use a context manager so the handle is closed, the
    # same way the minimized structure is written below.
    fixed_path = '%s-pdbfixer.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', fixed_path)
    with open(fixed_path, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = ff.createSystem(fixer.topology,
                             nonbondedMethod=nbonded,
                             constraints=constraints,
                             rigidWater=True,
                             removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)
    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    minimized_path = '%s-minimized.pdb' % pdb
    logger.info('Writing PDB file to "%s"...', minimized_path)
    with open(minimized_path, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)