def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """Download a PDB entry, repair it with PDBFixer and write three PDB files.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: a string containing the directory in which the PDB
            files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: dictionary with parallel sequences under the keys
            'pdbid' and 'chain_to_remove'; the entry matching ``pdbid`` is a
            whitespace-separated string of chain IDs to delete

    Returns:
        nothing, but it does write PDB files:
        ``<pdbid>_fixed_ph<ph>.pdb`` (written before any heterogen removal),
        ``<pdbid>_fixed_ph<ph>_apo.pdb`` (after ``removeHeterogens(True)``) and
        ``<pdbid>_fixed_ph<ph>_apo_nowater.pdb`` (after
        ``removeHeterogens(False)``).
    """
    print(pdbid)

    # Download the topology from RCSB based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on hand curated .csv file
    if pdbid in chains_to_remove['pdbid']:
        row = chains_to_remove['pdbid'].index(pdbid)
        chains_list = chains_to_remove['chain_to_remove'][row].split()
        fixer.removeChains(chainIds=chains_list)

    # Determine the first and last residue resolved in chain 0
    chains = list(fixer.topology.chains())
    resindices = natsorted([residue.index for residue in chains[0].residues()])
    first_resindex = resindices[0]
    last_resindex = resindices[-1]

    # Find missing residues and drop the N- and C-terminal fragments so that
    # they are not modelled. The emptiness check is repeated because removing
    # the first entry may leave nothing to inspect.
    fixer.findMissingResidues()
    if fixer.missingResidues:
        first_key = min(fixer.missingResidues)
        if first_key[-1] <= first_resindex:
            del fixer.missingResidues[first_key]
    if fixer.missingResidues:
        last_key = max(fixer.missingResidues)
        if last_key[-1] >= last_resindex:
            del fixer.missingResidues[last_key]

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(suffix):
        # Write the current state of the fixer and close the file right away.
        # NOTE(review): keepNumbers is not defined in this function; it is
        # presumably a module-level flag - confirm.
        out_path = os.path.join(
            file_pathway, '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix))
        with open(out_path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    _write('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('_apo_nowater')
def read_and_repair(self, path_pdb: str):
    '''
    Load a structure file into PDBFixer, repair it and check its residues.

    params:
        path_pdb (str) path to structure

    return:
        pdb (PDBFixer object) the repaired structure
        invalid_residues (list[residues]) residues to remove, as reported by
            self._check_residues for the repaired topology
    '''
    assert os.path.isfile(path_pdb)

    repaired = PDBFixer(filename=path_pdb)

    # Locate gaps and non-standard residues, then substitute the latter.
    repaired.findMissingResidues()
    repaired.findNonstandardResidues()
    repaired.replaceNonstandardResidues()

    # Drop heterogens (water included), then rebuild atoms and hydrogens.
    repaired.removeHeterogens(False)
    repaired.findMissingAtoms()
    repaired.addMissingAtoms()
    repaired.addMissingHydrogens(7.0)

    return repaired, self._check_residues(repaired.topology)
def add_missing_atoms(session, m, minimization_steps = 0, keep_waters = False):
    """Repair the file behind model ``m`` with PDBFixer and open the result.

    The repaired structure is written next to the input file as
    ``<name>-pdbfixer.pdb``, opened through ``session.models.open``, shown as
    atoms (ribbons off), and the original model ``m`` is hidden.

    Parameters
    ----------
    session : object providing ``models.open`` and ``logger``
    m : model whose ``filename`` attribute names the PDB file to repair
    minimization_steps : int
        When greater than zero, ``minimize(pf, minimization_steps)`` is
        called before writing.
    keep_waters : bool
        Passed to ``PDBFixer.removeHeterogens``.
    """
    fname = m.filename
    from pdbfixer import PDBFixer
    pf = PDBFixer(filename = fname)
    pf.findMissingResidues()
    pf.findNonstandardResidues()
    pf.replaceNonstandardResidues()
    pf.findMissingAtoms()
    pf.addMissingAtoms()
    pf.removeHeterogens(keep_waters)
    pf.addMissingHydrogens(7.0)
    if minimization_steps > 0:
        minimize(pf, minimization_steps)

    from os.path import splitext
    fout = splitext(fname)[0] + '-pdbfixer.pdb'
    from simtk.openmm.app import PDBFile
    # The context manager closes the file even if writing fails.
    with open(fout, 'w') as out:
        PDBFile.writeFile(pf.topology, pf.positions, out)

    mfix = session.models.open([fout])[0]
    mfix.atoms.displays = True
    mfix.residues.ribbon_displays = False
    m.display = False
    log = session.logger
    log.info('Wrote %s' % fout)
def _fix(self, atoms):
    """Run *atoms* through PDBFixer and store the repaired system on self.

    The atoms are serialised to an in-memory PDB stream, repaired (missing
    residues are deliberately not modelled), and parsed back. Sets
    ``self._atoms``, ``self._topology`` and ``self._positions``.
    """
    try:
        from pdbfixer import PDBFixer
        from openmm.app import PDBFile
    except ImportError:
        raise ImportError('Please install PDBFixer and OpenMM 7.6 in order to use ClustENM.')

    original_title = atoms.getTitle()

    # Serialise the input so that PDBFixer can read it as a PDB file.
    pdb_in = createStringIO()
    writePDBStream(pdb_in, atoms)
    pdb_in.seek(0)
    fixer = PDBFixer(pdbfile=pdb_in)
    pdb_in.close()

    # An empty mapping stands in for findMissingResidues(): no residues are
    # added, only atoms and hydrogens.
    fixer.missingResidues = {}
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(False)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(self._ph)

    # Round-trip through a second stream to get the repaired atoms back.
    pdb_out = createStringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out, keepIds=True)
    pdb_out.seek(0)
    self._atoms = parsePDBStream(pdb_out)
    self._atoms.setTitle(original_title)
    pdb_out.close()

    self._topology = fixer.topology
    self._positions = fixer.positions
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Keep only the required chains of a PDB file, repair it and save it.

    Parameters
    ----------
    pdb_id : identifier used for parsing and for naming the output file
    pdb_file : path of the input PDB file
    pdb_group : passed to ``get_required_chains`` to obtain the chain IDs
        that must be retained

    Returns
    -------
    Path of the written file: ``<dirname(pdb_file)>/<pdb_id>.pdb``.
    """
    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [
        chain.get_id() for chain in first_model
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)
    fixer.removeChains(chainIds=chains_to_remove)
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # The keepIds flag is critical here, otherwise we lose the original
    # chain/residue identifiers that bind the output to the source entry.
    pdb_file = dirname(pdb_file) + '/' + pdb_id + '.pdb'
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)
    return pdb_file
def pdb_clean_sim(args):
    """
    Top-level function to be executed in parallel to clean and generate features.

    The PDB file is round-tripped through ParmEd, repaired with PDBFixer,
    parameterised with the amber14 force field, and the three feature
    matrices returned by ``generate_features`` are stacked and saved as
    ``<output_dir>/<basename>.npy``.

    :param args: Input and output directories, pdb name
        (unpacked as ``input_dir, output_dir, fname``).
    :return: ``(0, 'S;<fname>;')`` on success or when the work is skipped,
        ``(1, 'E;<fname>;<exception>')`` when the simulation step raises.
    """
    input_dir, output_dir, fname = args
    # print(input_dir, output_dir, fname)
    # NOTE(review): the skip check looks for output_dir + fname, while the
    # result is saved as output_dir + '/' + basename + '.npy' - confirm this
    # is the intended marker.
    if not Path(output_dir + fname).exists():
        # clean PDB: re-save through ParmEd into a scratch file that is only
        # needed while PDBFixer reads it
        pdb = pmd.load_file(input_dir + fname)
        pdb.save('/tmp/' + fname, overwrite=True)
        fixer = PDBFixer(filename='/tmp/' + fname)
        Path('/tmp/' + fname).unlink()
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        # print(f'number of non-standard residues in {fname}: {len(fixer.nonstandardResidues)}')
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        # fixer.addSolvent(fixer.topology.getUnitCellDimensions())
        # Run simulation
        try:
            forcefield = so.app.ForceField('amber14-all.xml', 'amber14/tip3pfb.xml')
            system = forcefield.createSystem(fixer.topology, nonbondedMethod=so.app.NoCutoff)
            param = pmd.openmm.load_topology(fixer.topology, system=system, xyz=fixer.positions)
            # file name without its last extension
            basename = '.'.join(fname.split('.')[:-1])
            # get indices of atoms for the 2 interacting subunits
            sub_unit_chains = pdb_parser(basename)
            # print(param.to_dataframe()['chain'])
            # the unpacking requires sub_unit_chains to yield exactly two
            # chain-ID collections
            ids0, ids1 = (np.where(param.to_dataframe()['chain'].isin(cids))[
                0] for cids in sub_unit_chains)
            # print(sub_unit_chains,fname,ids0,ids1)
            features = generate_features(ids0, ids1, forcefield, system, param)
            print(f'done simulating: {fname}')
            # stack 3 matrices into 1
            combined_mat = np.stack((features["U_LJ"], features["U_el"], features["D_mat"]))
            np.save(output_dir + '/' + basename + '.npy', combined_mat)
            print(f'saved features: {fname}')
        except Exception as e:
            # best effort: report the failure to the caller instead of raising
            print(f'could not simulate: {fname} Exception: {e}')
            return 1, f'E;{fname};{e}'
    return 0, f'S;{fname};'
def fix_pdb(pdb_file):
    """Repair ``pdb_file`` with PDBFixer and overwrite it in place.

    Non-standard residues are replaced, heterogens are removed with
    ``removeHeterogens(True)``, and missing atoms plus hydrogens (pH 7.0)
    are added. The result is written back to the same path.
    """
    fixer = PDBFixer(filename=pdb_file)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens(True)
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(7.0)
    # Close the file deterministically so the content is on disk on return.
    with open(pdb_file, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """Clean every PDB in ``pdb_list`` with PDBFixer and save it in ``toFolder``.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB identifiers; the first four characters (lower-cased) give the
        file name, an optional fifth character selects the chain.
    chain : str, int or None
        ``None`` means chain "A" unless the identifier carries a fifth
        character; ``"-1"`` or ``-1`` selects all chains returned by
        ``getAllChains``; any other value is used as the chain selection.
    fromFolder : str or None
        Folder holding the input files (default ``original_pdbs``). A value
        ending in ``.pdb`` is treated as the input file itself.
    toFolder : str
        Output folder, created when missing.
    """
    # Create the output folder without going through a shell.
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)

    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        pdbFile = pdb + ".pdb"
        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # A path to a PDB file was given instead of a folder.
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)

        if chain is None:
            # None means the default is chain A unless specified.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain

        # clean pdb
        fixer = PDBFixer(filename=fromFile)

        # remove unwanted chains
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the
        # start or end of the chain.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues.keys()):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def __init__(self, config_: Config):
    """Prepare the receptor/ligand input files described by ``config_``.

    Reads the ligand with OpenEye, records its SMILES, optionally repairs the
    receptor with PDBFixer, writes the receptor to a temporary
    ``inital_fixed.pdb`` and finally uses PyMOL (``cmd``) to merge the ligand
    into that file under the residue name ``UNL``.
    """
    self.config = config_
    self.logger = make_message_writer(self.config.verbose, self.__class__.__name__)
    with self.logger("__init__") as logger:
        self.boxvec = None
        self.explicit = self.config.explicit
        self.system = None

        # Read the ligand and remember its SMILES and OpenFF representation.
        ofs = oechem.oemolistream(self.config.ligand_file_name)
        oemol = oechem.OEMol()
        oechem.OEReadMolecule(ofs, oemol)
        ofs.close()
        self.inital_ligand_smiles = oechem.OEMolToSmiles(oemol)
        self.params_written = 0
        self.mol = Molecule.from_openeye(oemol, allow_undefined_stereo=True)

        # The receptor is always loaded through PDBFixer; the repair steps
        # themselves are optional.
        fixer = PDBFixer(self.config.pdb_file_name)
        if self.config.use_pdbfixer:
            logger.log("Fixing with PDBFixer")
            fixer.findMissingResidues()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.removeHeterogens(keepWater=False)
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)

            logger.log("Found missing residues: ", fixer.missingResidues)
            logger.log("Found missing terminals residues: ", fixer.missingTerminals)
            logger.log("Found missing atoms:", fixer.missingAtoms)
            logger.log("Found nonstandard residues:", fixer.nonstandardResidues)

        # From here on the config points at the rewritten receptor file.
        self.config.pdb_file_name = f"{self.config.tempdir(main_context=True)}/inital_fixed.pdb"
        with open(self.config.pdb_file_name, 'w') as f:
            app.PDBFile.writeFile(fixer.topology, fixer.positions, f)

        # Merge receptor and ligand with PyMOL and overwrite the same file;
        # the ligand object and its residue name are both set to "UNL".
        cmd.reinitialize()
        cmd.load(self.config.pdb_file_name)
        cmd.load(self.config.ligand_file_name, "UNL")
        cmd.alter("UNL", "resn='UNL'")
        cmd.save("{}".format(self.config.pdb_file_name))
def pdbfix_protein(input_pdb_path, output_pdb_path, find_missing_residues=True,
                   keep_water=False, ph=None):
    """Repair a PDB file with PDBFixer and save the result.

    Heterogen atoms are always removed.

    Parameters
    ----------
    input_pdb_path : str
        The PDB to fix.
    output_pdb_path : str
        The path to the output PDB file.
    find_missing_residues : bool, optional
        If True, PDBFixer will try to model the unresolved residues that
        appear in the amino acid sequence (default is True).
    keep_water : bool, optional
        If True, water molecules are not stripped (default is False).
    ph : float or None, optional
        If not None, hydrogen atoms will be added at this pH.

    """
    protein = PDBFixer(filename=input_pdb_path)

    # Either detect the gaps or tell PDBFixer explicitly that there are none,
    # so that only atoms (not residues) are added later.
    if not find_missing_residues:
        protein.missingResidues = {}
    else:
        protein.findMissingResidues()

    protein.findNonstandardResidues()
    protein.replaceNonstandardResidues()
    protein.removeHeterogens(keep_water)
    protein.findMissingAtoms()
    protein.addMissingAtoms()

    # Protonation is optional and only performed for an explicit pH.
    if ph is not None:
        protein.addMissingHydrogens(ph)

    with open(output_pdb_path, 'w') as out_file:
        PDBFile.writeFile(protein.topology, protein.positions, out_file)
def _apply_pdbfix(molecule, pH=7.0, add_hydrogens=False):
    """
    Run PDBFixer to amend potential issues in PDB format.

    The molecule is dumped to an in-memory PDB, closed in Chimera, repaired,
    and the repaired PDB is opened again under the original name.

    Parameters
    ----------
    molecule : chimera.Molecule
        Chimera Molecule object to fix.
    pH : float, optional
        Target pH for adding missing hydrogens.
    add_hydrogens : bool, optional
        Whether to add missing hydrogens or not.

    Returns
    -------
    The first model produced by re-opening the repaired PDB; the re-opened
    models are removed from ``chimera.openModels`` before returning.
    """
    # Dump the molecule to memory and release the original model.
    pdb_in = StringIO()
    chimera.pdbWrite([molecule], chimera.Xform(), pdb_in)
    chimera.openModels.close([molecule])
    pdb_in.seek(0)

    fixer = PDBFixer(pdbfile=pdb_in)
    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)
    pdb_in.close()

    # Serialise the repaired structure and load it back into Chimera.
    pdb_out = StringIO()
    PDBFile.writeFile(fixer.topology, fixer.positions, pdb_out)
    pdb_out.seek(0)
    reopened = chimera.openModels.open(pdb_out, type="PDB",
                                       identifyAs=molecule.name)
    chimera.openModels.remove(reopened)
    pdb_out.close()
    return reopened[0]
def fix_peptide(pdb_file, seq_dict, pH=7.4, remove_water=True, remove_small_mols=True):
    """Rebuild a peptide structure against user-supplied sequences.

    Parameters
    ----------
    pdb_file : str
        Path of the PDB file to repair.
    seq_dict : dict
        Maps a chain index to its sequence as an iterable of one-letter
        codes. An empty or ``None`` value marks the chain for deletion.
        The dictionary is not modified.
    pH : float, optional
        pH used when adding hydrogens.
    remove_water : bool, optional
        Only consulted when ``remove_small_mols`` is true; water is kept when
        this is false.
    remove_small_mols : bool, optional
        If true, heterogens are removed before the repair.

    Returns
    -------
    The repaired structure loaded with ``mdtraj.load``. Results of the
    cis-peptide and chirality checks are printed, not returned.
    """
    fixer = PDBFixer(filename=pdb_file)

    # Replace the sequence records with the residues actually present in the
    # topology, one record per chain.
    fixer.sequences.clear()
    for chain in fixer.topology.chains():
        seq = pdbfixer.pdbfixer.Sequence(chain.id, [r.name for r in chain.residues()])
        fixer.sequences.append(seq)
    if remove_small_mols:
        fixer.removeHeterogens(not remove_water)

    # Convert single AA codes to three letter code. Work on a copy so the
    # caller's dictionary stays reusable for another call.
    delete_chains = []
    target_sequences = dict(seq_dict)
    for key, value in seq_dict.items():
        if not value:
            delete_chains.append(key)
        else:
            target_sequences[key] = [ONE_THREE_CODE[item] for item in value]

    for chain in fixer.topology.chains():
        if chain.index in target_sequences:
            if target_sequences[chain.index] is not None:
                fixer.sequences[chain.index].residues = target_sequences[chain.index]

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)
    fixer.removeChains(delete_chains)

    # Write through the temporary file's own handle and flush before mdtraj
    # reads it back; the file is removed when the block exits.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".pdb") as dummy:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, dummy)
        dummy.flush()
        product = mdtraj.load(dummy.name)

    problem_cis = ChiralityCheck.check_cispeptide_bond(product)
    problem_chiral = ChiralityCheck.check_chirality(product)
    print("The following problems have been detected:")
    print(problem_cis)
    print(problem_chiral)
    print("Either rerun or find a tool to solve. Perhaps VMD?")
    return product
def fix_pdb(pdb_id):
    """Repair a PDB file with PDBFixer and write ``<stem>_fixed_pH_7.pdb``.

    ``pdb_id`` is handed to ``PDBFixer`` as its first positional argument.
    Inputs that are exactly four characters long are skipped and ``None`` is
    returned. Missing residues at either end of a chain are not modelled.

    Returns
    -------
    The name (not the full path) of the file written to the current working
    directory, or ``None`` when the input was skipped.
    """
    path = os.getcwd()
    if len(pdb_id) == 4:
        return None

    print("Creating PDBFixer...")
    fixer = PDBFixer(pdb_id)
    print("Finding missing residues...")
    fixer.findMissingResidues()

    # Drop terminal gaps so that only internal loops are rebuilt.
    chains = list(fixer.topology.chains())
    for key in list(fixer.missingResidues.keys()):
        chain = chains[key[0]]
        if key[1] == 0 or key[1] == len(list(chain.residues())):
            print("ok")
            del fixer.missingResidues[key]

    print("Finding nonstandard residues...")
    fixer.findNonstandardResidues()
    print("Replacing nonstandard residues...")
    fixer.replaceNonstandardResidues()
    print("Removing heterogens...")
    fixer.removeHeterogens(keepWater=True)

    print("Finding missing atoms...")
    fixer.findMissingAtoms()
    print("Adding missing atoms...")
    fixer.addMissingAtoms()
    print("Adding missing hydrogens...")
    fixer.addMissingHydrogens(7)

    print("Writing PDB file...")
    out_name = "%s_fixed_pH_%s.pdb" % (pdb_id.split('.')[0], 7)
    with open(os.path.join(path, out_name), "w") as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)
    return out_name
def pdbfix(receptor: Optional[str] = None, pdbid: Optional[str] = None,
           pH: float = 7.0, path: str = '.', **kwargs) -> str:
    """Repair a receptor structure with PDBFixer and write it to disk.

    Parameters
    ----------
    receptor : Optional[str]
        Path of a local PDB file. When given, the repaired structure
        overwrites this file.
    pdbid : Optional[str]
        PDB ID to load; takes precedence over ``receptor`` as the source of
        the structure.
    pH : float
        pH used when adding hydrogens.
    path : str
        Output directory used only when ``receptor`` is not given; the file
        is then named ``<pdbid>.pdb``.
    **kwargs
        Accepted and ignored.

    Returns
    -------
    The output location: ``receptor`` itself, or a ``Path`` object
    ``path/<pdbid>.pdb`` when no receptor was given.
    """
    if pdbid:
        fixer = PDBFixer(pdbid=pdbid)
    else:
        fixer = PDBFixer(filename=receptor)

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.removeHeterogens()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)

    if receptor:
        outfile = receptor
    else:
        outfile = Path(path)/f'{pdbid}.pdb'

    # Close the file deterministically so callers can read it right away.
    with open(outfile, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle)

    return outfile
def getAllChains(pdbFile, removeDNAchains=True):
    """Return the chain IDs found in ``pdbFile`` as one sorted string.

    Heterogens (water included) are stripped first so that only polymer
    chains are considered. Chains whose residues are all DNA residues are
    skipped when ``removeDNAchains`` is true. Only IDs found in the
    alphanumeric ID string are kept; duplicates are collapsed.
    """
    fixer = PDBFixer(filename=pdbFile)
    # we only want pdb chains, ligands or DNA chain will be ignored here.
    fixer.removeHeterogens(keepWater=False)

    dnaResidues = ['DA', 'DG', 'DC', 'DT', 'DI']
    valid_ids = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'

    a = ""
    for c in fixer.topology.chains():
        # Builtin all() replaces np.alltrue, which newer NumPy releases no
        # longer provide; a chain without residues still counts as DNA.
        if removeDNAchains and all(
                [r.name in dnaResidues for r in c.residues()]):
            print(f"chain {c.id} is a DNA chain. it will be removed")
            continue
        if c.id in valid_ids:
            a += c.id
    # return ''.join(sorted(set(a.upper().replace(" ", ""))))
    return ''.join(sorted(set(a.replace(" ", ""))))
if not os.path.isfile( os.path.join(work_dir, f'{pdbid}_chain{chain}_minimized.pdb')): print("Need to minimize the protein structure.") # clean up the input pdb file using pdbfixer and load using Modeller if not os.path.isfile(os.path.join(work_dir, f'{pdbid}_chain{chain}.pdb')): fixer = PDBFixer(url=f'http://www.pdb.org/pdb/files/{pdbid}.pdb') ''' for this case somehow the pdb after chain selection doesn't go through fixing so fix and then select ''' # find missing residues fixer.findMissingResidues() # modify missingResidues so the extra residues on the end are ignored fixer.missingResidues = {} # remove ligand but keep crystal waters fixer.removeHeterogens(True) print("Done removing heterogens.") # find missing atoms/terminals fixer.findMissingAtoms() if fixer.missingAtoms or fixer.missingTerminals: fixer.addMissingAtoms() print("Done adding atoms/terminals.") else: print("No atom/terminal needs to be added.") # add hydrogens fixer.addMissingHydrogens(7.0) print("Done adding hydrogens.") # output fixed pdb PDBFile.writeFile(fixer.topology,
def prepare_inputs(
        protein: str,
        ligand: str,
        replace_nonstandard_residues: bool = True,
        remove_heterogens: bool = True,
        remove_water: bool = True,
        add_hydrogens: bool = True,
        pH: float = 7.0,
        optimize_ligand: bool = True,
        pdb_name: Optional[str] = None) -> Tuple[RDKitMol, RDKitMol]:
    """This prepares protein-ligand complexes for docking.

    Autodock Vina requires PDB files for proteins and ligands with
    sensible inputs. This function uses PDBFixer and RDKit to ensure
    that inputs are reasonable and ready for docking. Default values
    are given for convenience, but fixing PDB files is complicated and
    human judgement is required to produce protein structures suitable
    for docking. Always inspect the results carefully before trying to
    perform docking.

    Parameters
    ----------
    protein: str
        Filename for protein PDB file or a PDBID.
    ligand: str
        Either a filename for a ligand PDB file or a SMILES string.
    replace_nonstandard_residues: bool (default True)
        Replace nonstandard residues with standard residues.
    remove_heterogens: bool (default True)
        Removes residues that are not standard amino acids or nucleotides.
    remove_water: bool (default True)
        Remove water molecules. Only has an effect together with
        `remove_heterogens`.
    add_hydrogens: bool (default True)
        Add missing hydrogens at the protonation state given by `pH`.
    pH: float (default 7.0)
        Most common form of each residue at given `pH` value is used.
    optimize_ligand: bool (default True)
        If True, optimize ligand with RDKit. Required for SMILES inputs.
    pdb_name: Optional[str]
        If given, write sanitized protein and ligand to files called
        "pdb_name.pdb" and "ligand_pdb_name.pdb"

    Returns
    -------
    Tuple[RDKitMol, RDKitMol]
        Tuple of `protein_molecule, ligand_molecule` with 3D information.

    Note
    ----
    This function requires RDKit and OpenMM to be installed.
    Read more about PDBFixer here: https://github.com/openmm/pdbfixer.

    Examples
    --------
    >>> p, m = prepare_inputs('3cyx', 'CCC')
    >> p.GetNumAtoms()
    >> m.GetNumAtoms()

    >>> p, m = prepare_inputs('3cyx', 'CCC', remove_heterogens=False)
    >> p.GetNumAtoms()

    """
    import tempfile

    try:
        from rdkit import Chem
        from rdkit.Chem import AllChem
        from pdbfixer import PDBFixer
        from simtk.openmm.app import PDBFile
    except ModuleNotFoundError:
        raise ImportError(
            "This function requires RDKit and OpenMM to be installed.")

    if protein.endswith('.pdb'):
        fixer = PDBFixer(protein)
    else:
        fixer = PDBFixer(url='https://files.rcsb.org/download/%s.pdb' %
                         (protein))

    if ligand.endswith('.pdb'):
        m = Chem.MolFromPDBFile(ligand)
    else:
        m = Chem.MolFromSmiles(ligand, sanitize=True)

    # Apply common fixes to PDB files
    if replace_nonstandard_residues:
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
    if remove_heterogens and not remove_water:
        fixer.removeHeterogens(True)
    if remove_heterogens and remove_water:
        fixer.removeHeterogens(False)
    if add_hydrogens:
        fixer.addMissingHydrogens(pH)

    # Hand the structure to RDKit through a uniquely named scratch file.
    # The file is closed (and therefore fully written) before RDKit reads
    # it, and it is removed even when parsing fails.
    scratch = tempfile.NamedTemporaryFile(mode='w', suffix='.pdb',
                                          delete=False)
    try:
        with scratch:
            PDBFile.writeFile(fixer.topology, fixer.positions, scratch)
        p = Chem.MolFromPDBFile(scratch.name, sanitize=True)
    finally:
        os.remove(scratch.name)

    # Optimize ligand
    if optimize_ligand:
        m = Chem.AddHs(m)  # need hydrogens for optimization
        AllChem.EmbedMolecule(m)
        AllChem.MMFFOptimizeMolecule(m)

    if pdb_name:
        Chem.rdmolfiles.MolToPDBFile(p, '%s.pdb' % (pdb_name))
        Chem.rdmolfiles.MolToPDBFile(m, 'ligand_%s.pdb' % (pdb_name))

    return (p, m)
ligand_system = force_field.create_openmm_system( ligand_off_molecule.to_topology()) ligand_structure = parmed.openmm.load_topology( ligand_pdbfile.topology, ligand_system, xyz=ligand_pdbfile.positions) if 1: # DO PROTEIN THINGS receptor_file = 'receptor.pdb' fixed_receptor_file = f'{path}/fixed_receptor.pdb' omm_forcefield = app.ForceField('amber14-all.xml') fixer = PDBFixer(receptor_file) #filename='receptor.pdb') missingresidues = fixer.findMissingResidues() rezez = fixer.findNonstandardResidues() fixer.replaceNonstandardResidues() fixer.removeHeterogens(keepWater=False) missingatoms = fixer.findMissingAtoms() fixer.addMissingAtoms() fixer.addMissingHydrogens(7.0) PDBFile.writeFile(fixer.topology, fixer.positions, open(fixed_receptor_file, 'w')) fixed_receptor = PDBFile(fixed_receptor_file) receptor_system = omm_forcefield.createSystem(fixed_receptor.topology) receptor_structure = parmed.openmm.load_topology( fixed_receptor.topology, receptor_system, xyz=fixed_receptor.positions) complex_structure = receptor_structure + ligand_structure complex_system = complex_structure.createSystem( nonbondedMethod=NoCutoff, nonbondedCutoff=9.0 * unit.angstrom,
chain_numbers_to_remove.append(chain_number) # Remove all but desired chains. logger.info("Removing chains...") fixer.removeChains(chain_numbers_to_remove) # DEBUG print "fixer.topology.chains(): %s" % str([ chain.id for chain in fixer.topology.chains() ]) # Add missing atoms and residues. logger.info("Adding missing atoms and residues...") fixer.findMissingResidues() fixer.findMissingAtoms() fixer.addMissingAtoms() #fixer.addMissingHydrogens(pH) # DEBUG fixer.removeHeterogens(keepWater=keep_crystallographic_water) # Write PDB file for completed output. logger.info("Writing pdbfixer output...") pdb_filename = os.path.join(workdir, 'pdbfixer.pdb') outfile = open(pdb_filename, 'w') app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile) outfile.close() # ============================================================================== # UTILITIES # ============================================================================== def write_file(filename, contents): with open(filename, 'w') as outfile: outfile.write(contents)
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

# Repair 3UE4 after dropping chain B and write the result next to the input.
fixer = PDBFixer(filename='3UE4.pdb')
fixer.removeChains(chainIds=['B'])
# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throws an
# exception, and if fixer.findMissingResidues() is called instead, several
# terminal residues are added.
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.removeHeterogens(keepWater=False)
fixer.addMissingHydrogens(7.0)
# The context manager closes the output file once it has been written.
with open('3UE4-pdbfixer.pdb', 'w') as handle:
    PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def cleanProtein(structure,
                 mutator=None,
                 regexes=None,
                 hydrogens=True,
                 run_pdb2pqr=True,
                 quiet=False,
                 remove_numerical_chain_id=False,
                 method="geobind",
                 **kwargs):
    """Perform any operations needed to modify the structure or sequence of a
    protein chain.

    Parameters
    ----------
    structure : structure object exposing ``name`` and ``get_chains()``
    mutator : residue mutator; defaults to a ``ResidueMutator`` built from
        ``data.tripeptides`` and ``data.chem_components`` (geobind only)
    regexes : mapping with a ``"SOLVENT_COMPONENTS"`` pattern; defaults to
        ``data.regexes`` (geobind only)
    hydrogens : bool
        If false, hydrogens are stripped at the end.
    run_pdb2pqr : bool
        If true, ``runPDB2PQR(structure, **kwargs)`` is called.
    quiet : bool
        Suppress the informational log messages.
    remove_numerical_chain_id : bool
        Replace purely numeric chain IDs with unused letters.
    method : str
        ``"geobind"`` or ``"pdbfixer"``; any other value skips the cleaning
        step.

    Returns
    -------
    tuple
        ``(structure,)`` followed by the PQR file when ``run_pdb2pqr`` is set
        and by the old-to-new chain ID map when ``remove_numerical_chain_id``
        is set.

    Raises
    ------
    ValueError
        If a numeric chain ID must be replaced but no letter is free.
    """
    prefix = structure.name  # used for file names

    if remove_numerical_chain_id:
        # APBS and TABI-PB does not process numerical chain IDs correctly.
        # This is a work-around.
        taken_ids = {chain.get_id() for chain in structure.get_chains()}
        # Candidates are consumed from the end of the alphabet; IDs already
        # in use are never handed out.
        free_ids = [
            cid for cid in
            "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"
            if cid not in taken_ids
        ]

        # iterate over chains and update
        chain_map = {}
        for chain in structure.get_chains():
            cid = chain.get_id()
            if cid.isnumeric():
                # we want to replace this chain id
                if not free_ids:
                    raise ValueError(
                        "no unused chain ID left to replace numeric chain "
                        "ID %r" % cid)
                new_id = free_ids.pop()
                chain_map[cid] = new_id
                chain.id = new_id
            else:
                chain_map[cid] = cid

    if method == "geobind":
        # set up needed objects
        if regexes is None:
            regexes = data.regexes
        if mutator is None:
            mutator = ResidueMutator(data.tripeptides, data.chem_components)

        # remove non-standard residues
        for chain in structure.get_chains():
            replace = []
            remove = []
            for residue in chain:
                resn = residue.get_resname().strip()
                resid = residue.get_id()
                if resn in data.chem_components and heavyAtomCount(residue) / (
                        data.chem_components[resn]['heavy_atom_count'] -
                        1) < 0.6:
                    # too many missing atoms - replace residue
                    replace.append(resid)
                elif mutator.standard(resn):
                    if resid[0] == ' ':
                        continue
                    else:
                        remove.append(
                            (resid, "removed HETATM standard residue: %s"))
                elif resn == 'HOH' or resn == 'WAT':
                    # waters are dropped without a log message
                    remove.append((resid, None))
                elif regexes["SOLVENT_COMPONENTS"].search(resn):
                    continue
                elif mutator.modified(resn):
                    replace.append(resid)
                else:
                    remove.append((resid, "removed unrecognized residue: %s"))

            for rid, reason in remove:
                if reason is not None and not quiet:
                    logging.info(reason, chain[rid].get_resname())
                chain.detach_child(rid)

            for rid in replace:
                replacement = mutator.mutate(chain[rid])
                if replacement:
                    if not quiet:
                        logging.info("replacing residue %s with %s",
                                     chain[rid].get_resname(),
                                     replacement.get_resname())
                    # swap the residue in place, keeping its position
                    replacement.id = rid
                    idx = chain.child_list.index(chain[rid])
                    chain.child_list[idx] = replacement
                else:
                    if not quiet:
                        logging.info(
                            "could not perform replacement on %s, removing",
                            chain[rid].get_resname())
                    chain.detach_child(rid)
    elif method == "pdbfixer":
        try:
            from pdbfixer import PDBFixer
            from openmm.app import PDBFile
        except ModuleNotFoundError:
            raise ModuleNotFoundError(
                "The dependencies 'pdbfixer' and 'openmm' are required with option 'method=\"pdbfixer\"'"
            )

        # create a temp file
        tmpFile1 = tempFileName(prefix, 'pdb')
        structure.save(tmpFile1)

        # run pdbfixer
        fixer = PDBFixer(filename=tmpFile1)
        fixer.findMissingResidues()
        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()

        tmpFile2 = tempFileName(prefix, 'pdb')
        # close the file before it is read back below
        with open(tmpFile2, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=True)

        # load new fixed structure
        structure = StructureData(tmpFile2, name=prefix)

        # clean up
        os.remove(tmpFile1)
        os.remove(tmpFile2)

    # run PDB2PQR if requested
    if run_pdb2pqr:
        structure, pqrFile = runPDB2PQR(structure, **kwargs)

    # remove hydrogens if requested
    if not hydrogens:
        stripHydrogens(structure)

    # decide what to return
    rargs = [structure]
    if run_pdb2pqr:
        rargs.append(pqrFile)
    if remove_numerical_chain_id:
        rargs.append(chain_map)

    return tuple(rargs)
def apply_pdbfixer(mol,
                   add_missing=True,
                   hydrogenate=True,
                   pH=7.4,
                   remove_heterogens=True,
                   is_protein=True):
    """Clean up a molecule by round-tripping it through PDBFixer.

    Parameters
    ----------
    mol: Rdkit Mol
        Molecule to clean up.
    add_missing: bool, optional
        If true, add in missing residues and atoms
    hydrogenate: bool, optional
        If true, add hydrogens at specified pH
    pH: float, optional
        The pH at which hydrogens will be added if `hydrogenate==True`. Set to 7.4 by default.
    remove_heterogens: bool, optional
        Often times, PDB files come with extra waters and salts attached.
        If this field is set, remove these heterogens.
    is_protein: bool, optional
        If false, then don't remove heterogens (since this molecule is
        itself a heterogen).

    Returns
    -------
    Rdkit Mol
        Parsed from the repaired PDB block without sanitization and with
        hydrogens kept.

    Note
    ----
    This function requires RDKit and PDBFixer to be installed.
    """
    molecule_file = None
    try:
        from pdbfixer import PDBFixer
    except ModuleNotFoundError:
        raise ImportError("This function requires pdbfixer")

    try:
        import simtk
    except ModuleNotFoundError:
        raise ImportError("This function requires openmm")

    try:
        from rdkit import Chem

        # Feed the molecule to PDBFixer as an in-memory PDB file.
        source_io = StringIO(Chem.MolToPDBBlock(mol))
        fixer = PDBFixer(pdbfile=source_io)

        if add_missing:
            fixer.findMissingResidues()
            fixer.findMissingAtoms()
            fixer.addMissingAtoms()
        if hydrogenate:
            fixer.addMissingHydrogens(pH)
        if is_protein and remove_heterogens:
            # False here specifies that water is to be removed
            fixer.removeHeterogens(False)

        # Serialise the repaired structure and parse it back with RDKit.
        result_io = StringIO()
        simtk.openmm.app.PDBFile.writeFile(fixer.topology, fixer.positions,
                                           result_io)
        result_io.seek(0)
        repaired_block = result_io.read()
        return Chem.MolFromPDBBlock(repaired_block,
                                    sanitize=False,
                                    removeHs=False)
    except ValueError as e:
        logger.warning("Unable to add hydrogens %s", e)
        raise MoleculeLoadException(e)
    finally:
        # molecule_file is never assigned a path in this function, so the
        # removal attempt ends in the handled TypeError.
        try:
            os.remove(molecule_file)
        except (OSError, TypeError):
            pass
# Repair pipeline: each step reports what it is about to do.
print('Finding missing residues...')
fixer.findMissingResidues()

# Replace nonstandard residues.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()

# Add missing atoms.
print('Adding missing atoms...')
fixer.findMissingAtoms()
fixer.addMissingAtoms()

# Remove heterogens.
print('Removing heterogens...')
fixer.removeHeterogens(keepWater=keepWater)

# Add missing hydrogens.
print('Adding missing hydrogens appropriate for pH %s' % pH)
fixer.addMissingHydrogens(pH)

# Solvent is only added for the periodic nonbonded methods.
if nonbondedMethod in [app.PME, app.CutoffPeriodic, app.Ewald]:
    # Add solvent.
    print('Adding solvent...')
    fixer.addSolvent(padding=padding)

# Write PDB file; the context manager closes it once written.
output_filename = '%s-pdbfixer.pdb' % pdbid
print('Writing PDB file to "%s"...' % output_filename)
with open(output_filename, 'w') as output_file:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, output_file)
def process_pdb(path,
                corr_path,
                chain_id,
                max_atoms,
                gsd_file,
                embedding_dicts,
                NN,
                nlist_model,
                keep_residues=(-1, 1),
                debug=False,
                units=unit.nanometer,
                frame_number=3,
                model_index=0,
                log_file=None,
                shiftx_style=False):
    """Build per-residue fragment records from a PDB file and its peak data.

    For a random selection of frames the structure is cleaned with PDBFixer,
    a neighbor list is computed with ``nlist_model`` and, for every residue
    of the requested chain, a fragment (the residue, its chain neighbors and
    its spatial neighbors) is encoded into fixed-size arrays and converted
    with ``make_tfrecord``.

    Parameters
    ----------
    path : str
        Path of the PDB file (read by ``app.PDBFile`` and ``PDBFixer``).
    corr_path : str
        Path handed to ``process_corr`` to load the peak data.
    chain_id : str
        First character of the chain id to keep. ``'_'`` selects the chain
        of the first residue in the file.
    max_atoms : int
        Size of the per-fragment arrays; fragments that fill them are
        dropped.
    gsd_file : object or None
        If not None, a snapshot from ``write_record_traj`` is appended for
        every accepted fragment.
    embedding_dicts : dict
        Lookup tables under the keys ``'name'``, ``'atom'``, ``'nlist'`` and
        ``'class'``. ``embedding_dicts['name']`` is extended in place with
        unseen ``RESNAME-ATOMNAME`` keys.
    NN : int
        Number of neighbors per atom read from the ``nlist_model`` output.
    nlist_model : callable
        Called with the position array; its result is indexed as
        ``[atom, neighbor, 0]`` (distance) and ``[atom, neighbor, 1]``
        (neighbor atom index).
    keep_residues : sequence of two ints
        Offsets (first, last) relative to the current residue selecting the
        chain neighbors that are always considered.
    debug : bool
        Print diagnostics, run the alignment check, and raise instead of
        silently skipping in some failure cases.
    units : unit
        Unit the positions are converted to before building the nlist.
    frame_number : int
        Maximum number of frames sampled (forced to 1 if ``shiftx_style``).
    model_index : int
        Stored in the record indices and the log line.
    log_file : file-like or None
        If not None, one line is written per accepted fragment.
    shiftx_style : bool
        Passed to ``process_corr``; also skips the atom/hydrogen fixing.

    Returns
    -------
    tuple
        ``(result, fraction, len(result), peak_count)`` where ``result`` is
        the list of ``make_tfrecord`` outputs, ``fraction`` is
        ``len(peak_successes) / len(peak_data)`` for the last frame that was
        started, and ``peak_count`` is the number of peaks assigned.

    Notes
    -----
    Updates the module-level counter ``MA_LOST_FRAGS``.
    """
    global MA_LOST_FRAGS
    if shiftx_style:
        frame_number = 1
    # load pdb
    pdb = app.PDBFile(path)

    # load cs sets
    peak_data, sequence_map, peak_seq = process_corr(corr_path, debug,
                                                     shiftx_style)

    result = []
    # check for weird/null chain
    if chain_id == '_':
        chain_id = list(pdb.topology.residues())[0].chain.id[0]
    # sometimes chains have extra characters (why?)
    residues = [
        r for r in pdb.topology.residues() if r.chain.id[0] == chain_id
    ]
    if len(residues) == 0:
        if debug:
            raise ValueError('Failed to find requested chain ', chain_id)

    pdb_offset, seq_offset = None, None

    # from pdb atom index to index of the residue in `residues`
    residue_lookup = {}
    peak_count = 0
    # Defined up front so the return statement works even if no frame is
    # processed; it is reset at the start of every frame.
    peak_successes = set()
    # select a random set of frames for generating data without replacement
    frame_choices = random.sample(range(0, pdb.getNumFrames()),
                                  k=min(pdb.getNumFrames(), frame_number))
    for fi in frame_choices:
        peak_successes = set()
        # clean up individual frame
        frame = pdb.getPositions(frame=fi)
        # have to fix at each frame since inserted atoms may change
        # fix missing residues/atoms
        fixer = PDBFixer(filename=path)
        # overwrite positions with frame positions
        fixer.positions = frame
        # we want to add missing atoms,
        # but not replace missing residue. We'd
        # rather just ignore those
        fixer.findMissingResidues()
        # remove the missing residues (PDBFixer keeps them in a dict)
        fixer.missingResidues = {}
        # remove water!
        fixer.removeHeterogens(False)
        if not shiftx_style:
            fixer.findMissingAtoms()
            fixer.findNonstandardResidues()
            fixer.replaceNonstandardResidues()
            fixer.addMissingAtoms()
            fixer.addMissingHydrogens(7.0)
        # get new positions
        frame = fixer.positions
        num_atoms = len(frame)
        # remake residue list each time so they have correct atom ids
        residues = [
            r for r in fixer.topology.residues() if r.chain.id[0] == chain_id
        ]
        if num_atoms > 20000:
            MA_LOST_FRAGS += len(residues)
            if debug:
                print(
                    'Exceeded number of atoms for building nlist (change this if you have big GPU memory) in frame {} in pdb {}'
                    .format(fi, path))
            break

        # check alignment once
        if pdb_offset is None:
            # create sequence from residues
            pdb_seq = ['XXX'] * max([int(r.id) + 1 for r in residues])
            for r in residues:
                rid = int(r.id)
                if rid >= 0:
                    pdb_seq[rid] = r.name
            if debug:
                print('pdb_seq', pdb_seq)
                print('peak_seq', peak_seq)
            pdb_offset, seq_offset = align(pdb_seq, peak_seq, debug)
            # TODO: the aligned pdb offset is discarded and forced to zero.
            # Maybe it's ok
            pdb_offset = 0
            if debug:
                print('pdb_offset', pdb_offset)
                print('seq_offset', seq_offset)
                print(sequence_map)
                # now check alignment - rarely perfect
                saw_one = False
                aligned = 0
                for i, res in enumerate(residues):
                    segid = int(res.id) + pdb_offset
                    saw_one = pdb_seq[segid] == res.name
                    if not saw_one:
                        print('Mismatch (A) at position {} ({}). {} != {}'.
                              format(segid, res.id, pdb_seq[segid], res.name))
                        continue
                    if segid + seq_offset in sequence_map:
                        peakid = sequence_map[segid + seq_offset]
                        print(segid, segid + seq_offset, len(pdb_seq),
                              len(peak_seq))
                        saw_one = pdb_seq[segid] == peak_seq[segid +
                                                             seq_offset]
                        if not saw_one:
                            # arguments follow the labels in the message
                            print(
                                'Mismatch (B) at position {}. pdb seq: {}, peak seq: {}'
                                .format(segid, pdb_seq[segid],
                                        peak_seq[segid + seq_offset]))
                            continue
                        saw_one = peak_data[peakid]['name'] == res.name
                        if not saw_one:
                            print(
                                'Mismatch (C) at position {}. peak seq: {}, peak data: {}, residue: {}'
                                .format(segid, peak_seq[segid + seq_offset],
                                        peak_data[peakid]['name'], res.name))
                            continue
                        aligned += 1
                if aligned < 5:
                    raise ValueError(
                        'Could not find more than 5 aligned residues, very unusual'
                    )

            # create residue look-up from atom index
            for i, r in enumerate(residues):
                for a in r.atoms():
                    residue_lookup[a.index] = i
            # This alignment will be checked as we compare shifts against the pdb

        # get neighbor list for frame
        np_pos = np.array([v.value_in_unit(units) for v in frame])
        frame_nlist = nlist_model(np_pos)

        for ri in range(len(residues)):
            # we build up fragment by getting residues around us, both in chain
            # and those within a certain distance of us
            rmin = max(0, ri + keep_residues[0])
            # have to +1 here (and not in range) to get min to work :)
            rmax = min(len(residues), ri + keep_residues[1] + 1)
            # do we have any residues to consider?
            success = rmax - rmin > 0

            consider = set(range(rmin, rmax))

            # Used to indicate an atom should be included from a different residue
            marked = [False] * len(frame)

            # now grab spatial neighbor residues
            # NOTE: I checked this by hand a lot
            # Believe this code.
            for a in residues[ri].atoms():
                for ni in range(NN):
                    j = int(frame_nlist[a.index, ni, 1])
                    try:
                        consider.add(residue_lookup[j])
                        marked[j] = True
                    except KeyError:
                        success = False
                        if debug:
                            print(
                                'Neighboring residue in different chain, skipping'
                            )
                        break

            atoms = np.zeros((max_atoms), dtype=np.int64)
            # Z-DISABLED: a dummy atom at the end (atoms[-1]) used to keep
            # bond counts the same by bonding to it.
            # `np.float` was removed from NumPy; float64 is the same dtype.
            mask = np.zeros((max_atoms), dtype=np.float64)
            bonds = np.zeros((max_atoms, max_atoms), dtype=np.int64)
            # nlist:
            # :,:,0 -> distance
            # :,:,1 -> neighbor index
            # :,:,2 -> bond count
            nlist = np.zeros((max_atoms, NEIGHBOR_NUMBER, 3),
                             dtype=np.float64)
            positions = np.zeros((max_atoms, 3), dtype=np.float64)
            peaks = np.zeros((max_atoms), dtype=np.float64)
            names = np.zeros((max_atoms), dtype=np.int64)
            # going from pdb atom index to index in these data structures
            rmap = {}
            index = 0
            # check our two conditions that could have made this false: there are residues and
            # we didn't have off-chain spatial neighboring residues
            if not success:
                continue

            for rj in consider:
                residue = residues[rj]
                # use the alignment result to get offset
                segid = int(residue.id) + pdb_offset
                if segid + seq_offset not in sequence_map:
                    if debug:
                        print('Could not find residue index', rj, ': ',
                              residue, 'in the sequence map. Its index is',
                              segid + seq_offset, 'ri: ', ri)
                        print('We are considering', consider)
                    success = False
                    break
                peak_id = sequence_map[segid + seq_offset]
                if peak_id >= len(peak_data):
                    success = False
                    if debug:
                        print('peakd id is outside of peak range')
                    break
                # only check for residue we actually care about
                if ri == rj and residue.name != peak_data[peak_id]['name']:
                    if debug:
                        print('Mismatch between residue ', ri, rj, peak_id,
                              residue, segid, peak_data[peak_id], path,
                              corr_path, chain_id)
                    success = False
                    break
                for atom in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[atom.index]:
                        continue
                    mask[index] = float(ri == rj)
                    atom_name = residue.name + '-' + atom.name
                    if atom_name not in embedding_dicts['name']:
                        embedding_dicts['name'][atom_name] = len(
                            embedding_dicts['name'])
                    names[index] = embedding_dicts['name'][atom_name]

                    if atom.element.symbol not in embedding_dicts['atom']:
                        if debug:
                            print('Could not identify atom',
                                  atom.element.symbol)
                        success = False
                        break
                    atoms[index] = embedding_dicts['atom'][
                        atom.element.symbol]
                    positions[index] = np_pos[atom.index, :]
                    rmap[atom.index] = index
                    peaks[index] = 0
                    if mask[index]:
                        if atom.name[:3] in peak_data[peak_id]:
                            peaks[index] = peak_data[peak_id][atom.name[:3]]
                            peak_count += 1
                            peak_successes.add(peak_id)
                        else:
                            mask[index] = 0
                    index += 1
                    # Z-DISABLED
                    # -1 for dummy atom which is stored at end
                    if index == max_atoms - 1:
                        MA_LOST_FRAGS += 1
                        if debug:
                            print('Not enough space for all atoms in ri', ri)
                        success = False
                        break
                if ri == rj and mask.sum() == 0:
                    if debug:
                        print('Warning found no peaks for', ri, rj, residue,
                              peak_data[peak_id])
                    success = False
                if not success:
                    break
            if not success:
                continue

            # do this after so our reverse mapping is complete
            for rj in consider:
                residue = residues[rj]
                for b in residue.bonds():
                    # set bonds
                    try:
                        bonds[rmap[b.atom1.index], rmap[b.atom2.index]] = 1
                        bonds[rmap[b.atom2.index], rmap[b.atom1.index]] = 1
                    except KeyError:
                        # for bonds that cross residue
                        pass

            for rj in consider:
                residue = residues[rj]
                for a in residue.atoms():
                    # Make sure atom is in residue or neighbor of residue atom
                    if ri != rj and not marked[a.index]:
                        continue
                    index = rmap[a.index]
                    # convert to local indices and filter neighbors
                    n_index = 0
                    for ni in range(NN):
                        if frame_nlist[a.index, ni, 0] > 50.0:
                            # large distances are sentinels for things
                            # like self neighbors
                            continue
                        try:
                            j = rmap[int(frame_nlist[a.index, ni, 1])]
                        except KeyError:
                            # either we couldn't find a neighbor on the root residue (which is bad)
                            # or just one of the neighbors is not on a considered residue.
                            if rj == ri:
                                success = False
                                if debug:
                                    print('Could not find all neighbors',
                                          int(frame_nlist[a.index, ni, 1]),
                                          consider)
                                break
                            # Z-DISABLED: would point to the dummy atom here
                            continue
                        # a 0 in `bonds` -> non-bonded, otherwise single bonded
                        if bonds[index, j] == 0:
                            bond_type = embedding_dicts['nlist']['nonbonded']
                        else:
                            bond_type = embedding_dicts['nlist'][1]
                        # set index
                        nlist[index, n_index, 1] = j
                        # set distance
                        nlist[index, n_index, 0] = frame_nlist[a.index, ni,
                                                               0]
                        # set type
                        nlist[index, n_index, 2] = bond_type
                        n_index += 1
                        if n_index == NEIGHBOR_NUMBER:
                            break

            if not success:
                if debug:
                    raise RuntimeError()
                continue

            if gsd_file is not None:
                snapshot = write_record_traj(
                    positions, atoms, mask, nlist, peaks,
                    embedding_dicts['class'][residues[ri].name], names,
                    embedding_dicts)
                snapshot.configuration.step = len(gsd_file)
                gsd_file.append(snapshot)
            result.append(
                make_tfrecord(atoms,
                              mask,
                              nlist,
                              peaks,
                              embedding_dicts['class'][residues[ri].name],
                              names,
                              indices=np.array(
                                  [model_index, fi,
                                   int(residues[ri].id)],
                                  dtype=np.int64)))
            if log_file is not None:
                # gsd_file is optional; report 0 snapshots when it is absent
                # instead of failing on len(None).
                gsd_count = len(gsd_file) if gsd_file is not None else 0
                log_file.write('{} {} {} {} {} {} {} {}\n'.format(
                    path.split('/')[-1],
                    corr_path.split('/')[-1], chain_id, len(peak_successes),
                    gsd_count, model_index, fi, residues[ri].id))
    return result, len(peak_successes) / len(peak_data), len(
        result), peak_count
def prepare_pdb(pdb,
                chains='A',
                ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7,
                pad=10 * unit.angstroms,
                nbonded=app.PME,
                constraints=app.HBonds,
                crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Two PDB files are written to the current directory,
    ``<pdb>-pdbfixer.pdb`` (after fixing) and ``<pdb>-minimized.pdb``
    (after minimization), and the final state is handed to
    ``serialize_system``.

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad: Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions. Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is only added for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.
    """
    fixed_path = '%s-pdbfixer.pdb' % pdb
    minimized_path = '%s-minimized.pdb' % pdb

    # Load forcefield.
    logger.info('Retrieving %s from PDB...', pdb)
    ff = app.ForceField(*ff)

    # Retrieve structure from PDB.
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file; the context manager closes the handle right away.
    logger.info('Writing PDB file to "%s"...', fixed_path)
    with open(fixed_path, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = ff.createSystem(fixer.topology,
                             nonbondedMethod=nbonded,
                             constraints=constraints,
                             rigidWater=True,
                             removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)  # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    logger.info('Writing PDB file to "%s"...', minimized_path)
    with open(minimized_path, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)
def cleanPdb(pdb_list,
             chain=None,
             source=None,
             toFolder="cleaned_pdbs",
             formatName=False,
             removeDNAchains=True,
             verbose=False,
             removeTwoEndsMissingResidues=True,
             addMissingResidues=True,
             removeHeterogens=True,
             keepIds=False):
    """Clean each PDB in ``pdb_list`` with PDBFixer and write it to ``toFolder``.

    Parameters
    ----------
    pdb_list : iterable of str
        PDB identifiers. With ``chain=None`` a fifth character, if present,
        is used as the chain to keep.
    chain : str, int or None
        ``None``: keep the chain named by ``pdb_id[4]`` or chain "A".
        ``"-1"`` / ``-1``: keep the chains returned by ``getAllChains``.
        ``"first"``: keep the first chain of the file. Anything else is used
        as the chain id(s) to keep.
    source : str or None
        ``None`` reads from ``original_pdbs/``; a value ending in ``.pdb``
        is used as the input file itself; otherwise it is the input folder.
    toFolder : str
        Output folder, created if missing.
    formatName : bool
        If true, use the lower-cased first four characters of the id as the
        file stem.
    removeDNAchains : bool
        Forwarded to ``getAllChains``.
    verbose : bool
        Print the removed chain indices and missing-residue keys.
    removeTwoEndsMissingResidues : bool
        Drop missing residues located at the start or end of a chain.
    addMissingResidues : bool
        If false, discard all missing residues so none are rebuilt.
    removeHeterogens : bool
        If true, remove heterogens including water.
    keepIds : bool
        Forwarded to ``PDBFile.writeFile``.

    Returns
    -------
    None
        Entries that cannot be loaded or completed are reported and skipped.
    """
    # Create the folder directly instead of shelling out to `mkdir -p`, so
    # names with spaces or shell metacharacters are handled literally.
    os.makedirs(toFolder, exist_ok=True)
    for pdb_id in pdb_list:
        print(pdb_id)
        pdb = pdb_id.lower()[:4] if formatName else pdb_id
        pdbFile = pdb + ".pdb"
        if source is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif source[-4:] == ".pdb":
            fromFile = source
        else:
            fromFile = os.path.join(source, pdbFile)

        # clean pdb
        try:
            fixer = PDBFixer(filename=fromFile)
        except Exception as inst:
            print(inst)
            print(f"{fromFile} not found. skipped")
            continue

        # remove unwanted chains
        chains = list(fixer.topology.chains())
        print(chains)
        if chain is None:
            # 'None' means default is chain A unless specified.
            if len(pdb_id) >= 5:
                Chosen_chain = pdb_id[4]
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile,
                                        removeDNAchains=removeDNAchains)
            print(f"Chains: {Chosen_chain}")
        elif chain == "first":
            Chosen_chain = chains[0].id
        else:
            Chosen_chain = chain

        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # add missing residues in the middle of a chain, not ones at the
        # start or end of the chain.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        if verbose:
            print("chains to remove", chains_to_remove)
            print("missing residues: ", keys)
        if not addMissingResidues:
            fixer.missingResidues.clear()
        elif removeTwoEndsMissingResidues:
            # iterate over a copy because entries are deleted on the way
            for key in list(keys):
                chain_tmp = chains[key[0]]
                if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                    del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        if removeHeterogens:
            fixer.removeHeterogens(keepWater=False)

        fixer.findMissingAtoms()
        try:
            fixer.addMissingAtoms()
        except Exception:
            # Skip this entry, but let KeyboardInterrupt/SystemExit through.
            print("Unable to add missing atoms")
            continue
        fixer.addMissingHydrogens(7.0)
        with open(os.path.join(toFolder, pdbFile), 'w') as out_handle:
            PDBFile.writeFile(fixer.topology,
                              fixer.positions,
                              out_handle,
                              keepIds=keepIds)
# Command-line arguments: input structure and prefix of the output files.
pdb_in, pdb_out = sys.argv[1], sys.argv[2]
print(f'Processing {pdb_in} to {pdb_out}')

# Load the structure and collect everything PDBFixer flags.
fixer = PDBFixer(filename=pdb_in)
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.findNonstandardResidues()
for label, found in (
    ('Residues:', fixer.missingResidues),
    ('Atoms:', fixer.missingAtoms),
    ('Terminals:', fixer.missingTerminals),
    ('Non-standard:', fixer.nonstandardResidues),
):
    print(label, found)

# Add atoms and hydrogens (pH 7.4), then drop heterogens; False means
# water is not kept.
fixer.addMissingAtoms()
fixer.addMissingHydrogens(7.4)
fixer.removeHeterogens(False)

# write out the fixed PDB
with open(f'{pdb_out}_fixed.pdb', 'w') as outfile:
    PDBFile.writeFile(
        fixer.topology,
        fixer.positions,
        file=outfile,
        keepIds=True,
    )

# Build the simulation used for the energy minimisation.
system_generator = SystemGenerator(forcefields=['amber/ff14SB.xml'])
system = system_generator.create_system(fixer.topology)
integrator = LangevinIntegrator(
    300 * unit.kelvin,
    1 / unit.picosecond,
    0.002 * unit.picoseconds,
)
simulation = Simulation(fixer.topology, system, integrator)
simulation.context.setPositions(fixer.positions)

print('Minimising')
simulation.minimizeEnergy()

# write out the minimised PDB
with open(f'{pdb_out}_minimised.pdb', 'w') as outfile:
    minimised_positions = simulation.context.getState(
        getPositions=True, enforcePeriodicBox=False).getPositions()
    PDBFile.writeFile(
        fixer.topology,
        minimised_positions,
        file=outfile,
        keepIds=True,
    )