def pdb_fix_pdbfixer(pdbid, file_pathway, ph, chains_to_remove):
    """Download a PDB entry, repair it with PDBFixer and write three variants.

    Args:
        pdbid: 4 letter string specifying the PDB ID of the file you want to fix
        file_pathway: directory in which the fixed PDB files are written
        ph: the pH at which hydrogens will be determined and added
        chains_to_remove: dictionary with two parallel lists, 'pdbid' and
            'chain_to_remove'; the entry of 'chain_to_remove' at the position
            of `pdbid` is a whitespace-separated string of chain IDs to drop

    Returns:
        nothing, but it does write three PDB files into `file_pathway`:
        '<pdbid>_fixed_ph<ph>.pdb' (everything kept),
        '<pdbid>_fixed_ph<ph>_apo.pdb' (after removeHeterogens(True)) and
        '<pdbid>_fixed_ph<ph>_apo_nowater.pdb' (after removeHeterogens(False)).
    """
    print(pdbid)

    # Download the topology from RCSB based on pdbid
    fixer = PDBFixer(pdbid=pdbid)

    # Remove chains based on the hand curated table
    if pdbid in chains_to_remove['pdbid']:
        position = chains_to_remove['pdbid'].index(pdbid)
        chain_ids = chains_to_remove['chain_to_remove'][position].split()
        fixer.removeChains(chainIds=chain_ids)

    # Determine the first and last residue resolved in chain 0
    first_chain = next(iter(fixer.topology.chains()))
    resindices = [residue.index for residue in first_chain.residues()]
    first_resindex = min(resindices)
    last_resindex = max(resindices)

    # Find missing residues and drop the N- and C-terminal fragments so that
    # they are not rebuilt. Each end is checked separately because removing
    # the N-terminal entry can leave the dictionary empty.
    fixer.findMissingResidues()
    if fixer.missingResidues:
        first_key = min(fixer.missingResidues)
        if first_key[-1] <= first_resindex:
            fixer.missingResidues.pop(first_key)
    if fixer.missingResidues:
        last_key = max(fixer.missingResidues)
        if last_key[-1] >= last_resindex:
            fixer.missingResidues.pop(last_key)

    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(ph)

    def _write(suffix):
        # Write the current state of the fixer; the file is closed on exit.
        # NOTE(review): keepNumbers is not defined in this function; it is
        # presumably a module-level flag - confirm.
        path = os.path.join(file_pathway,
                            '%s_fixed_ph%s%s.pdb' % (pdbid, ph, suffix))
        with open(path, 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepNumbers)

    # Write fixed PDB file, with all of the waters and ligands
    _write('')

    # Remove the ligand and write a pdb file
    fixer.removeHeterogens(True)
    _write('_apo')

    # Remove the waters and write a pdb file
    fixer.removeHeterogens(False)
    _write('_apo_nowater')
def fix_pdb(pdb_id, pdb_file, pdb_group):
    """Strip unwanted chains and heterogens from a PDB file and rewrite it.

    Args:
        pdb_id: identifier used for the structure and for the output file name
        pdb_file: path of the PDB file to read
        pdb_group: passed to get_required_chains() to obtain the chain IDs
            that must be kept

    Returns:
        Path of the written file, '<pdb_id>.pdb' in the directory of
        `pdb_file`.
    """
    import os  # local import: only `dirname` is known to be imported at file level

    chains_to_retain = get_required_chains(pdb_group)
    first_model = PDBParser().get_structure(pdb_id, pdb_file)[0]
    chains_to_remove = [
        chain.get_id()
        for chain in first_model
        if chain.get_id() not in chains_to_retain
    ]

    fixer = PDBFixer(filename=pdb_file)
    fixer.removeChains(chainIds=chains_to_remove)

    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.removeHeterogens(True)

    # os.path.join keeps the result relative when pdb_file has no directory
    # part; plain '/' concatenation would point at the filesystem root.
    out_path = os.path.join(os.path.dirname(pdb_file), pdb_id + '.pdb')

    # The keepIds flag is critical here, otherwise we lose the original
    # chain and residue numbering.
    with open(out_path, 'w') as handle:
        PDBFile.writeFile(fixer.topology, fixer.positions, handle, keepIds=True)

    return out_path
def cleanPdb(pdb_list, chain=None, fromFolder=None, toFolder="cleaned_pdbs"):
    """Clean each PDB in `pdb_list` with PDBFixer and write it to `toFolder`.

    Args:
        pdb_list: iterable of identifiers; the first four characters
            (lower-cased) give the file name '<pdb>.pdb', and an optional
            fifth character names the chain to keep.
        chain: chain(s) to keep. None keeps the chain named by the fifth
            character of the identifier, or "A" for four-character ids;
            "-1" or -1 keeps whatever getAllChains() returns for the input
            file; any other value is used as-is and tested with `in`.
        fromFolder: None reads from 'original_pdbs'; a value ending in
            '.pdb' is used directly as the input file; anything else is
            treated as the directory holding '<pdb>.pdb'.
        toFolder: output directory, created if it does not exist.

    Returns:
        None. One cleaned file per identifier is written to `toFolder`.
    """
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)

    for pdb_id in pdb_list:
        pdb = pdb_id.lower()[:4]
        pdbFile = pdb + ".pdb"

        if fromFolder is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif fromFolder.endswith(".pdb"):
            # The argument is itself a PDB file (suffix check, not prefix).
            fromFile = fromFolder
        else:
            fromFile = os.path.join(fromFolder, pdbFile)

        if chain is None:
            # None means the default is chain A unless the id specifies one.
            if len(pdb_id) == 5:
                Chosen_chain = pdb_id[4].upper()
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile)
        else:
            Chosen_chain = chain

        # clean pdb
        fixer = PDBFixer(filename=fromFile)

        # remove unwanted chains
        chains = list(fixer.topology.chains())
        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # Add missing residues in the middle of a chain, not ones at the
        # start or end of the chain: drop entries whose insertion index is 0
        # or equal to the chain length.
        chains = list(fixer.topology.chains())
        for key in list(fixer.missingResidues):
            chain_tmp = chains[key[0]]
            if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        fixer.removeHeterogens(keepWater=False)
        fixer.findMissingAtoms()
        fixer.addMissingAtoms()
        fixer.addMissingHydrogens(7.0)

        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle)
def fix_peptide(pdb_file, seq_dict, pH=7.4, remove_water=True, remove_small_mols=True):
    """Rebuild a peptide structure against given sequences and check its geometry.

    Args:
        pdb_file: path of the PDB file to load into PDBFixer.
        seq_dict: mapping of chain index to a sequence of one-letter amino
            acid codes. A falsy value (None or empty) marks the chain for
            removal. The mapping is not modified.
        pH: pH used when adding missing hydrogens.
        remove_water: when heterogens are removed, also remove water.
        remove_small_mols: remove heterogens before fixing.

    Returns:
        The fixed structure as loaded by mdtraj.load(). The results of the
        cis-peptide and chirality checks are printed, not returned.

    Raises:
        KeyError: if a one-letter code is not a key of ONE_THREE_CODE.
    """
    fixer = PDBFixer(filename=pdb_file)

    # Replace the sequences read from the file with the residues actually
    # present in each chain.
    fixer.sequences.clear()
    for chain in fixer.topology.chains():
        seq = pdbfixer.pdbfixer.Sequence(chain.id, [r.name for r in chain.residues()])
        fixer.sequences.append(seq)

    if remove_small_mols:
        fixer.removeHeterogens(not remove_water)

    # Convert single-letter codes to three-letter codes in a fresh dict so
    # the caller's seq_dict stays usable for further calls.
    delete_chains = []
    target_sequences = {}
    for key, value in seq_dict.items():
        if not value:
            delete_chains.append(key)
            target_sequences[key] = value
        else:
            target_sequences[key] = [ONE_THREE_CODE[item] for item in value]

    for chain in fixer.topology.chains():
        wanted = target_sequences.get(chain.index)
        if wanted is not None:
            fixer.sequences[chain.index].residues = wanted

    fixer.findMissingResidues()
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()
    fixer.addMissingHydrogens(pH)
    fixer.removeChains(delete_chains)

    # Round-trip through a temporary PDB file to hand the result to mdtraj.
    # The file is flushed before it is read and deleted when the block exits.
    with tempfile.NamedTemporaryFile(mode="w", suffix=".pdb") as dummy:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, dummy)
        dummy.flush()
        product = mdtraj.load(dummy.name)

    problem_cis = ChiralityCheck.check_cispeptide_bond(product)
    problem_chiral = ChiralityCheck.check_chirality(product)
    print("The following problems have been detected:")
    print(problem_cis)
    print(problem_chiral)
    print("Either rerun or find a tool to solve. Perhaps VMD?")

    return product
c.id = "Z" chains_to_remove.append(i) elif num_residues <= 15: if not found_C: found_C = True c.id = "C" else: c.id = "Z" chains_to_remove.append(i) else: c.id = "Z" chains_to_remove.append(i) #print "ERROR: Found chains with weird number of residues:", num_residues #sys.exit(0) fixer.removeChains(chains_to_remove) chains = fixer.topology.chains() print chains chain_lengths = [] for c in chains: num_residues = len(list(c.residues())) chain_lengths.append(num_residues) PDBFile.writeFile(fixer.topology, fixer.positions, open(pdbcode + ".pdb", 'w')) call(["rm temp.pdb"], shell=True) if chain_lengths[1] < chain_lengths[2]: call(["grep \"[A-Z] B \" " + pdbcode + ".pdb > temp.pdb"], shell=True) call(["sed -i \"/[A-Z] B /d\" " + pdbcode + ".pdb"], shell=True) call(["sed -i \"/END/d\" " + pdbcode + ".pdb"], shell=True)
# SET UP SYSTEM
################################################################################

# Build the forcefield from the configured XML files.
forcefield = app.ForceField(*forcefields_to_use)

# Fetch the structure for `pdbid` through PDBFixer.
print('Retrieving %s from PDB...' % pdbid)
fixer = PDBFixer(pdbid=pdbid)

# Work out which chains to drop: every chain ID not listed in
# `chain_ids_to_keep`.
print('Removing all chains but %s' % chain_ids_to_keep)
all_chains = [*fixer.topology.chains()]
chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains]
chain_ids_to_remove = set(chain_id_list).difference(set(chain_ids_to_keep))
fixer.removeChains(chainIds=chain_ids_to_remove)

# Locate residues missing from the structure.
print('Finding missing residues...')
fixer.findMissingResidues()

# Swap nonstandard residues for their standard equivalents.
print('Replacing nonstandard residues...')
fixer.findNonstandardResidues()
fixer.replaceNonstandardResidues()

# Build the atoms that are missing.
print('Adding missing atoms...')
fixer.findMissingAtoms()
fixer.addMissingAtoms()
app.PDBFile.writeFile(fixer.topology, fixer.positions, outfile) outfile.close() if chain_ids_to_keep is not None: # Hack to get chain id to chain number mapping. chain_id_list = [c.chain_id for c in fixer.structure.models[0].chains] # Build list of chains to remove chain_numbers_to_remove = list() for (chain_number, chain_id) in enumerate(chain_id_list): if chain_id not in chain_ids_to_keep: chain_numbers_to_remove.append(chain_number) # Remove all but desired chains. logger.info("Removing chains...") fixer.removeChains(chain_numbers_to_remove) # DEBUG print "fixer.topology.chains(): %s" % str([ chain.id for chain in fixer.topology.chains() ]) # Add missing atoms and residues. logger.info("Adding missing atoms and residues...") fixer.findMissingResidues() fixer.findMissingAtoms() fixer.addMissingAtoms() #fixer.addMissingHydrogens(pH) # DEBUG fixer.removeHeterogens(keepWater=keep_crystallographic_water) # Write PDB file for completed output. logger.info("Writing pdbfixer output...") pdb_filename = os.path.join(workdir, 'pdbfixer.pdb')
def cleanPdb(pdb_list, chain=None, source=None, toFolder="cleaned_pdbs", formatName=False,
             removeDNAchains=True, verbose=False, removeTwoEndsMissingResidues=True,
             addMissingResidues=True, removeHeterogens=True, keepIds=False):
    """Clean each PDB in `pdb_list` with PDBFixer and write it to `toFolder`.

    Args:
        pdb_list: iterable of identifiers. The output (and default input)
            file name is '<pdb>.pdb'.
        chain: chain(s) to keep. None keeps the chain named by the fifth
            character of the identifier, or "A" for four-character ids;
            "-1" or -1 keeps whatever getAllChains() returns; "first" keeps
            the first chain of the topology; any other value is used as-is
            and tested with `in`.
        source: None reads from 'original_pdbs'; a value ending in '.pdb' is
            used directly as the input file; anything else is treated as the
            directory holding '<pdb>.pdb'.
        toFolder: output directory, created if it does not exist.
        formatName: if True, `<pdb>` is the lower-cased first four
            characters of the identifier; otherwise the identifier itself.
        removeDNAchains: forwarded to getAllChains().
        verbose: print the removed chain indices and missing-residue keys.
        removeTwoEndsMissingResidues: do not rebuild missing residues at the
            start or end of a chain.
        addMissingResidues: if False, no missing residues are rebuilt.
        removeHeterogens: remove heterogens, including water.
        keepIds: forwarded to PDBFile.writeFile().

    Returns:
        None. Inputs that cannot be loaded, or whose missing atoms cannot be
        added, are reported on stdout and skipped.
    """
    if toFolder:
        os.makedirs(toFolder, exist_ok=True)

    for pdb_id in pdb_list:
        print(pdb_id)
        pdb = pdb_id.lower()[:4] if formatName else pdb_id
        pdbFile = pdb + ".pdb"

        if source is None:
            fromFile = os.path.join("original_pdbs", pdbFile)
        elif source[-4:] == ".pdb":
            fromFile = source
        else:
            fromFile = os.path.join(source, pdbFile)

        # clean pdb
        try:
            fixer = PDBFixer(filename=fromFile)
        except Exception as inst:
            print(inst)
            print(f"{fromFile} not found. skipped")
            continue

        # remove unwanted chains
        chains = list(fixer.topology.chains())
        print(chains)
        if chain is None:
            # None means the default is chain A unless the id specifies one.
            if len(pdb_id) >= 5:
                Chosen_chain = pdb_id[4]
            else:
                assert (len(pdb_id) == 4)
                Chosen_chain = "A"
        elif chain == "-1" or chain == -1:
            Chosen_chain = getAllChains(fromFile, removeDNAchains=removeDNAchains)
            print(f"Chains: {Chosen_chain}")
        elif chain == "first":
            Chosen_chain = chains[0].id
        else:
            Chosen_chain = chain

        chains_to_remove = [
            i for i, x in enumerate(chains) if x.id not in Chosen_chain
        ]
        fixer.removeChains(chains_to_remove)

        fixer.findMissingResidues()
        # Add missing residues in the middle of a chain, not ones at the
        # start or end of the chain.
        chains = list(fixer.topology.chains())
        keys = fixer.missingResidues.keys()
        if verbose:
            print("chains to remove", chains_to_remove)
            print("missing residues: ", keys)

        if not addMissingResidues:
            fixer.missingResidues.clear()
        elif removeTwoEndsMissingResidues:
            # Terminal gaps have insertion index 0 or the chain length.
            for key in list(keys):
                chain_tmp = chains[key[0]]
                if key[1] == 0 or key[1] == len(list(chain_tmp.residues())):
                    del fixer.missingResidues[key]

        fixer.findNonstandardResidues()
        fixer.replaceNonstandardResidues()
        if removeHeterogens:
            fixer.removeHeterogens(keepWater=False)

        fixer.findMissingAtoms()
        try:
            fixer.addMissingAtoms()
        except Exception:
            # Skip this structure, but let KeyboardInterrupt/SystemExit
            # propagate instead of being swallowed.
            print("Unable to add missing atoms")
            continue
        fixer.addMissingHydrogens(7.0)

        with open(os.path.join(toFolder, pdbFile), 'w') as handle:
            PDBFile.writeFile(fixer.topology, fixer.positions, handle,
                              keepIds=keepIds)
from pdbfixer import PDBFixer
from simtk.openmm.app import PDBFile

# Load 3UE4, keep everything except chain B, add missing atoms and hydrogens
# (pH 7.0), drop heterogens and water, and write the result.
fixer = PDBFixer(filename='3UE4.pdb')
fixer.removeChains(chainIds=['B'])

# Without fixer.missingResidues = {}, fixer.addMissingAtoms() throws an
# exception, and calling fixer.findMissingResidues() instead would add
# several terminal residues.
fixer.missingResidues = {}
fixer.findMissingAtoms()
fixer.addMissingAtoms()
fixer.removeHeterogens(keepWater=False)
fixer.addMissingHydrogens(7.0)

# Use a context manager so the output is flushed and closed deterministically.
with open('3UE4-pdbfixer.pdb', 'w') as out_file:
    PDBFile.writeFile(fixer.topology, fixer.positions, out_file)
from simtk.openmm import app
from pdbfixer import PDBFixer

# Fetch 2z9j, remove chain B plus all heterogens and water, and write the
# cleaned structure. The 'input' directory must already exist.
fixer = PDBFixer(pdbid="2z9j")
fixer.removeChains(chainIds=['B'])
fixer.removeHeterogens(keepWater=False)

# Use a context manager so the output is flushed and closed deterministically.
with open("input/2z9j_clean.pdb", "w") as out_file:
    app.PDBFile.writeFile(fixer.topology, fixer.positions, out_file)
def prepare_pdb(pdb, chains='A', ff=('amber99sbildn.xml', 'tip3p.xml'),
                ph=7, pad=10 * unit.angstroms, nbonded=app.PME,
                constraints=app.HBonds, crystal_water=True):
    """
    Fetch, solvate and minimize a protein PDB structure.

    Writes '<pdb>-pdbfixer.pdb' (after fixing and optional solvation) and
    '<pdb>-minimized.pdb' (after energy minimization) to the current
    directory, then passes the context, system and integrator to
    serialize_system().

    Parameters
    ----------
    pdb : str
        PDB Id.
    chains : str or list
        Chain(s) to keep in the system. A string is split into its
        characters, so each character is one chain ID.
    ff : tuple of xml ff files.
        Forcefields for parametrization.
    ph : float
        pH value for adding missing hydrogens.
    pad : Quantity object
        Padding around macromolecule for filling box with water.
    nbonded : object
        The method to use for nonbonded interactions. Allowed values are
        NoCutoff, CutoffNonPeriodic, CutoffPeriodic, Ewald, PME, or LJPME.
        Solvent is added only for PME, CutoffPeriodic and Ewald.
    constraints : object
        Specifies which bonds and angles should be implemented with
        constraints. Allowed values are None, HBonds, AllBonds, or HAngles.
    crystal_water : bool
        Keep crystal water.
    """
    # Load forcefield (kept in its own name instead of rebinding `ff`).
    forcefield = app.ForceField(*ff)

    # Retrieve structure from PDB.
    logger.info('Retrieving %s from PDB...', pdb)
    fixer = PDBFixer(pdbid=pdb)

    # Remove unselected chains.
    logger.info('Removing all chains but %s', chains)
    all_chains = [c.id for c in fixer.topology.chains()]
    fixer.removeChains(chainIds=set(all_chains) - set(chains))

    # Find missing residues.
    logger.info('Finding missing residues...')
    fixer.findMissingResidues()

    # Replace nonstandard residues.
    logger.info('Replacing nonstandard residues...')
    fixer.findNonstandardResidues()
    fixer.replaceNonstandardResidues()

    # Add missing atoms.
    logger.info('Adding missing atoms...')
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Remove heterogens.
    logger.info('Removing heterogens...')
    fixer.removeHeterogens(keepWater=crystal_water)

    # Add missing hydrogens.
    logger.info('Adding missing hydrogens appropriate for pH %s', ph)
    fixer.addMissingHydrogens(ph)

    if nbonded in [app.PME, app.CutoffPeriodic, app.Ewald]:
        # Add solvent.
        logger.info('Adding solvent...')
        fixer.addSolvent(padding=pad)

    # Write PDB file.
    logger.info('Writing PDB file to "%s"...', '%s-pdbfixer.pdb' % pdb)
    with open('%s-pdbfixer.pdb' % pdb, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Create OpenMM System.
    logger.info('Creating OpenMM system...')
    system = forcefield.createSystem(fixer.topology, nonbondedMethod=nbonded,
                                     constraints=constraints, rigidWater=True,
                                     removeCMMotion=False)

    # Minimize to update positions.
    logger.info('Minimizing...')
    integrator = mm.VerletIntegrator(1.0 * unit.femtosecond)
    context = mm.Context(system, integrator)
    context.setPositions(fixer.positions)
    mm.LocalEnergyMinimizer.minimize(context)
    # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
    state = context.getState(getPositions=True)
    fixer.positions = state.getPositions()

    # Write final coordinates.
    logger.info('Writing PDB file to "%s"...', '%s-minimized.pdb' % pdb)
    with open('%s-minimized.pdb' % pdb, 'w') as fp:
        app.PDBFile.writeFile(fixer.topology, fixer.positions, fp)

    # Serialize final coordinates.
    logger.info('Serializing to XML...')
    serialize_system(context, system, integrator)
def _move_chain_b_records_to_end(path):
    """Move every line matching '[A-Z] B ' to the end of the file at `path`.

    Lines containing 'END' or 'CONECT' are dropped from the remaining lines.
    This is the pure-Python equivalent of the grep/sed/append pipeline.
    """
    import re

    chain_b = re.compile(r"[A-Z] B ")
    with open(path) as handle:
        lines = handle.readlines()

    moved = [line for line in lines if chain_b.search(line)]
    kept = [
        line for line in lines
        if not chain_b.search(line) and "END" not in line and "CONECT" not in line
    ]
    with open(path, "w") as handle:
        handle.writelines(kept + moved)


def main(args):
    """Fetch a PDB entry, keep one chain per size class and write '<pdbid>.pdb'.

    The entry is downloaded and given heavy atoms by the `pdbfixer` command
    line tool. The first chain with more than 250 residues, the first with
    51-149 residues and the first with at most 15 residues are kept; all
    other chains are removed. If the second surviving chain is shorter than
    the third, the records of chain 'B' in the written file are moved to the
    end and the file is reloaded and rewritten.

    NOTE(review): PDBFile.writeFile is called without keepIds, so chain IDs
    in the output are presumably regenerated by position and the reorder
    makes the longer chain 'B' - confirm against the OpenMM documentation.

    Args:
        args: command line arguments; args[0] is the four-letter PDB id.

    Returns:
        None. Exits with status 1 if the id is not four characters long.
    """
    import os

    pdbcode = args[0]
    if len(pdbcode) != 4:
        print("Please enter a correct 4 letter pdbid")
        sys.exit(1)

    # Argument list without a shell: pdbcode is never interpreted by a shell.
    call(["pdbfixer", "--pdbid", pdbcode, "--output=temp.pdb",
          "--add-atoms=heavy", "--keep-heterogens=none"])
    fixer = PDBFixer(filename="temp.pdb")

    # Keep the first chain of each size class; everything else is relabelled
    # "Z" and queued for removal.
    found = {"A": False, "B": False, "C": False}
    chains_to_remove = []
    for i, c in enumerate(fixer.topology.chains()):
        num_residues = len(list(c.residues()))
        if num_residues > 250:
            label = "A"
        elif 50 < num_residues < 150:
            label = "B"
        elif num_residues <= 15:
            label = "C"
        else:
            label = None

        if label is not None and not found[label]:
            found[label] = True
            c.id = label
        else:
            c.id = "Z"
            chains_to_remove.append(i)

    fixer.removeChains(chains_to_remove)

    chain_lengths = [len(list(c.residues())) for c in fixer.topology.chains()]

    out_path = pdbcode + ".pdb"
    with open(out_path, 'w') as out_file:
        PDBFile.writeFile(fixer.topology, fixer.positions, out_file)

    try:
        os.remove("temp.pdb")
    except FileNotFoundError:
        pass

    # Reordering needs three surviving chains; with fewer there is nothing
    # to compare, so the file is left as written.
    if len(chain_lengths) >= 3 and chain_lengths[1] < chain_lengths[2]:
        _move_chain_b_records_to_end(out_path)
        # Reload and rewrite so the file is regenerated from the reordered
        # records.
        fixer = PDBFixer(filename=out_path)
        with open(out_path, 'w') as out_file:
            PDBFile.writeFile(fixer.topology, fixer.positions, out_file)