# NOTE(review): a second definition of runPDB2PQR appears later in this file
# and replaces this one at import time -- confirm which one is intended.
def runPDB2PQR(pdblist, ff,
               outname="",
               ph=None,
               verbose=False,
               selectedExtensions=None,
               extensionOptions=None,
               ph_calc_method=None,
               ph_calc_options=None,
               clean=False,
               neutraln=False,
               neutralc=False,
               ligand=None,
               assign_only=False,
               chain=False,
               drop_water=False,
               debump=True,
               opt=True,
               typemap=False,
               userff=None,
               usernames=None,
               ffout=None,
               commandLine=None,
               include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:            The name of the desired output file
            ph:                 The desired ph of the system (float)
            verbose:            When True, script will print information to
                                stdout. When False, no detailed information
                                will be printed (bool)
            selectedExtensions: List of extensions to run. Defaults to no
                                extensions.
            extensionOptions:   optionParser like option object that is passed
                                to each extension. When omitted, a new
                                utilities.ExtraOptions() is created per call.
            ph_calc_method:     'propka' or 'pdb2pka' selects the pKa
                                calculation that is run before hydrogens are
                                added; any other value skips that step.
            ph_calc_options:    Options object forwarded to the selected pKa
                                calculation.
            clean:              only return original PDB file in aligned
                                format.
            neutraln:           Make the N-terminus of this protein neutral
            neutralc:           Make the C-terminus of this protein neutral
            ligand:             Calculate the parameters for the ligand in
                                mol2 format at the given path.
            assign_only:        Only assign charges and radii - do not add
                                atoms, debump, or optimize.
            chain:              Keep the chain ID in the output PQR file
            drop_water:         Remove water molecules from output
            debump:             When true, debump heavy atoms
            opt:                When true, run hydrogen optimization
            typemap:            Create Typemap output.
            userff:             The user created forcefield file to use.
                                Overrides ff.
            usernames:          The user created names file to use. Required
                                if using userff.
            ffout:              Instead of using the standard canonical naming
                                scheme for residue and atom names, use the
                                names from the given forcefield
            commandLine:        command line used (if any) to launch the
                                program. Included in output header.
            include_old_header: Include most of the PDB header in output.

        Returns
            header:  The PQR file header (string); "" when clean is set
            lines:   The PQR file atoms (list)
            missedligandresidues: A list of ligand residue names whose charges
                     could not be assigned (list); None when clean is set

        Side effects visible in this function: an existing
        "<outroot>.propka" file is deleted when ph_calc_method is 'propka',
        multiple-occupancy warnings are written to stderr, and a typemap is
        created when typemap is set.
    """
    # Build the defaults per call so no list/options object is shared
    # between independent invocations.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    Lig = None
    atomcount = 0   # Number of ATOM records (only counted when a ligand is given)

    outroot = utilities.getPQRBaseFileName(outname)

    if ph_calc_method == 'propka':
        pkaname = outroot + ".propka"
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    if drop_water:
        # Remove the waters: only atom-like records carry a resName to test.
        water_names = WAT.water_residue_names
        atom_records = (HETATM, ATOM, SIGATM, SEQADV)
        pdblist = [record for record in pdblist
                   if not (isinstance(record, atom_records)
                           and record.resName in water_names)]

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
        atomcount = sum(1 for atom in myProtein.getAtoms()
                        if atom.type == "ATOM")
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Report every atom with an alternate location on stderr, and record one
    # warning pair per affected residue for the PQR header.
    for residue in myProtein.getResidues():
        multoccupancy = False
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = True
                sys.stderr.write(
                    "Warning: multiple occupancies found: %s in %s\n"
                    % (atom.name, residue))
        if multoccupancy:
            myRoutines.warnings.append(
                "WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(
                " at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            #TODO: figure out a way to do this without crashing...
            #tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        #Be sure to include None for missed ligand residues
        return header, lines, None

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb, so the
        # missing-heavy-atom search is skipped only in that case.
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if ph_calc_method == 'propka':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options)
        elif ph_calc_method == 'pdb2pka':
            myRoutines.runPDB2PKA(ph, ff, pdblist, ligand, verbose,
                                  ph_calc_options)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
        else:
            myhydRoutines.initializeWaterOptimization()
        myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()

    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = False

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a ligand that fails parameterization is
        # removed from myProtein.residues below, and if getResidues() returns
        # that same list (not visible here) removing during iteration would
        # silently skip the residue that follows it.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            recovered = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                props = Lig.ligand_props[atom.name]
                atom.ffcharge = props["charge"]
                atom.radius = props["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    recovered.append(atom)

            lig_charge = residue.getCharge()
            # Compare against the NEAREST integer. int() truncates toward
            # zero, so a float-summed charge such as -0.9999999 would be
            # measured against 0 and a valid ligand would be rejected.
            if abs(lig_charge - round(lig_charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(
                    " the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = True
                # Mark these atoms as hits
                hitlist = hitlist + recovered

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    # (only amino-acid / nucleic-acid atoms stay). Slice assignment keeps the
    # same list object that applyForcefield returned.
    if ligsuccess:
        misslist[:] = [atom for atom in misslist
                       if isinstance(atom.residue, (Amino, Nucleic))]

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if ffout is not None:
        if ffout != ff:
            # A user forcefield file is currently not supported for the
            # output naming scheme, so None is passed in its place.
            myNameScheme = Forcefield(ffout, myDefinition, None)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist, misslist, reslist, charge, ff,
                            myRoutines.getWarnings(), ph_calc_method, ph,
                            ffout, commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    # (first-seen order is kept, each residue name listed once).
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues
# NOTE(review): an earlier definition of runPDB2PQR appears above in this file;
# this later one replaces it at import time -- confirm which one is intended.
def runPDB2PQR(pdblist, ff,
               outname="",
               ph=None,
               verbose=False,
               selectedExtensions=None,
               extensionOptions=None,
               propkaOptions=None,
               clean=False,
               neutraln=False,
               neutralc=False,
               ligand=None,
               assign_only=False,
               chain=False,
               debump=True,
               opt=True,
               typemap=False,
               userff=None,
               usernames=None,
               ffout=None,
               commandLine=None,
               include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:            The name of the desired output file
            ph:                 The desired ph of the system (float). When
                                given (not None), PROPKA is run before
                                hydrogens are added.
            verbose:            When True, script will print information to
                                stdout. When False, no detailed information
                                will be printed (bool)
            selectedExtensions: List of extensions to run. Defaults to no
                                extensions.
            extensionOptions:   optionParser like option object that is passed
                                to each extension. When omitted, a new
                                utilities.ExtraOptions() is created per call.
            propkaOptions:      optionParser like option object for propka30.
            clean:              only return original PDB file in aligned
                                format.
            neutraln:           Make the N-terminus of this protein neutral
            neutralc:           Make the C-terminus of this protein neutral
            ligand:             Calculate the parameters for the ligand in
                                mol2 format at the given path.
            assign_only:        Only assign charges and radii - do not add
                                atoms, debump, or optimize.
            chain:              Keep the chain ID in the output PQR file
            debump:             When true, debump heavy atoms
            opt:                When true, run hydrogen optimization
            typemap:            Create Typemap output.
            userff:             The user created forcefield file to use.
                                Overrides ff.
            usernames:          The user created names file to use. Required
                                if using userff.
            ffout:              Instead of using the standard canonical naming
                                scheme for residue and atom names, use the
                                names from the given forcefield
            commandLine:        command line used (if any) to launch the
                                program. Included in output header.
            include_old_header: Include most of the PDB header in output.

        Returns
            header:  The PQR file header (string); "" when clean is set
            lines:   The PQR file atoms (list)
            missedligandresidues: A list of ligand residue names whose charges
                     could not be assigned (list); None when clean is set

        Side effects visible in this function: an existing
        "<outroot>.propka" file is deleted when ph is given,
        multiple-occupancy warnings are written to stderr, and a typemap is
        created when typemap is set.
    """
    # Build the defaults per call so no list/options object is shared
    # between independent invocations.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    Lig = None
    atomcount = 0   # Number of ATOM records (only counted when a ligand is given)

    outroot = utilities.getPQRBaseFileName(outname)

    # A requested pH is what switches the PROPKA step on.
    pka = ph is not None
    if pka:
        pkaname = outroot + ".propka"
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
        atomcount = sum(1 for atom in myProtein.getAtoms()
                        if atom.type == "ATOM")
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Report every atom with an alternate location on stderr, and record one
    # warning pair per affected residue for the PQR header.
    for residue in myProtein.getResidues():
        multoccupancy = False
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = True
                sys.stderr.write(
                    "Warning: multiple occupancies found: %s in %s\n"
                    % (atom.name, residue))
        if multoccupancy:
            myRoutines.warnings.append(
                "WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(
                " at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions. They receive myRoutines directly, exactly
        # as in the non-clean path at the end of this function: deep-copying
        # it is the operation the TODO there reports as crashing.
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        #Be sure to include None for missed ligand residues
        return header, lines, None

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb, so the
        # missing-heavy-atom search is skipped only in that case.
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if pka:
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, propkaOptions)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
        else:
            myhydRoutines.initializeWaterOptimization()
        myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()

    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = False

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a ligand that fails parameterization is
        # removed from myProtein.residues below, and if getResidues() returns
        # that same list (not visible here) removing during iteration would
        # silently skip the residue that follows it.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            recovered = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                props = Lig.ligand_props[atom.name]
                atom.ffcharge = props["charge"]
                atom.radius = props["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    recovered.append(atom)

            lig_charge = residue.getCharge()
            # Compare against the NEAREST integer. int() truncates toward
            # zero, so a float-summed charge such as -0.9999999 would be
            # measured against 0 and a valid ligand would be rejected.
            if abs(lig_charge - round(lig_charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(
                    " the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = True
                # Mark these atoms as hits
                hitlist = hitlist + recovered

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    # (only amino-acid / nucleic-acid atoms stay). Slice assignment keeps the
    # same list object that applyForcefield returned.
    if ligsuccess:
        misslist[:] = [atom for atom in misslist
                       if isinstance(atom.residue, (Amino, Nucleic))]

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if ffout is not None:
        if ffout != ff:
            # A user forcefield file is currently not supported for the
            # output naming scheme, so None is passed in its place.
            myNameScheme = Forcefield(ffout, myDefinition, None)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist, misslist, reslist, charge, ff,
                            myRoutines.getWarnings(), ph, ffout, commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    # (first-seen order is kept, each residue name listed once).
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues