def pre_init(original_pdb_list=None,
             output_dir=None,
             ff=None,
             verbose=False,
             pdie=8.0,
             sdie=80,
             maps=None,
             xdiel=None,
             ydiel=None,
             zdiel=None,
             kappa=None,
             sd=None,
             ligand=None):
    """Clean the PDB, set up the protein objects and prepare the APBS input.

    Creates ``output_dir`` and a ``workspace`` sub-directory, has
    ``pka_help.dump_protein_no_hydrogens`` write ``workspace/working.pdb``,
    rebuilds the protein from that file, runs the structure set-up and
    hydrogen optimisation routines and configures an
    ``inputgen_pKa.inputGen`` instance.

    Parameters
        original_pdb_list: passed to pka_help.dump_protein_no_hydrogens
        output_dir:        output directory (created if missing)
        ff:                forcefield name handed to Forcefield
        verbose:           print progress information when true
        pdie, sdie:        stored on the input generator as pdie / sdie
        maps:              stored on the input generator; when equal to 1
                           xdiel, ydiel, zdiel and kappa are all required
        xdiel, ydiel, zdiel, kappa:
                           map values stored on the input generator
        sd:                when true (and maps == 1) the three dielectric maps
                           are replaced by the result of smooth()
        ligand:            when not None it is handed to ligff.initialize

    Returns
        (output_dir, myProtein, myRoutines, myForcefield, igen,
         ligand_titratable_groups, maps, sd)
        ligand_titratable_groups is always None in this function.

    Raises
        ValueError:   a directory could not be created and the path is not
                      an existing directory
        PDB2PKAError: maps == 1 and one of the required maps is missing
        Exception:    the ligand has a non-integer net charge
    """
    # Prepare the output directory.  makedirs failing is fine as long as the
    # directory is already there.
    output_dir = os.path.abspath(output_dir)
    try:
        os.makedirs(output_dir)
    except OSError:
        if not os.path.isdir(output_dir):
            raise ValueError('Target directory is a file! Aborting.')

    workspace_dir = os.path.join(output_dir, 'workspace')
    try:
        os.makedirs(workspace_dir)
    except OSError:
        # Check the directory we just tried to create; testing output_dir
        # here would always pass and hide a broken workspace path.
        if not os.path.isdir(workspace_dir):
            raise ValueError('Target directory is a file! Aborting.')

    #
    # remove hydrogen atoms
    #
    working_pdb_filename = os.path.join(workspace_dir, 'working.pdb')
    pka_help.dump_protein_no_hydrogens(original_pdb_list, working_pdb_filename)

    #
    # Get the PDBfile (re-read from the working copy written above)
    #
    pdbfile = getPDBFile(working_pdb_filename)
    pdblist, errlist = readPDB(pdbfile)

    if verbose:
        print("Beginning PDB2PKA...\n")

    #
    # Read the definition file
    #
    myDefinition = Definition()
    ligand_titratable_groups = None

    #
    # Choose whether to include the ligand or not
    # Add the ligand to the pdb2pqr arrays
    #
    Lig = None
    if ligand is None:
        myProtein = Protein(pdblist, myDefinition)
    else:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)

    #
    # We have identified the structural elements, now continue with the setup
    #
    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    #
    # Set up all other routines
    #
    myRoutines = Routines(myProtein, verbose)
    myRoutines.updateResidueTypes()
    myRoutines.updateSSbridges()
    myRoutines.updateBonds()
    myRoutines.setTermini()
    myRoutines.updateInternalBonds()

    myRoutines.applyNameScheme(Forcefield(ff, myDefinition, None))
    myRoutines.findMissingHeavy()
    myRoutines.addHydrogens()
    myRoutines.debumpProtein()

    myProtein.reSerialize()

    #
    # Inject the information on hydrogen conformations in the HYDROGENS.DAT
    # arrays.  We get this information from ligand_titratable_groups
    #
    from src.hydrogens import hydrogenRoutines
    myRoutines.updateInternalBonds()
    myRoutines.calculateDihedralAngles()
    myhydRoutines = hydrogenRoutines(myRoutines)

    #
    # Here we should inject the info!!
    #
    myhydRoutines.setOptimizeableHydrogens()
    myhydRoutines.initializeFullOptimization()
    myhydRoutines.optimizeHydrogens()
    myhydRoutines.cleanup()
    myRoutines.setStates()

    #
    # Choose the correct forcefield
    #
    myForcefield = Forcefield(ff, myDefinition, None)
    if Lig:
        hitlist, misslist = myRoutines.applyForcefield(myForcefield)

        #
        # Can we get charges for the ligand?
        #
        templist = []
        ligsuccess = False
        for residue in myProtein.getResidues():
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            net_charge = 0.0
            print('Ligand %s' % (residue,))
            print('Atom\tCharge\tRadius')
            for atom in residue.getAtoms():
                # Prefer the charge carried by the atom itself; fall back to
                # the ligand property table when it is unset (or zero).
                if atom.mol2charge:
                    atom.ffcharge = atom.mol2charge
                else:
                    atom.ffcharge = Lig.ligand_props[atom.name]["charge"]

                # Find the net charge
                net_charge = net_charge + atom.ffcharge

                # Assign radius
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                print('%s\t%6.4f\t%6.4f' % (atom.name, atom.ffcharge, atom.radius))
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

                #
                # Store the charge and radius in the atom instance for later use
                # This really should be done in a nicer way, but this will do for now
                #
                atom.secret_radius = atom.radius
                atom.secret_charge = atom.ffcharge

            charge = residue.getCharge()
            if abs(charge - round(charge)) > 0.01:
                # Ligand parameterization failed
                myProtein.residues.remove(residue)
                raise Exception('Non-integer charge on ligand: %8.5f' % charge)

            ligsuccess = True
            # Mark these atoms as hits
            hitlist = hitlist + templist

            # Print the net charge
            print('Net charge for ligand %s is: %5.3f' % (residue.name, net_charge))

        #
        # Temporary fix; if ligand was successful, pull all ligands from misslist
        # Not sure if this is needed at all here ...? (Jens wrote this)
        #
        if ligsuccess:
            for atom in misslist[:]:
                if isinstance(atom.residue, (Amino, Nucleic)):
                    continue
                misslist.remove(atom)

    if verbose:
        print("Created protein object (after processing myRoutines) -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    #
    # Create the APBS input file
    #
    import src.psize
    # NOTE(review): `size` is never read below; kept only in case importing or
    # constructing Psize matters elsewhere -- confirm and remove.
    size = src.psize.Psize()

    igen = inputgen_pKa.inputGen(working_pdb_filename)

    #
    # For convenience
    #
    igen.pdie = pdie
    print('Setting protein dielectric constant to  %s' % (igen.pdie,))
    igen.sdie = sdie
    igen.maps = maps
    if maps == 1:
        print("Using dielectric and mobile ion-accessibility function maps in PBE")
        if xdiel:
            igen.xdiel = xdiel
        else:
            raise PDB2PKAError('X dielectric map is missing')
        if ydiel:
            igen.ydiel = ydiel
        else:
            raise PDB2PKAError("Y dielectric map is missing\n")
        if zdiel:
            igen.zdiel = zdiel
        else:
            raise PDB2PKAError("Z dielectric map is missing\n")

        print('Setting dielectric function maps: %s, %s, %s'
              % (igen.xdiel, igen.ydiel, igen.zdiel))

        if kappa:
            igen.kappa = kappa
        else:
            raise PDB2PKAError("Mobile ion-accessibility map is missing\n")

        print('Setting mobile ion-accessibility function map to:  %s' % (igen.kappa,))

        if sd:
            # Replace the supplied dielectric maps by their smoothed versions.
            igen.xdiel, igen.ydiel, igen.zdiel = smooth(xdiel, ydiel, zdiel)

    #
    # Return all we need
    #
    return (output_dir, myProtein, myRoutines, myForcefield, igen,
            ligand_titratable_groups, maps, sd)
def startpKa():
    """Start the pKa script from the command line.

    Parses the command-line options and the two positional arguments
    (<pdbfile> <output directory>), reads the PDB file and passes the parsed
    PDB list plus the options on to pre_init.

    Returns
        (pre_init_result, options): the value returned by pre_init and the
        optparse options object.

    Exits through parser.error when the number of positional arguments is
    not exactly two.
    """
    print("")
    print('PDB2PQR pKa calculations')
    print("")

    parser = optparse.OptionParser()

    ##
    ## set optparse options
    ##
    parser.add_option(
        '-v', '--verbose',
        dest='verbose',
        action="store_true",
        default=False,
    )
    parser.add_option(
        '--pdie',
        dest='pdie',
        default=8,
        type='int',
        help='<protein dielectric constant>',
    )
    parser.add_option(
        '--sdie',
        dest='sdie',
        default=80,
        type='int',
        help='<solvent dielectric constant>',
    )
    parser.add_option(
        '--ff',
        dest='ff',
        type='choice',
        default='parse',
        choices=("amber", "AMBER", "charmm", "CHARMM", "parse", "PARSE",),
        help='<force field (amber, charmm, parse)>',
    )
    parser.add_option(
        '--resume',
        dest='resume',
        action="store_true",
        default=False,
        help='resume run from saved state.',
    )
    parser.add_option(
        '--ligand',
        dest='ligand',
        type='str',
        help='<ligand in MOL2 format>',
    )
    parser.add_option(
        '--maps',
        dest='maps',
        default=None,
        type='int',
        help='<1 for using provided 3D maps; 2 for genereting new maps>',
    )
    parser.add_option(
        '--xdiel',
        dest='xdiel',
        default=None,
        type='str',
        help='<xdiel maps>',
    )
    parser.add_option(
        '--ydiel',
        dest='ydiel',
        default=None,
        type='str',
        help='<ydiel maps>',
    )
    parser.add_option(
        '--zdiel',
        dest='zdiel',
        default=None,
        type='str',
        help='<zdiel maps>',
    )
    parser.add_option(
        '--kappa',
        dest='kappa',
        default=None,
        type='str',
        help='<ion-accessibility map>',
    )
    parser.add_option(
        '--smooth',
        dest='sd',
        default=None,
        type='float',
        help='<st.dev [A] of Gaussian smooting of 3D maps at the boundary, bandthwith=3 st.dev>',
    )
    #
    # Cut off energy for calculating non-charged-charged interaction energies
    #
    parser.add_option('--pairene', dest='pairene', type='float', default=1.0,
                      help='Cutoff energy in kT for calculating non charged-charged interaction energies. Default: %default')
    #
    # Options for doing partial calculations
    #
    parser.add_option('--res_energy',
                      dest='desolvation_res',
                      default=[],
                      action='append',
                      type='string',
                      help='Calculate desolvation energy and interaction energy for this residue in its default protonation state. Protonation states can be specified with the --protonation_state argument')
    parser.add_option('--PS_file', dest='PS_file', default='', type='string', action='store',
                      help='Set protonation states according to the pdb2pka protonation state file (option --PS_file)')

    (options, args,) = parser.parse_args()

    ##
    ## parse optparse options
    ##
    ff = options.ff.lower()
    pdie = options.pdie
    verbose = options.verbose
    sdie = options.sdie
    maps = options.maps
    xdiel = options.xdiel
    ydiel = options.ydiel
    zdiel = options.zdiel
    kappa = options.kappa
    sd = options.sd

    #
    # Find the PDB file
    #
    if len(args) != 2:
        parser.error("Usage: pka.py [options] <pdbfile> <output directory>\n")
    input_path = args[0]
    output_path = args[1]

    # A ligand file that cannot be opened is reported and then ignored.
    ligand = None
    if options.ligand is not None:
        try:
            ligand = open(options.ligand, 'rU')
        except IOError:
            print('Unable to find ligand file %s! Skipping...' % options.ligand)

    #
    # Read the PDB file.  pre_init builds the Definition/Protein objects
    # itself from the list it is given, so no protein is constructed here;
    # doing so would only produce an unused object (and would hand the ligand
    # file object to ligff.initialize before pre_init gets to use it).
    #
    pdbfile = getPDBFile(input_path)
    pdblist, errlist = readPDB(pdbfile)
    if len(errlist) != 0 and verbose:
        print("Warning: %s is a non-standard PDB file.\n" % input_path)
        print(errlist)

    #
    # Call the pre_init function.  Its first parameter is named
    # original_pdb_list; it has no `protein` keyword.
    #
    return pre_init(original_pdb_list=pdblist,
                    output_dir=output_path,
                    ff=ff,
                    verbose=verbose,
                    pdie=pdie,
                    sdie=sdie,
                    maps=maps,
                    xdiel=xdiel,
                    ydiel=ydiel,
                    zdiel=zdiel,
                    kappa=kappa,
                    sd=sd,
                    ligand=ligand), options
def runPDB2PQR(pdblist, ff, options):
    """Run the PDB2PQR Suite.

    Parameters
        pdblist: The list of objects that was read from the PDB file
                 given as input (list)
        ff:      The name of the forcefield (string)
        options: A dictionary of PDB2PQR options.  Keys read here:
            "outname":    name of the desired output file (required)
            "extensions": mapping of extension name -> module (required)
            "ph":         desired pH; its presence enables the PROPKA step
            "ligand":     ligand input handed to ligff.initialize
            "userff", "usernames": passed to Forcefield
            "ffout":      forcefield whose naming scheme is applied
            Flags tested for presence only: "verbose", "opt", "typemap",
            "chain", "clean", "assign-only", "debump", "neutraln",
            "neutralc"

    Returns
        header:  The PQR file header (string)
        lines:   The PQR file atoms (list)
        missedligandresidues: A list of ligand residue names whose charges
                 could not be assigned
        When "clean" is in options only (header, lines) is returned.

    Raises
        ValueError: "outname" is missing from options or is None
        KeyError:   "extensions" is missing from options
    """
    ph = None
    pkaname = ""
    neutraln = None
    neutralc = None
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb

    # userff is CGI-based User Forcefield file object
    userff = options["userff"] if "userff" in options else None
    usernames = options["usernames"] if "usernames" in options else None

    verbose = 1 if "verbose" in options else 0
    optflag = 1 if "opt" in options else 0
    typemapflag = 1 if "typemap" in options else 0
    chainflag = 1 if "chain" in options else 0

    if "outname" not in options or options["outname"] is None:
        raise ValueError("Error: Output name not set!")
    outname = options["outname"]

    # Strip the extension (text after the last period) to get the output root.
    period = outname.rfind(".")
    outroot = outname[0:period] if period > 0 else outname

    if "ph" in options:
        pka = 1
        ph = options["ph"]
        pkaname = outroot + ".propka"
        # Start from a clean slate: drop a stale PROPKA file.
        if os.path.isfile(pkaname):
            os.remove(pkaname)
    else:
        pka = 0

    typemapname = "%s-typemap.html" % outroot
    extmap = options["extensions"]

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if "ligand" in options:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, options["ligand"], pdblist, verbose)
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM":
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Report atoms carrying an alternate-location flag, once per atom on
    # stderr and once per residue in the collected warnings.
    for residue in myProtein.getResidues():
        multoccupancy = 0
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = 1
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy == 1:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(" at least one of the instances is being ignored.\n")

    if "neutraln" in options:
        neutraln = 1
    if "neutralc" in options:
        neutralc = 1

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if "clean" in options:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chainflag)

        # Process the extensions: each module is called through its
        # attribute named like the extension key (no eval needed).
        for ext in extmap:
            getattr(extmap[ext], ext)(myRoutines, outroot)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))
        # NOTE: this early exit returns two values, not three.
        return header, lines

    if "assign-only" not in options:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig != None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if "debump" in options:
            myRoutines.debumpProtein()

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if "debump" in options:
            myRoutines.debumpProtein()

        if optflag:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()
    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0

    if "ligand" in options:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein inside the loop.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue
            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the nearest integer; int() truncates towards
            # zero and would reject e.g. 0.9999 or -0.9999.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(" the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for chain in myProtein.chains:
                    if residue in chain.residues:
                        chain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemapflag:
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if "ffout" in options:
        scheme = options["ffout"]
        userff = None   # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(misslist, reslist, charge, ff, myRoutines.getWarnings(), options)
    lines = myProtein.printAtoms(hitlist, chainflag)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in extmap:
        getattr(extmap[ext], ext)(myRoutines, outroot)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues
def runPDB2PQR(pdblist, ff,
               outname = "",
               ph = None,
               verbose = False,
               extentions = [],
               ententionOptions = ExtentionOptions(),
               clean = False,
               neutraln = False,
               neutralc = False,
               ligand = None,
               assign_only = False,
               chain = False,
               debump = True,
               opt = True,
               typemap = False,
               userff = None,
               usernames = None,
               ffout = None):
    """Run the PDB2PQR Suite.

    Arguments:
        pdblist: The list of objects that was read from the PDB file
                 given as input (list)
        ff:      The name of the forcefield (string); lower-cased before use

    Keyword Arguments:
        outname:     The name of the desired output file; the text before its
                     last period is used as root for derived file names
        ph:          The desired ph of the system; when not None the PROPKA
                     step is run
        verbose:     When true, print progress information to stdout
        extentions:  List of extension names to run; each is looked up in
                     extensions.extDict.  Only read here, never modified.
        ententionOptions: option object for extensions (not used in this
                     function body)
        clean:       Only return the atoms as read, skipping all processing
        neutraln:    Passed to setTermini (neutral N-terminus)
        neutralc:    Passed to setTermini (neutral C-terminus)
        ligand:      Ligand input handed to ligff.initialize
        assign_only: Only assign charges and radii - do not add atoms,
                     debump, or optimize
        chain:       Passed to printAtoms (keep the chain ID in the output)
        debump:      When true, debump the protein
        opt:         When true, run full hydrogen optimization; otherwise
                     only the water optimization is run
        typemap:     Create Typemap output
        userff:      The user created forcefield file to use
        usernames:   The user created names file to use
        ffout:       Use the naming scheme of this forcefield for the output

    Returns
        header:  The PQR file header (string)
        lines:   The PQR file atoms (list)
        missedligandresidues: A list of ligand residue names whose charges
                 could not be assigned; None when clean is true
    """
    pkaname = ""
    Lig = None
    atomcount = 0   # Count the number of ATOM records in pdb

    # Strip the extension (text after the last period) to get the output root.
    period = outname.rfind(".")
    outroot = outname[0:period] if period > 0 else outname

    pka = ph is not None
    if pka:
        pkaname = outroot + ".propka"
        # Start from a clean slate: drop a stale PROPKA file.
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(myDefinition, ligand, pdblist, verbose)
        for atom in myProtein.getAtoms():
            if atom.type == "ATOM":
                atomcount += 1
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Report atoms carrying an alternate-location flag, once per atom on
    # stderr and once per residue in the collected warnings.
    for residue in myProtein.getResidues():
        multoccupancy = 0
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = 1
                txt = "Warning: multiple occupancies found: %s in %s\n" % (atom.name, residue)
                sys.stderr.write(txt)
        if multoccupancy == 1:
            myRoutines.warnings.append("WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(" at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions: each module is called through its
        # attribute named like the extension (replaces the former eval).
        for ext in extentions:
            getattr(extensions.extDict[ext], ext)(myRoutines, outroot)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        # Be sure to include None for missed ligand residues
        return header, lines, None

    # remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig != None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()
    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein inside the loop.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue
            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the nearest integer; int() truncates towards
            # zero and would reject e.g. 0.9999 or -0.9999.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append("WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(" the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if ffout is not None:
        scheme = ffout
        userff = None   # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(misslist, reslist, charge, ff, myRoutines.getWarnings(), ph, ffout)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in extentions:
        getattr(extensions.extDict[ext], ext)(myRoutines, outroot)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues
def runPDB2PQR(pdblist, ff, options):
    """Run the PDB2PQR Suite.

    Parameters
        pdblist: The list of objects that was read from the PDB file
                 given as input (list)
        ff:      The name of the forcefield (string)
        options: A dictionary of PDB2PQR options.  Keys read here:
            "outname":    name of the desired output file (required)
            "extensions": mapping of extension name -> module (required)
            "ph":         desired pH; its presence enables the PROPKA step
            "ligand":     ligand input handed to ligff.initialize
            "userff":     passed to Forcefield
            "ffout":      forcefield whose naming scheme is applied
            Flags tested for presence only: "verbose", "opt", "chain",
            "clean", "assign-only", "debump"

    Returns
        header:  The PQR file header (string)
        lines:   The PQR file atoms (list)
        missedligandresidues: A list of ligand residue names whose charges
                 could not be assigned
        When "clean" is in options only (header, lines) is returned.

    Raises
        ValueError: "outname" is missing from options or is None
        KeyError:   "extensions" is missing from options
    """
    ph = None
    pkaname = ""

    # userff is CGI-based User Forcefield file object
    userff = options["userff"] if "userff" in options else None

    verbose = 1 if "verbose" in options else 0
    optflag = 1 if "opt" in options else 0
    chainflag = 1 if "chain" in options else 0

    if "outname" not in options or options["outname"] is None:
        raise ValueError("Error: Output name not set!")
    outname = options["outname"]

    # Strip only the extension: split at the LAST period so names that
    # contain further periods (e.g. "a.b.pqr") keep their full root.
    period = outname.rfind(".")
    outroot = outname[0:period] if period > 0 else outname

    if "ph" in options:
        pka = 1
        ph = options["ph"]
        pkaname = outroot + ".propka"
        # Start from a clean slate: drop a stale PROPKA file.
        if os.path.isfile(pkaname):
            os.remove(pkaname)
    else:
        pka = 0

    typemapname = "%s-typemap.html" % outroot
    extmap = options["extensions"]

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if "ligand" in options:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, options["ligand"], pdblist, verbose)
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    myRoutines.setTermini()
    myRoutines.updateBonds()

    if "clean" in options:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chainflag)

        # Process the extensions: each module is called through its
        # attribute named like the extension key (no eval needed).
        for ext in extmap:
            getattr(extmap[ext], ext)(myRoutines, outroot)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))
        # NOTE: this early exit returns two values, not three.
        return header, lines

    if "assign-only" not in options:
        myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if "debump" in options:
            myRoutines.debumpProtein()

        if pka:
            myRoutines.runPROPKA(ph, ff, pkaname)

        myRoutines.addHydrogens()

        # With full optimization the hydrogen routines are created and the
        # optimizeable hydrogens set BEFORE the second debump pass.
        if optflag:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.setOptimizeableHydrogens()

        if "debump" in options:
            myRoutines.debumpProtein()

        if optflag:
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines = hydrogenRoutines(myRoutines)
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()
    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0

    if "ligand" in options:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein inside the loop.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue
            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.remove(atom)
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the nearest integer; int() truncates towards
            # zero and would reject e.g. 0.9999 or -0.9999.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(
                    " the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for chain in myProtein.chains:
                    if residue in chain.residues:
                        chain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap (always written in this version)
    myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if "ffout" in options:
        scheme = options["ffout"]
        userff = None   # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printHeader(misslist, reslist, charge, ff,
                         myRoutines.getWarnings(), options)
    lines = myProtein.printAtoms(hitlist, chainflag)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in extmap:
        getattr(extmap[ext], ext)(myRoutines, outroot)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues
def startpKa():
    """Start the pKa script from the command line.

    Parses the command-line options and the two positional arguments
    (<pdbfile> <output directory>), reads the PDB file and passes the parsed
    PDB list plus the options on to pre_init.

    Returns
        (pre_init_result, options): the value returned by pre_init and the
        optparse options object.

    Exits through parser.error when the number of positional arguments is
    not exactly two.
    """
    print("")
    print('PDB2PQR pKa calculations')
    print("")

    parser = optparse.OptionParser()

    ##
    ## set optparse options
    ##
    parser.add_option(
        '-v',
        '--verbose',
        dest='verbose',
        action="store_true",
        default=False,
    )
    parser.add_option(
        '--pdie',
        dest='pdie',
        default=8,
        type='int',
        help='<protein dielectric constant>',
    )
    parser.add_option(
        '--sdie',
        dest='sdie',
        default=80,
        type='int',
        help='<solvent dielectric constant>',
    )
    parser.add_option(
        '--ff',
        dest='ff',
        type='choice',
        default='parse',
        choices=(
            "amber",
            "AMBER",
            "charmm",
            "CHARMM",
            "parse",
            "PARSE",
        ),
        help='<force field (amber, charmm, parse)>',
    )
    parser.add_option(
        '--resume',
        dest='resume',
        action="store_true",
        default=False,
        help='resume run from saved state.',
    )
    parser.add_option(
        '--ligand',
        dest='ligand',
        type='str',
        help='<ligand in MOL2 format>',
    )
    parser.add_option(
        '--maps',
        dest='maps',
        default=None,
        type='int',
        help='<1 for using provided 3D maps; 2 for genereting new maps>',
    )
    parser.add_option(
        '--xdiel',
        dest='xdiel',
        default=None,
        type='str',
        help='<xdiel maps>',
    )
    parser.add_option(
        '--ydiel',
        dest='ydiel',
        default=None,
        type='str',
        help='<ydiel maps>',
    )
    parser.add_option(
        '--zdiel',
        dest='zdiel',
        default=None,
        type='str',
        help='<zdiel maps>',
    )
    parser.add_option(
        '--kappa',
        dest='kappa',
        default=None,
        type='str',
        help='<ion-accessibility map>',
    )
    parser.add_option(
        '--smooth',
        dest='sd',
        default=None,
        type='float',
        help=
        '<st.dev [A] of Gaussian smooting of 3D maps at the boundary, bandthwith=3 st.dev>',
    )
    #
    # Cut off energy for calculating non-charged-charged interaction energies
    #
    parser.add_option(
        '--pairene',
        dest='pairene',
        type='float',
        default=1.0,
        help=
        'Cutoff energy in kT for calculating non charged-charged interaction energies. Default: %default'
    )
    #
    # Options for doing partial calculations
    #
    parser.add_option(
        '--res_energy',
        dest='desolvation_res',
        default=[],
        action='append',
        type='string',
        help=
        'Calculate desolvation energy and interaction energy for this residue in its default protonation state. Protonation states can be specified with the --protonation_state argument'
    )
    parser.add_option(
        '--PS_file',
        dest='PS_file',
        default='',
        type='string',
        action='store',
        help=
        'Set protonation states according to the pdb2pka protonation state file (option --PS_file)'
    )

    (
        options,
        args,
    ) = parser.parse_args()

    ##
    ## parse optparse options
    ##
    ff = options.ff.lower()
    pdie = options.pdie
    verbose = options.verbose
    sdie = options.sdie
    maps = options.maps
    xdiel = options.xdiel
    ydiel = options.ydiel
    zdiel = options.zdiel
    kappa = options.kappa
    sd = options.sd

    #
    # Find the PDB file
    #
    if len(args) != 2:
        parser.error("Usage: pka.py [options] <pdbfile> <output directory>\n")
    input_path = args[0]
    output_path = args[1]

    # A ligand file that cannot be opened is reported and then ignored.
    ligand = None
    if options.ligand is not None:
        try:
            ligand = open(options.ligand, 'rU')
        except IOError:
            print('Unable to find ligand file %s! Skipping...' % options.ligand)

    #
    # Read the PDB file.  pre_init builds the Definition/Protein objects
    # itself from the list it is given, so no protein is constructed here;
    # doing so would only produce an unused object (and would hand the ligand
    # file object to ligff.initialize before pre_init gets to use it).
    #
    pdbfile = getPDBFile(input_path)
    pdblist, errlist = readPDB(pdbfile)
    if len(errlist) != 0 and verbose:
        print("Warning: %s is a non-standard PDB file.\n" % input_path)
        print(errlist)

    #
    # Call the pre_init function.  Its first parameter is named
    # original_pdb_list; it has no `protein` keyword.
    #
    return pre_init(original_pdb_list=pdblist,
                    output_dir=output_path,
                    ff=ff,
                    verbose=verbose,
                    pdie=pdie,
                    sdie=sdie,
                    maps=maps,
                    xdiel=xdiel,
                    ydiel=ydiel,
                    zdiel=zdiel,
                    kappa=kappa,
                    sd=sd,
                    ligand=ligand), options
def _ensure_directory(path):
    """Create ``path`` (with missing parents) unless it already is a directory.

    Raises:
        ValueError: if ``os.makedirs`` fails and ``path`` is not an existing
            directory (for example because a regular file has that name).
    """
    try:
        os.makedirs(path)
    except OSError:
        if not os.path.isdir(path):
            raise ValueError('Target directory is a file! Aborting.')


def pre_init(original_pdb_list=None, output_dir=None, ff=None, verbose=False,
             pdie=8.0, sdie=80, maps=None, xdiel=None, ydiel=None,
             zdiel=None, kappa=None, sd=None, ligand=None):
    """Clean the PDB, build the protein objects and prepare the APBS input.

    The output folder and a ``workspace`` sub-folder are created, the input
    structure is dumped without hydrogens to ``workspace/working.pdb``, read
    back, protonated/optimized, and an ``inputgen_pKa.inputGen`` object is
    configured from the dielectric settings.

    Arguments:
        original_pdb_list: passed to ``pka_help.dump_protein_no_hydrogens``
        output_dir:        folder to create/use for the results
        ff:                forcefield name handed to ``Forcefield``
        verbose:           when true, print progress information
        pdie:              stored on the input generator as ``pdie``
        sdie:              stored on the input generator as ``sdie``
        maps:              when equal to 1, ``xdiel``, ``ydiel``, ``zdiel``
                           and ``kappa`` are all required
        xdiel, ydiel, zdiel: dielectric maps (used only when ``maps == 1``)
        kappa:             mobile ion-accessibility map (only when ``maps == 1``)
        sd:                when true (and ``maps == 1``) the dielectric maps
                           are passed through ``smooth``
        ligand:            when not None, handed to ``ligff.initialize``

    Returns:
        The tuple ``(output_dir, myProtein, myRoutines, myForcefield, igen,
        ligand_titratable_groups, maps, sd)``.

    Raises:
        ValueError:   the output or workspace path exists but is not a
                      directory
        PDB2PKAError: ``maps == 1`` and one of the required maps is missing
        Exception:    the ligand carries a non-integer net charge
    """
    # Prepare the output directory and the workspace below it.  Each path is
    # validated on its own; the workspace check must not look at output_dir.
    output_dir = os.path.abspath(output_dir)
    _ensure_directory(output_dir)
    workspace_dir = os.path.join(output_dir, 'workspace')
    _ensure_directory(workspace_dir)

    #
    # remove hydrogen atoms
    #
    working_pdb_filename = os.path.join(workspace_dir, 'working.pdb')
    pka_help.dump_protein_no_hydrogens(original_pdb_list, working_pdb_filename)

    #
    # Get the PDBfile
    #
    pdbfile = getPDBFile(working_pdb_filename)
    pdblist, errlist = readPDB(pdbfile)

    if verbose:
        print("Beginning PDB2PKA...\n")

    #
    # Read the definition file
    #
    myDefinition = Definition()
    ligand_titratable_groups = None

    #
    # Choose whether to include the ligand or not
    # Add the ligand to the pdb2pqr arrays
    #
    Lig = None
    if ligand is None:
        myProtein = Protein(pdblist, myDefinition)
    else:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)

    #
    # We have identified the structural elements, now continue with the setup
    #
    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    #
    # Set up all other routines
    #
    myRoutines = Routines(myProtein, verbose)
    myRoutines.updateResidueTypes()
    myRoutines.updateSSbridges()
    myRoutines.updateBonds()
    myRoutines.setTermini()
    myRoutines.updateInternalBonds()

    myRoutines.applyNameScheme(Forcefield(ff, myDefinition, None))
    myRoutines.findMissingHeavy()
    myRoutines.addHydrogens()
    myRoutines.debumpProtein()

    myProtein.reSerialize()

    #
    # Inject the information on hydrogen conformations in the HYDROGENS.DAT
    # arrays.  We get this information from ligand_titratable_groups
    #
    from src.hydrogens import hydrogenRoutines
    myRoutines.updateInternalBonds()
    myRoutines.calculateDihedralAngles()
    myhydRoutines = hydrogenRoutines(myRoutines)

    #
    # Here we should inject the info!!
    #
    myhydRoutines.setOptimizeableHydrogens()
    myhydRoutines.initializeFullOptimization()
    myhydRoutines.optimizeHydrogens()
    myhydRoutines.cleanup()
    myRoutines.setStates()

    #
    # Choose the correct forcefield
    #
    myForcefield = Forcefield(ff, myDefinition, None)
    if Lig:
        hitlist, misslist = myRoutines.applyForcefield(myForcefield)

        #
        # Can we get charges for the ligand?
        #
        templist = []
        ligsuccess = False
        for residue in myProtein.getResidues():
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            net_charge = 0.0
            print('Ligand %s' % (residue,))
            print('Atom\tCharge\tRadius')
            for atom in residue.getAtoms():
                # Prefer the charge read from the mol2 file when there is one
                if atom.mol2charge:
                    atom.ffcharge = atom.mol2charge
                else:
                    atom.ffcharge = Lig.ligand_props[atom.name]["charge"]

                # Find the net charge
                net_charge = net_charge + atom.ffcharge

                # Assign radius
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                print('%s\t%6.4f\t%6.4f' % (atom.name, atom.ffcharge, atom.radius))
                if atom in misslist:
                    misslist.pop(misslist.index(atom))
                    templist.append(atom)

                #
                # Store the charge and radius in the atom instance for later use
                # This really should be done in a nicer way, but this will do for now
                #
                atom.secret_radius = atom.radius
                atom.secret_charge = atom.ffcharge

            charge = residue.getCharge()
            if abs(charge - round(charge)) > 0.01:
                # Ligand parameterization failed
                myProtein.residues.remove(residue)
                raise Exception('Non-integer charge on ligand: %8.5f' % charge)

            ligsuccess = True
            # Mark these atoms as hits
            hitlist = hitlist + templist

            # Print the net charge
            print('Net charge for ligand %s is: %5.3f' % (residue.name, net_charge))

        #
        # Temporary fix; if ligand was successful, pull all ligands from misslist
        # Not sure if this is needed at all here ...? (Jens wrote this)
        #
        if ligsuccess:
            for atom in misslist[:]:
                if isinstance(atom.residue, Amino) or isinstance(atom.residue, Nucleic):
                    continue
                misslist.remove(atom)

    if verbose:
        print("Created protein object (after processing myRoutines) -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    #
    # Create the APBS input file
    #
    import src.psize
    # NOTE(review): this Psize instance is never used below; it is kept only
    # in case importing/constructing it matters elsewhere - confirm and drop.
    size = src.psize.Psize()

    igen = inputgen_pKa.inputGen(working_pdb_filename)

    #
    # For convenience
    #
    igen.pdie = pdie
    # Two spaces before the value reproduce the historical output of
    # "print 'text ', value".
    print('Setting protein dielectric constant to  %s' % (igen.pdie,))
    igen.sdie = sdie
    igen.maps = maps
    if maps == 1:
        print("Using dielectric and mobile ion-accessibility function maps in PBE")
        if xdiel:
            igen.xdiel = xdiel
        else:
            raise PDB2PKAError('X dielectric map is missing')
        if ydiel:
            igen.ydiel = ydiel
        else:
            raise PDB2PKAError("Y dielectric map is missing\n")
        if zdiel:
            igen.zdiel = zdiel
        else:
            raise PDB2PKAError("Z dielectric map is missing\n")

        print('Setting dielectric function maps: %s, %s, %s' % (
            igen.xdiel, igen.ydiel, igen.zdiel))

        if kappa:
            igen.kappa = kappa
        else:
            raise PDB2PKAError("Mobile ion-accessibility map is missing\n")

        print('Setting mobile ion-accessibility function map to:  %s' % (igen.kappa,))

        if sd:
            xdiel_smooth, ydiel_smooth, zdiel_smooth = smooth(xdiel, ydiel, zdiel)
            igen.xdiel = xdiel_smooth
            igen.ydiel = ydiel_smooth
            igen.zdiel = zdiel_smooth

    #
    # Return all we need
    #
    return (output_dir, myProtein, myRoutines, myForcefield, igen,
            ligand_titratable_groups, maps, sd)
def runPDB2PQR(pdblist, ff, outname="", ph=None, verbose=False,
               selectedExtensions=None, extensionOptions=None,
               ph_calc_method=None, ph_calc_options=None, clean=False,
               neutraln=False, neutralc=False, ligand=None,
               assign_only=False, chain=False, drop_water=False,
               debump=True, opt=True, typemap=False, userff=None,
               usernames=None, ffout=None, holdList=None,
               commandLine=None, include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:            The name of the desired output file
            ph:                 The desired ph of the system (float)
            verbose:            When True, print progress information to stdout
            selectedExtensions: Names of the extensions to run (keys of
                                extensions.extDict).  None means no extensions.
            extensionOptions:   Option object passed to each extension.  None
                                means a fresh utilities.ExtraOptions().
            ph_calc_method:     pKa calculation method
                                ("propka", "propka31", "pdb2pka")
            ph_calc_options:    Option object passed to the pKa method
            clean:              Only return the original atoms in aligned format
            neutraln:           Make the N-terminus of this protein neutral
            neutralc:           Make the C-terminus of this protein neutral
            ligand:             Ligand (mol2) handed to ligff.initialize
            assign_only:        Only assign charges and radii - do not add
                                atoms, debump, or optimize
            chain:              Keep the chain ID in the output PQR file
            drop_water:         Remove water molecules from output
            debump:             When true, debump heavy atoms
            opt:                When true, run full hydrogen optimization;
                                otherwise only waters are optimized
            typemap:            Create Typemap output
            userff:             The user created forcefield file to use
            usernames:          The user created names file to use
            ffout:              Use the naming scheme of this forcefield for
                                the output instead of the canonical one
            holdList:           Residues not to be optimized, passed to
                                Routines.holdResidues
            commandLine:        Command line used to launch the program;
                                included in the output header
            include_old_header: Include most of the PDB header in output

        Returns
            A dict with the keys
            header:        The PQR file header (string)
            lines:         The PQR file atoms (list)
            missedligands: Ligand residue names whose charges could not be
                           assigned (None when clean is set)
            protein:       The protein object
            routines:      The Routines object
    """
    # Build per-call defaults instead of sharing objects created when the
    # function was defined.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    lines = []
    Lig = None
    atomcount = 0  # Count the number of ATOM records in pdb

    outroot = utilities.getPQRBaseFileName(outname)

    if ph_calc_method == 'propka':
        pkaname = outroot + ".propka"
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    if drop_water:
        # Remove the waters; every other record is kept as-is
        pdblist_new = []
        for record in pdblist:
            if isinstance(record, (HETATM, ATOM, SIGATM, SEQADV)):
                if record.resName in WAT.water_residue_names:
                    continue
            pdblist_new.append(record)
        pdblist = pdblist_new

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
        atomcount = sum(1 for atom in myProtein.getAtoms()
                        if atom.type == "ATOM")
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    # Record one warning pair per residue that has alternate locations
    for residue in myProtein.getResidues():
        if any(atom.altLoc != "" for atom in residue.getAtoms()):
            myRoutines.warnings.append(
                "WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(
                " at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            #TODO: figure out a way to do this without crashing...
            #tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        # Return the same keys as the normal path, with None for the missed
        # ligand residues, so callers can index the result uniformly.
        return dict(header=header,
                    lines=lines,
                    missedligands=None,
                    protein=myProtein,
                    routines=myRoutines)

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if ph_calc_method == 'propka':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options,
                                 version=30)
        elif ph_calc_method == 'propka31':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options,
                                 version=31)
        elif ph_calc_method == 'pdb2pka':
            myRoutines.runPDB2PKA(ph, ff, pdblist, ligand, verbose,
                                  ph_calc_options)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            # TONI fixing residues - myhydRoutines has a reference to
            # myProtein, so i'm altering it in place
            myRoutines.holdResidues(holdList)
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()
    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein inside the loop.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.pop(misslist.index(atom))
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the nearest integer; int() truncates, which
            # would reject e.g. 0.9999 or -0.9999.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(
                    " the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if ffout is not None:
        scheme = ffout
        userff = None  # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist, misslist, reslist, charge, ff,
                            myRoutines.getWarnings(), ph_calc_method, ph,
                            ffout, commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return dict(header=header,
                lines=lines,
                missedligands=missedligandresidues,
                protein=myProtein,
                routines=myRoutines)
def runPDB2PQR(pdblist, ff, outname="", ph=None, verbose=False,
               selectedExtensions=None, extensionOptions=None,
               ph_calc_method=None, ph_calc_options=None, clean=False,
               neutraln=False, neutralc=False, ligand=None,
               assign_only=False, chain=False, drop_water=False,
               debump=True, opt=True, typemap=False, userff=None,
               usernames=None, ffout=None, holdList=None,
               commandLine=None, include_old_header=False):
    """
        Run the PDB2PQR Suite

        Arguments:
            pdblist: The list of objects that was read from the PDB file
                     given as input (list)
            ff:      The name of the forcefield (string)

        Keyword Arguments:
            outname:            The name of the desired output file
            ph:                 The desired ph of the system (float)
            verbose:            When True, print progress information to stdout
            selectedExtensions: Names of the extensions to run (keys of
                                extensions.extDict).  None means no extensions.
            extensionOptions:   Option object passed to each extension.  None
                                means a fresh utilities.ExtraOptions().
            ph_calc_method:     pKa calculation method
                                ("propka", "propka31", "pdb2pka")
            ph_calc_options:    Option object passed to the pKa method
            clean:              Only return the original atoms in aligned format
            neutraln:           Make the N-terminus of this protein neutral
            neutralc:           Make the C-terminus of this protein neutral
            ligand:             Ligand (mol2) handed to ligff.initialize
            assign_only:        Only assign charges and radii - do not add
                                atoms, debump, or optimize
            chain:              Keep the chain ID in the output PQR file
            drop_water:         Remove water molecules from output
            debump:             When true, debump heavy atoms
            opt:                When true, run full hydrogen optimization;
                                otherwise only waters are optimized
            typemap:            Create Typemap output
            userff:             The user created forcefield file to use
            usernames:          The user created names file to use
            ffout:              Use the naming scheme of this forcefield for
                                the output instead of the canonical one
            holdList:           Residues not to be optimized, passed to
                                Routines.holdResidues
            commandLine:        Command line used to launch the program;
                                included in the output header
            include_old_header: Include most of the PDB header in output

        Returns
            When clean is set: (header, lines, None)
            Otherwise:         (header, lines, missedligandresidues, protein)

            header:               The PQR file header (string)
            lines:                The PQR file atoms (list)
            missedligandresidues: Ligand residue names whose charges could
                                  not be assigned (list)
            protein:              The protein object
    """
    # Build per-call defaults instead of sharing objects created when the
    # function was defined.
    if selectedExtensions is None:
        selectedExtensions = []
    if extensionOptions is None:
        extensionOptions = utilities.ExtraOptions()

    pkaname = ""
    lines = []
    Lig = None
    atomcount = 0  # Count the number of ATOM records in pdb

    outroot = utilities.getPQRBaseFileName(outname)

    if ph_calc_method == 'propka':
        pkaname = outroot + ".propka"
        #TODO: What? Shouldn't it be up to propka on how to handle this?
        if os.path.isfile(pkaname):
            os.remove(pkaname)

    start = time.time()

    if verbose:
        print("Beginning PDB2PQR...\n")

    myDefinition = Definition()
    if verbose:
        print("Parsed Amino Acid definition file.")

    if drop_water:
        # Remove the waters; every other record is kept as-is
        pdblist_new = []
        for record in pdblist:
            if isinstance(record, (HETATM, ATOM, SIGATM, SEQADV)):
                if record.resName in WAT.water_residue_names:
                    continue
            pdblist_new.append(record)
        pdblist = pdblist_new

    # Check for the presence of a ligand!  This code is taken from pdb2pka/pka.py
    if ligand is not None:
        from pdb2pka.ligandclean import ligff
        myProtein, myDefinition, Lig = ligff.initialize(
            myDefinition, ligand, pdblist, verbose)
        atomcount = sum(1 for atom in myProtein.getAtoms()
                        if atom.type == "ATOM")
    else:
        myProtein = Protein(pdblist, myDefinition)

    if verbose:
        print("Created protein object -")
        print("\tNumber of residues in protein: %s" % myProtein.numResidues())
        print("\tNumber of atoms in protein : %s" % myProtein.numAtoms())

    myRoutines = Routines(myProtein, verbose)

    for residue in myProtein.getResidues():
        multoccupancy = False
        for atom in residue.getAtoms():
            if atom.altLoc != "":
                multoccupancy = True
                # Every affected atom is reported on stderr ...
                sys.stderr.write(
                    "Warning: multiple occupancies found: %s in %s\n"
                    % (atom.name, residue))
        if multoccupancy:
            # ... but only one warning pair per residue goes in the header
            myRoutines.warnings.append(
                "WARNING: multiple occupancies found in %s,\n" % (residue))
            myRoutines.warnings.append(
                " at least one of the instances is being ignored.\n")

    myRoutines.setTermini(neutraln, neutralc)
    myRoutines.updateBonds()

    if clean:
        header = ""
        lines = myProtein.printAtoms(myProtein.getAtoms(), chain)

        # Process the extensions
        for ext in selectedExtensions:
            module = extensions.extDict[ext]
            #TODO: figure out a way to do this without crashing...
            #tempRoutines = copy.deepcopy(myRoutines)
            module.run_extension(myRoutines, outroot, extensionOptions)

        if verbose:
            print("Total time taken: %.2f seconds\n" % (time.time() - start))

        #Be sure to include None for missed ligand residues
        # NOTE(review): this path returns 3 items while the normal path
        # returns 4 - confirm that callers handle both shapes.
        return header, lines, None

    #remove any future need to convert to lower case
    if ff is not None:
        ff = ff.lower()
    if ffout is not None:
        ffout = ffout.lower()

    if not assign_only:
        # It is OK to process ligands with no ATOM records in the pdb
        if not (atomcount == 0 and Lig is not None):
            myRoutines.findMissingHeavy()
        myRoutines.updateSSbridges()

        if debump:
            myRoutines.debumpProtein()

        if ph_calc_method == 'propka':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options,
                                 version=30)
        elif ph_calc_method == 'propka31':
            myRoutines.runPROPKA(ph, ff, outroot, pkaname, ph_calc_options,
                                 version=31)
        elif ph_calc_method == 'pdb2pka':
            myRoutines.runPDB2PKA(ph, ff, pdblist, ligand, verbose,
                                  ph_calc_options)

        myRoutines.addHydrogens()

        myhydRoutines = hydrogenRoutines(myRoutines)

        if debump:
            myRoutines.debumpProtein()

        if opt:
            myhydRoutines.setOptimizeableHydrogens()
            # TONI fixing residues - myhydRoutines has a reference to
            # myProtein, so i'm altering it in place
            myRoutines.holdResidues(holdList)
            myhydRoutines.initializeFullOptimization()
            myhydRoutines.optimizeHydrogens()
        else:
            myhydRoutines.initializeWaterOptimization()
            myhydRoutines.optimizeHydrogens()

        # Special for GLH/ASH, since both conformations were added
        myhydRoutines.cleanup()
    else:
        # Special case for HIS if using assign-only
        for residue in myProtein.getResidues():
            if isinstance(residue, HIS):
                myRoutines.applyPatch("HIP", residue)

    myRoutines.setStates()

    myForcefield = Forcefield(ff, myDefinition, userff, usernames)
    hitlist, misslist = myRoutines.applyForcefield(myForcefield)

    ligsuccess = 0

    if ligand is not None:
        # If this is independent, we can assign charges and radii here.
        # Iterate over a snapshot: a failed ligand is removed from the
        # protein inside the loop.
        for residue in list(myProtein.getResidues()):
            if not isinstance(residue, LIG):
                continue

            templist = []
            Lig.make_up2date(residue)
            for atom in residue.getAtoms():
                atom.ffcharge = Lig.ligand_props[atom.name]["charge"]
                atom.radius = Lig.ligand_props[atom.name]["radius"]
                if atom in misslist:
                    misslist.pop(misslist.index(atom))
                    templist.append(atom)

            charge = residue.getCharge()
            # Compare against the nearest integer; int() truncates, which
            # would reject e.g. 0.9999 or -0.9999.
            if abs(charge - round(charge)) > 0.001:
                # Ligand parameterization failed
                myRoutines.warnings.append(
                    "WARNING: PDB2PQR could not successfully parameterize\n")
                myRoutines.warnings.append(
                    " the desired ligand; it has been left out of\n")
                myRoutines.warnings.append(" the PQR file.\n")
                myRoutines.warnings.append("\n")

                # remove the ligand
                myProtein.residues.remove(residue)
                for myChain in myProtein.chains:
                    if residue in myChain.residues:
                        myChain.residues.remove(residue)
            else:
                ligsuccess = 1
                # Mark these atoms as hits
                hitlist = hitlist + templist

    # Temporary fix; if ligand was successful, pull all ligands from misslist
    if ligsuccess:
        for atom in misslist[:]:
            if isinstance(atom.residue, (Amino, Nucleic)):
                continue
            misslist.remove(atom)

    # Create the Typemap
    if typemap:
        typemapname = "%s-typemap.html" % outroot
        myProtein.createHTMLTypeMap(myDefinition, typemapname)

    # Grab the protein charge
    reslist, charge = myProtein.getCharge()

    # If we want a different naming scheme, use that
    if ffout is not None:
        scheme = ffout
        userff = None  # Currently not supported
        if scheme != ff:
            myNameScheme = Forcefield(scheme, myDefinition, userff)
        else:
            myNameScheme = myForcefield
        myRoutines.applyNameScheme(myNameScheme)

    header = printPQRHeader(pdblist, misslist, reslist, charge, ff,
                            myRoutines.getWarnings(), ph_calc_method, ph,
                            ffout, commandLine,
                            include_old_header=include_old_header)
    lines = myProtein.printAtoms(hitlist, chain)

    # Determine if any of the atoms in misslist were ligands
    missedligandresidues = []
    for atom in misslist:
        if isinstance(atom.residue, (Amino, Nucleic)):
            continue
        if atom.resName not in missedligandresidues:
            missedligandresidues.append(atom.resName)

    # Process the extensions
    for ext in selectedExtensions:
        module = extensions.extDict[ext]
        #TODO: figure out a way to do this without crashing...
        #tempRoutines = copy.deepcopy(myRoutines)
        module.run_extension(myRoutines, outroot, extensionOptions)

    if verbose:
        print("Total time taken: %.2f seconds\n" % (time.time() - start))

    return header, lines, missedligandresidues, myProtein