def __init__(self, com, debug=0, **params):
    """
    Prepare a rigid-body minimization run for a receptor/ligand complex.

    PSF file names and PDB codes are read from the two components of the
    complex, names for temporary input and output PDB files are reserved,
    and the X-Plor input template and parameter file are located below
    the data root. The remaining setup is delegated to Xplorer.__init__.

    :param com: complex to work on; must provide rec() and lig()
    :param debug: handed through to Xplorer.__init__ (default: 0)
    :param params: additional key=value pairs handed through to
                   Xplorer.__init__
    """
    self.com = com

    ## topology files of both binding partners
    self.rec_psf = com.rec().getPsfFile()
    self.lig_psf = com.lig().getPsfFile()

    rec_suffix = com.rec().getPdbCode() + ".pdb"
    lig_suffix = com.lig().getPdbCode() + ".pdb"

    ## names (not yet existing files) for the PDBs going in and out
    self.rec_in = tempfile.mktemp(rec_suffix)
    self.lig_in = tempfile.mktemp(lig_suffix)
    self.lig_out = tempfile.mktemp("lig_out.pdb")
    self.rec_out = tempfile.mktemp("rec_out.pdb")

    ## static input files shipped with the package data
    self.inp_template = t.dataRoot() + '/xplor/rb_minimize_complex.inp'
    self.param19 = t.dataRoot() + '/xplor/toppar/param19.pro'

    self.result = None

    Xplorer.__init__(self, self.inp_template, debug=debug, **params)
def test_amberPrepParser(self):
    """AmberPrepParser test

    Parse every residue library matching ``*in`` in the amber/residues
    data folder, check that each yields more than 10 residue types and
    that 'all_amino03.in' yields exactly 33, then verify that the residue
    dictionary of the last parser holds AmberResidueType instances.
    """
    pattern = T.dataRoot() + '/amber/residues/*in'
    files = [osp.basename(path) for path in glob.glob(pattern)]

    results = {}

    if self.local:
        print()

    for fname in files:
        if self.local:
            print('working on ', fname)

        self.p = AmberPrepParser(fname)
        self.r = list(self.p.residueTypes())
        self.assertTrue(len(self.r) > 10)

        if self.local:
            print('\tparsed %i residue types from %s' % (len(self.r), fname))

        results[fname] = self.r

    self.assertEqual(len(results['all_amino03.in']), 33)

    ## the dictionary is taken from the parser of the last file
    dic = self.p.residueDict()
    first_entry = list(dic.values())[0]
    self.assertTrue(isinstance(first_entry, AmberResidueType))

    self.results = results

    if self.local:
        for res in results['all_nuc02.in']:
            print(res)
def __prepareFolder(self):
    """
    The surfrace binary, radii files and the PDB have to be in the
    current working directory. Otherwise we get a pretty silent
    segmentation fault.

    Symbolic links to the binary and to radii.txt are created in the
    temporary folder (unless they already exist) and self.exe.bin is
    re-pointed to the linked binary.

    @return temp folder (with trailing '/')
    @rtype str

    @raise SurfaceRacer_Error: if the folder or links cannot be prepared
    """
    ## bind both names up-front: the error report below reads them and
    ## would otherwise fail with UnboundLocalError (hiding the real
    ## problem) if T.tempDir() itself raises
    folder = None
    binlnk = None

    try:
        folder = T.tempDir()
        binlnk = os.path.join(folder, self.exe.name)

        if not os.path.exists(binlnk):
            os.symlink(self.exe.bin, binlnk)

        radii_txt = T.dataRoot() + '/surface_racer_3/radii.txt'
        target = os.path.join(folder, 'radii.txt')

        if not os.path.exists(target):
            os.symlink(radii_txt, target)

        self.exe.bin = binlnk

        return folder + '/'

    except OSError as error:
        ## chain the OSError so that its traceback is not lost
        raise SurfaceRacer_Error(
            'Error preparing temporary folder for SurfaceRacer\n' +
            'Error: %r\n' % error +
            'folder: %r\n' % folder +
            'binary: %r\n' % self.exe.bin +
            'binary link: %r' % binlnk) from error
def __prepareFolder(self):
    """
    The surfrace binary, radii files and the PDB have to be in the
    current working directory. Otherwise we get a pretty silent
    segmentation fault.

    Symbolic links to the binary and to radii.txt are created in the
    temporary folder (unless they already exist) and self.exe.bin is
    re-pointed to the linked binary.

    @return temp folder (with trailing '/')
    @rtype str

    @raise SurfaceRacer_Error: if the folder or links cannot be prepared
    """
    ## bind both names up-front: the error report below reads them and
    ## would otherwise fail with UnboundLocalError (hiding the real
    ## problem) if T.tempDir() itself raises
    folder = None
    binlnk = None

    try:
        folder = T.tempDir()
        binlnk = os.path.join(folder, self.exe.name)

        if not os.path.exists(binlnk):
            os.symlink(self.exe.bin, binlnk)

        radii_txt = T.dataRoot() + '/surface_racer_3/radii.txt'
        target = os.path.join(folder, 'radii.txt')

        if not os.path.exists(target):
            os.symlink(radii_txt, target)

        self.exe.bin = binlnk

        return folder + '/'

    except OSError as error:
        ## chain the OSError so that its traceback is not lost
        raise SurfaceRacer_Error(
            'Error preparing temporary folder for SurfaceRacer\n' +
            'Error: %r\n' % error +
            'folder: %r\n' % folder +
            'binary: %r\n' % self.exe.bin +
            'binary link: %r' % binlnk) from error
def test_Xplorer(self):
    """Xplorer test

    Run Xplorer on the 1A19 ligand test structure, writing the input
    script, the log and the resulting PDB into self.dir_out.
    """
    ## values filled into the input template
    template_values = {
        'param19': T.dataRoot() + '/xplor/toppar/param19.pro',
        'lig_out': self.dir_out + '/lig.pdb',
        'xout': self.dir_out + '/test.out',
        'lig_pdb': T.testRoot() + '/lig/1A19.pdb',
        'lig_psf': T.testRoot() + '/lig/1A19.psf',
    }

    self.x = Xplorer(self.dir_out + '/test.inp',
                     verbose=self.local,
                     **template_values)

    self.x.run()

    if not self.local:
        return

    print(""" The minimized structure and the X-Plor log file have been written to %s and %s, respectively """ % (template_values['lig_out'], template_values['xout']))
    print("""See x.logLines for the complete xplor log file!""")
def test_Xplorer(self):
    """Xplorer test

    Run Xplorer on the 1A19 ligand test structure, writing the input
    script, the log and the resulting PDB into self.dir_out.
    """
    out_dir = self.dir_out

    ## files provided with the package
    param = T.dataRoot() + '/xplor/toppar/param19.pro'

    ## files produced by this run
    pdb_out = out_dir + '/lig.pdb'
    log_out = out_dir + '/test.out'

    ## test structure and topology
    pdb_in = T.testRoot() + '/lig/1A19.pdb'
    psf_in = T.testRoot() + '/lig/1A19.psf'

    self.x = Xplorer(
        out_dir + '/test.inp',
        xout=log_out,
        verbose=self.local,
        lig_psf=psf_in,
        lig_pdb=pdb_in,
        param19=param,
        lig_out=pdb_out,
    )
    self.x.run()

    if self.local:
        report = """ The minimized structure and the X-Plor log file have been written to %s and %s, respectively """
        print(report % (pdb_out, log_out))
        print("""See x.logLines for the complete xplor log file!""")
def test_amberPrepParser(self):
    """AmberPrepParser test

    Every residue library matching ``*in`` in the amber/residues data
    folder is parsed; each must give more than 10 residue types and
    'all_amino03.in' exactly 33. The residue dictionary of the last
    parser must contain AmberResidueType instances.
    """
    found = glob.glob(T.dataRoot() + '/amber/residues/*in')
    names = [osp.basename(path) for path in found]

    parsed = {}
    chatty = self.local

    if chatty:
        print()

    for name in names:
        if chatty:
            print('working on ', name)

        self.p = AmberPrepParser(name)
        self.r = list(self.p.residueTypes())

        self.assertTrue(len(self.r) > 10)

        if chatty:
            print('\tparsed %i residue types from %s' % (len(self.r), name))

        parsed[name] = self.r

    self.assertEqual(len(parsed['all_amino03.in']), 33)

    ## dictionary of the parser created in the last loop iteration
    dic = self.p.residueDict()
    self.assertTrue(isinstance(list(dic.values())[0], AmberResidueType))

    self.results = parsed

    if chatty:
        for res in parsed['all_nuc02.in']:
            print(res)
def __init__(self, model, tempdir=None, args='', autocap=False,
             capN=None, capC=None, **kw):
    """
    @param model: input structure
    @type  model: PDBModel
    @param tempdir: create dedicated temporary folder (default: None)
                    see Executor
    @type  tempdir: str | 0|1
    @param args: additional command line arguments for reduce
                 (default:'') example: '-OLDpdb'
    @type  args: str
    @param autocap: add capping NME and ACE residues to any
                    (auto-detected) false N- or C-terminal
                    (default: False)
    @type  autocap: bool
    @param capN: cap N-terminal of these chains (indices) with ACE
                 (default: None, meaning no chains)
    @type  capN: [ int ]
    @param capC: cap C-terminal of these chains (indices) with NME
                 (default: None, meaning no chains)
    @type  capC: [ int ]

    @param kw: additional key=value parameters for Executor:
    @type  kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local) NOT TESTED
                      (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STOUT)
                      (default: None)
    """
    tempdir = self.newtempfolder(tempdir)

    ## NOTE(review): tempfile.mktemp only reserves a *name* and is
    ## documented as race-prone; kept because the files are created
    ## later on -- consider switching to mkstemp.
    self.f_pdbin = tempfile.mktemp('_in.pdb', 'reduce_', dir=tempdir)
    f_out = tempfile.mktemp('_out.pdb', 'reduce_', dir=tempdir)

    self.f_db = T.dataRoot() + '/reduce/reduce_wwPDB_het_dict.txt'

    self.autocap = autocap

    ## every instance gets its own list; a shared mutable default would
    ## leak later modifications of self.capN / self.capC between instances
    self.capN = [] if capN is None else capN
    self.capC = [] if capC is None else capC

    lenchains = model.lenChains()

    Executor.__init__(self, 'reduce',
                      args='%s -BUILD -Nterm%i -DB %s %s' %
                      (args, lenchains, self.f_db, self.f_pdbin),
                      f_out=f_out, catch_err=True,
                      tempdir=tempdir,
                      **kw)

    self.model = model
def test_SettingsParser(self):
    """SettingsManager test

    Parse the default settings file and check that the entry 'testparam'
    is reported with value 42.

    :return: name and value of the parsed 'testparam' setting
    :rtype: (str, any)
    """
    parser = SettingsParser(T.dataRoot() + '/defaults/settings.cfg')
    parser.parse()

    ## fall back to an empty Setting if the entry is missing
    setting = parser.result.get('testparam', Setting())

    observed = (setting.name, setting.value)
    self.assertEqual(observed, ('testparam', 42))

    return observed
def test_SettingsParser(self):
    """SettingsManager test

    Parse the default settings file and check that the entry 'testparam'
    is reported with value 42.

    :return: name and value of the parsed 'testparam' setting
    :rtype: (str, any)
    """
    cfg_file = T.dataRoot() + '/defaults/settings.cfg'

    parser = SettingsParser(cfg_file)
    parser.parse()

    ## fall back to an empty Setting if the entry is missing
    entry = parser.result.get('testparam', Setting())
    name, value = entry.name, entry.value

    self.assertEqual((name, value), ('testparam', 42))

    return name, value
def __init__(self, f_in=None):
    """
    :param f_in: amber "off" or "prep" file with residue definitions
                 if not existing, we will look for a file with this name
                 in the Biskit data folder (data/amber/residues)
                 (default: 'all_amino03.in')
    :type  f_in: str
    """
    f_in = f_in or self.F_DEFAULT

    ## fall back to the residue library folder shipped with the package
    if not osp.exists(T.absfile(f_in)):
        f_in = T.dataRoot() + '/amber/residues/' + f_in

    ## raw string: '\.' is an invalid escape in a normal string literal
    self.firstrecord = re.compile(r'db[0-9]+\.dat')

    ## context manager ensures the file handle is closed again
    with open(T.absfile(f_in), 'r') as f:
        self.s = f.read()

    self.s = self.firstrecord.split(self.s)[-1]  # skip until first residue
def __init__(self, f_in=None):
    """
    :param f_in: amber "off" or "prep" file with residue definitions
                 if not existing, we will look for a file with this name
                 in the Biskit data folder (data/amber/residues)
                 (default: 'all_amino03.in')
    :type  f_in: str
    """
    f_in = f_in or self.F_DEFAULT

    ## fall back to the residue library folder shipped with the package
    if not osp.exists(T.absfile(f_in)):
        f_in = T.dataRoot() + '/amber/residues/' + f_in

    ## raw string: '\.' is an invalid escape in a normal string literal
    self.firstrecord = re.compile(r'db[0-9]+\.dat')

    ## context manager ensures the file handle is closed again
    with open(T.absfile(f_in), 'r') as f:
        self.s = f.read()

    self.s = self.firstrecord.split(self.s)[-1]  # skip until first residue
def ramachandran_background(self):
    """
    Creates a background (favoured regions) for a ramachandran plot.

    The background matrix is loaded from the data folder; every cell
    with a value below 200 is translated into one dot, with the column
    index mapped to the first and the row index to the second coordinate.

    @return: list of biggles.Point objects
    @rtype: [ biggles.Point ]
    """
    grid = N0.loadtxt(T.dataRoot() + '/biggles/ramachandran_bg.dat')
    n_rows, n_cols = N0.shape(grid)

    return [
        biggles.Point((360. / n_cols) * col - 180,
                      (360. / n_rows) * (n_rows - row) - 180,
                      type="dot")
        for row in range(n_rows)
        for col in range(n_cols)
        if grid[row, col] < 200
    ]
def ramachandran_background(self):
    """
    Creates a background (favoured regions) for a ramachandran plot.

    The background matrix is loaded from the data folder; every cell
    with a value below 200 is translated into one dot, with the column
    index mapped to the first and the row index to the second coordinate.

    @return: list of biggles.Point objects
    @rtype: [ biggles.Point ]
    """
    grid = N0.loadtxt(T.dataRoot() + '/biggles/ramachandran_bg.dat')
    n_rows, n_cols = N0.shape(grid)

    points = []
    for row in range(n_rows):
        for col in range(n_cols):
            if not grid[row, col] < 200:
                continue
            first = (360. / n_cols) * col - 180
            second = (360. / n_rows) * (n_rows - row) - 180
            points.append(biggles.Point(first, second, type="dot"))

    return points
class PDBCleaner:
    """
    PDBCleaner performs the following tasks:

      * remove HETAtoms from PDB
      * replace non-standard AA by its closest standard AA
      * remove non-standard atoms from standard AA residues
      * delete atoms that follow missing atoms (in a chain)
      * remove multiple occupancy atoms (except the one with highest
        occupancy)
      * add ACE and NME capping residues to C- and N-terminals or chain
        breaks (see capTerminals(), this is NOT done automatically in
        process())

    Usage:
    =======

      >>> c = PDBCleaner( model )
      >>> c.process()
      >>> c.capTerminals( auto=True )

    This will modify the model in-place and report changes to STDOUT.
    Alternatively, you can specify a log file instance for the output.
    PDBCleaner.process accepts several options to modify the processing.

    Capping
    =======

    Capping will add N-methyl groups to free C-terminal carboxy ends
    or Acetyl groups to free N-terminal Amines and will thus 'simulate' the
    continuation of the protein chain -- a common practice in order to
    prevent fake terminal charges. The automatic discovery of missing
    residues is guess work at best. The more conservative approach is to
    use, for example:

      >>> c.capTerminals( breaks=1, capC=[0], capN=[2] )

    In this case, only the chain break detection is used for automatic
    capping -- the last residue before a chain break is capped with NME and
    the first residue after the chain break is capped with ACE. Chain break
    detection relies on PDBModel.chainBreaks() (via
    PDBModel.chainIndex( breaks=1 )).
    The normal terminals to be capped are now specified explicitly. The first
    chain (not counting chain breaks) will receive a NME C-terminal cap and
    the third chain of the PDB will receive a N-terminal ACE cap.

    Note: Dictionaries with standard residues and atom content are defined
          in Biskit.molUtils. This is a duplicate effort with the new
          strategy to parse Amber prep files for very similar information
          (AmberResidueType, AmberResidueLibrary) and should change once we
          implement a real framework for better residue handling.
    """

    #: these atoms always occur at the tip of a chain or within a ring
    #: and, if missing, will not trigger the removal of other atoms
    TOLERATE_MISSING = ['O', 'CG2', 'CD1', 'CD2', 'OG1', 'OE1', 'NH1',
                        'OD1', 'H5T', "O5'",
                        ]

    ## PDB with ACE capping residue
    F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb'
    ## PDB with NME capping residue
    F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb'

    def __init__(self, fpdb, log=None, verbose=True):
        """
        :param fpdb: pdb file OR PDBModel instance
        :type  fpdb: str OR Biskit.PDBModel
        :param log: biskit.LogFile object (default: STDOUT)
        :type  log: biskit.LogFile
        :param verbose: log warnings and infos (default: True)
        :type  verbose: bool
        """
        self.model = PDBModel(fpdb)
        self.log = log or StdLog()
        self.verbose = verbose

    def logWrite(self, msg, force=1):
        """
        Add a message to the log or, if there is no log, print it.

        :param msg: message to report
        :type  msg: str
        :param force: print the message if there is no log (default: 1)
        :type  force: 1||0
        """
        if self.log:
            self.log.add(msg)
        else:
            if force:
                print(msg)

    def remove_multi_occupancies(self):
        """
        Keep only atoms with alternate A field (well, or no alternate).

        Atoms flagged as alternate 'A' or '1' get their alternate field
        cleared. Other alternates are removed if their occupancy is below
        1.0 and kept otherwise. Errors on single atoms are logged and do
        not stop the processing.
        """
        if self.verbose:
            self.logWrite(self.model.pdbCode +
                          ': Removing multiple occupancies of atoms ...')

        to_be_removed = []

        for i, a in enumerate(self.model):

            if a['alternate']:
                try:
                    str_id = "%i %s %s %i" % (a['serial_number'], a['name'],
                                              a['residue_name'],
                                              a['residue_number'])

                    if a['alternate'].upper() in ['A', '1']:
                        a['alternate'] = ''

                    else:
                        if float(a['occupancy']) < 1.0:
                            to_be_removed += [i]
                            if self.verbose:
                                self.logWrite(
                                    'removing %s (%s %s)' %
                                    (str_id, a['alternate'], a['occupancy']))
                        else:
                            if self.verbose:
                                self.logWrite((
                                    'keeping non-A duplicate %s because of 1.0 ' +
                                    'occupancy') % str_id)

                ## best effort per atom, but do not swallow
                ## KeyboardInterrupt / SystemExit
                except Exception:
                    self.logWrite("Error removing duplicate: " + t.lastError())

        try:
            self.model.remove(to_be_removed)
            if self.verbose:
                self.logWrite('Removed %i atoms' % len(to_be_removed))

        except Exception:
            if self.verbose:
                self.logWrite('No atoms with multiple occupancies to remove')

    def replace_non_standard_AA(self, amber=0, keep=[]):
        """
        Replace amino acids with none standard names with standard
        amino acids according to :class:`MU.nonStandardAA`. Residues with
        a name that is neither standard nor listed there are renamed to
        ALA.

        :param amber: don't rename HID, HIE, HIP, CYX, NME, ACE [0]
        :type  amber: 1||0
        :param keep: names of additional residues to keep
        :type  keep: [ str ]
        """
        ## set: membership is tested once per atom
        standard = set(list(MU.atomDic.keys()) + keep)

        if amber:
            standard.update(['HID', 'HIE', 'HIP', 'CYX', 'NME', 'ACE'])

        replaced = 0

        if self.verbose:
            self.logWrite(self.model.pdbCode +
                          ': Looking for non-standard residue names...')

        resnames = self.model['residue_name']
        for i in self.model.atomRange():

            resname = resnames[i].upper()

            if resname not in standard:
                if resname in MU.nonStandardAA:
                    resnames[i] = MU.nonStandardAA[resname]

                    if self.verbose:
                        self.logWrite('renamed %s %i to %s' %
                                      (resname, i, MU.nonStandardAA[resname]))
                else:
                    resnames[i] = 'ALA'

                    self.logWrite('Warning: unknown residue name %s %i: '
                                  % (resname, i))
                    if self.verbose:
                        self.logWrite('\t->renamed to ALA.')

                replaced += 1

        if self.verbose:
            self.logWrite('Found %i atoms with non-standard residue names.' %
                          replaced)

    def __standard_res(self, resname, amber=0):
        """
        Check if resname is a standard residue (according to
        :class:`MU.atomDic`) if not return the closest standard residue
        (according to :class:`MU.nonStandardAA`).

        :param resname: 3-letter residue name
        :type  resname: str
        :param amber: currently not used
        :type  amber: 1||0

        :return: name of closest standard residue or resname itself
        :rtype: str
        """
        if resname in MU.atomDic:
            return resname

        if resname in MU.nonStandardAA:
            return MU.nonStandardAA[resname]

        return resname

    def remove_non_standard_atoms(self):
        """
        First missing standard atom triggers removal of standard atoms that
        follow in the standard order. All non-standard atoms are removed too.
        Data about standard atoms are taken from :class:`MU.atomDic` and
        synonym atom names are defined in :class:`MU.atomSynonyms`.

        Residues without a matching standard residue are left untouched.

        :return: number of atoms removed
        :rtype: int
        """
        mask = []

        if self.verbose:
            self.logWrite("Checking content of standard amino-acids...")

        for res in self.model.resList():

            resname = self.__standard_res(res[0]['residue_name']).upper()

            ## bugfix: ignore non-standard residues that have no matching
            ## standard residue
            if resname in MU.atomDic:

                standard = copy.copy(MU.atomDic[resname])

                ## replace known synonyms by standard atom name
                for a in res:
                    n = a['name']
                    if n not in standard and \
                            MU.atomSynonyms.get(n, 0) in standard:
                        a['name'] = MU.atomSynonyms[n]
                        if self.verbose:
                            self.logWrite(
                                '%s: renaming %s to %s in %s %i' %
                                (self.model.pdbCode, n, a['name'],
                                 a['residue_name'], a['residue_number']))

                anames = [a['name'] for a in res]
                keep = 1

                ## kick out all standard atoms that follow a missing one
                rm = []
                for n in standard:
                    if (n not in anames) and \
                            not (n in self.TOLERATE_MISSING):
                        keep = 0

                    if not keep:
                        rm += [n]

                for n in rm:
                    standard.remove(n)

                ## keep only atoms that are standard (and not kicked out
                ## above)
                for a in res:

                    if a['name'] not in standard:
                        mask += [1]
                        if self.verbose:
                            self.logWrite(
                                '%s: removing atom %s in %s %i ' %
                                (self.model.pdbCode, a['name'],
                                 a['residue_name'], a['residue_number']))
                    else:
                        mask += [0]

            else:
                ## the mask needs exactly one entry per atom; without
                ## these zeros all following positions would be shifted
                ## and point to the wrong atoms
                mask.extend(0 for _ in res)

        self.model.remove(mask)

        n_removed = N0.sum(mask)

        if self.verbose:
            self.logWrite('Removed ' + str(n_removed) +
                          ' atoms because they were non-standard' +
                          ' or followed a missing atom.')

        return n_removed

    def capACE(self, model, chain, breaks=True, checkgap=True):
        """
        Cap N-terminal of given chain.

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on
        model.chainIndex(breaks=True), that means with chain break
        detection activated! This is not the default behaviour of
        PDBModel.chainIndex or takeChains or chainLength. Please use the
        wrapping method capTerminals() for more convenient handling of
        the index.

        :param model: model
        :type  model: PDBModel
        :param chain: index of chain to be capped
        :type  chain: int
        :param breaks: consider chain breaks when identifying chain
                       boundaries
        :type  breaks: bool
        :param checkgap: raise an error if the number of chains (counted
                         with the given breaks setting) changes through
                         the capping (default: True)
        :type  checkgap: bool

        :return: model with added ACE capping
        :rtype : PDBModel

        :raise CappingError: if checkgap is set and the capping would
                             mask a chain break
        """
        if self.verbose:
            self.logWrite('Capping N-terminal of chain %i with ACE' % chain)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False

        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        m_ace = PDBModel(self.F_ace_cap)

        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        m_term = m_chain.resModels()[0]

        ## we need 3 atoms for superposition, CB might mess things up but
        ## could help if there is no HN
        ##        if 'HN' in m_term.atomNames():
        m_ace.remove(['CB'])  ## use backbone 'C' rather than CB for fitting

        ## rename overhanging residue in cap PDB
        for a in m_ace:
            if a['residue_name'] != 'ACE':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0] - 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## fit cap onto first residue of chain
        m_ace = m_ace.magicFit(m_term)

        cap = m_ace.resModels()[0]
        serial = m_term['serial_number'][0] - len(cap)
        cap['serial_number'] = list(range(serial, serial + len(cap)))

        ## concat cap on chain
        m_chain = cap.concat(m_chain, newChain=False)

        ## re-assemble whole model
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)
        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if checkgap and len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError('Capping ACE would mask a chain break. ' +
                               'This typically indicates a tight gap with high risk of ' +
                               'clashes and other issues.')

        return r

    def capNME(self, model, chain, breaks=True, checkgap=True):
        """
        Cap C-terminal of given chain.

        Note: In order to allow the capping of chain breaks,
        the chain index is, by default, based on
        model.chainIndex(breaks=True), that means with chain break
        detection activated! This is not the default behaviour of
        PDBModel.chainIndex or takeChains or chainLength. Please use the
        wrapping method capTerminals() for more convenient handling of
        the index.

        :param model: model
        :type  model: PDBModel
        :param chain: index of chain to be capped
        :type  chain: int
        :param breaks: consider chain breaks when identifying chain
                       boundaries
        :type  breaks: bool
        :param checkgap: raise an error if the number of chains (counted
                         with the given breaks setting) changes through
                         the capping (default: True)
        :type  checkgap: bool

        :return: model with added NME capping residue
        :rtype : PDBModel

        :raise CappingError: if checkgap is set and the capping would
                             mask a chain break
        """
        if self.verbose:
            self.logWrite('Capping C-terminal of chain %i with NME.' % chain)

        m_nme = PDBModel(self.F_nme_cap)

        c_start = model.chainIndex(breaks=breaks)
        c_end = model.chainEndIndex(breaks=breaks)
        Nterm_is_break = False
        Cterm_is_break = False

        if breaks:
            Nterm_is_break = c_start[chain] not in model.chainIndex()
            Cterm_is_break = c_end[chain] not in model.chainEndIndex()

        chains_before = model.takeChains(range(chain), breaks=breaks)
        m_chain = model.takeChains([chain], breaks=breaks)
        chains_after = model.takeChains(range(chain + 1, len(c_start)),
                                        breaks=breaks)

        m_term = m_chain.resModels()[-1]

        ## rename overhanging residue in cap PDB, renumber cap residue
        for a in m_nme:
            if a['residue_name'] != 'NME':
                a['residue_name'] = m_term.atoms['residue_name'][0]
            else:
                a['residue_number'] = m_term.atoms['residue_number'][0] + 1
                a['chain_id'] = m_term.atoms['chain_id'][0]
                a['segment_id'] = m_term.atoms['segment_id'][0]

        ## chain should not have any terminal O after capping
        m_chain.remove(['OXT'])

        ## fit cap onto last residue of chain
        m_nme = m_nme.magicFit(m_term)

        cap = m_nme.resModels()[-1]
        serial = m_term['serial_number'][-1] + 1
        cap['serial_number'] = list(range(serial, serial + len(cap)))

        ## concat cap on chain
        m_chain = m_chain.concat(cap, newChain=False)

        ## should be obsolete now
        if getattr(m_chain, '_PDBModel__terAtoms', []) != []:
            m_chain._PDBModel__terAtoms = [len(m_chain) - 1]
        assert m_chain.lenChains() == 1

        ## re-assemble whole model
        r = chains_before.concat(m_chain, newChain=not Nterm_is_break)
        r = r.concat(chains_after, newChain=not Cterm_is_break)

        if checkgap and len(c_start) != r.lenChains(breaks=breaks):
            raise CappingError('Capping NME would mask a chain break. ' +
                               'This typically indicates a tight gap with high risk of ' +
                               'clashes and other issues.')

        return r

    def convertChainIdsNter(self, model, chains):
        """
        Convert normal chain ids to chain ids considering chain breaks.
        The first atom of each given chain is used for the conversion.

        :param model: model
        :type  model: PDBModel
        :param chains: chain indices (without chain break detection)
        :type  chains: [ int ]
        :return: result of model.atom2chainIndices( ..., breaks=1 ) for
                 the first atoms of the given chains; an empty input is
                 returned as is
        """
        if len(chains) == 0:
            return chains
        i = N0.take(model.chainIndex(), chains)
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices(i, breaks=1)

    def convertChainIdsCter(self, model, chains):
        """
        Convert normal chain ids to chain ids considering chain breaks.
        The last atom of each given chain is used for the conversion.

        :param model: model
        :type  model: PDBModel
        :param chains: chain indices (without chain break detection)
        :type  chains: [ int ]
        :return: result of model.atom2chainIndices( ..., breaks=1 ) for
                 the last atoms of the given chains; an empty input is
                 returned as is
        """
        if len(chains) == 0:
            return chains
        ## fetch last atom of given chains
        index = N0.concatenate((model.chainIndex(), [len(model)]))
        i = N0.take(index, N0.array(chains) + 1) - 1
        ## convert back to chain indices but this time including chain breaks
        return model.atom2chainIndices(i, breaks=1)

    def unresolvedTerminals(self, model):
        """
        Autodetect (aka "guess") which N- and C-terminals are most likely not
        the real end of each chain. This guess work is based on residue
        numbering:

        * unresolved N-terminal: a protein residue with a residue number > 1
          (chains already starting with an ACE residue are skipped)

        * unresolved C-terminal: a protein residue that does not contain
          either OXT or OT or OT1 or OT2 atoms

        :param model: PDBModel

        :return: chains with unresolved N-term, with unresolved C-term
        :rtype : ([int], [int])
        """
        c_first = model.chainIndex()
        c_last = model.chainEndIndex()

        capN = [i for (i, pos) in enumerate(c_first)
                if model['residue_number'][pos] > 1]

        capN = [i for i in capN
                if model['residue_name'][c_first[i]] != 'ACE']

        capN = self.filterProteinChains(model, capN, c_first)

        capC = []
        for (i, pos) in enumerate(c_last):
            atoms = model.takeResidues(
                model.atom2resIndices([pos])).atomNames()

            if not ('OXT' in atoms or 'OT' in atoms or 'OT1' in atoms or
                    'OT2' in atoms):
                capC += [i]

        capC = self.filterProteinChains(model, capC, c_last)

        return capN, capC

    #@todo filter for protein positions in breaks=1

    def filterProteinChains(self, model, chains, chainindex):
        """
        Keep only those chains whose reference atom belongs to a protein.

        :param model: model
        :type  model: PDBModel
        :param chains: chain indices to filter
        :type  chains: [ int ]
        :param chainindex: atom position for each chain; the protein mask
                           is evaluated at this position
        :return: the chain indices that passed the filter
        :rtype: [ int ]
        """
        maskProtein = model.maskProtein()
        chains = [i for i in chains if maskProtein[chainindex[i]]]
        return chains

    def capTerminals(self, auto=False, breaks=False, capN=[], capC=[],
                     checkgap=True):
        """
        Add NME and ACE capping residues to chain breaks or normal N- and
        C-terminals. Note: these capping residues contain hydrogen atoms.

        Chain indices for capN and capC arguments can be interpreted either
        with or without chain break detection enabled. For example, let's
        assume we have a two-chain protein with some missing residues (chain
        break) in the first chain:

        A:   MGSKVSK---FLNAGSK
        B:   FGHLAKSDAK

        Then:
          capTerminals( breaks=False, capN=[1], capC=[1]) will add N-and
          C-terminal caps to chain B.
        However:
          capTerminals( breaks=True, capN=[1], capC=[1]) will add N- and
          C-terminal caps to the second fragment of chain A.

        Note: this operation *replaces* the internal model.

        :param auto: put ACE and NME capping residue on chain breaks
                     and on suspected false N- and C-termini (default: False)
        :type  auto: bool
        :param breaks: switch on chain break detection before interpreting
                       capN and capC
        :type  breaks: bool
        :param capN: indices of chains that should get ACE cap (default: [])
        :type  capN: [int]
        :param capC: indices of chains that should get NME cap (default: [])
        :type  capC: [int]
        :param checkgap: passed on to capACE and capNME (default: True)
        :type  checkgap: bool

        :return: the new internal model
        :rtype: PDBModel
        """
        m = self.model
        c_len = m.lenChains()
        i_breaks = m.chainBreaks()

        if auto:
            if not breaks:
                capN = self.convertChainIdsNter(m, capN)
                capC = self.convertChainIdsCter(m, capC)

            breaks = True
            capN, capC = self.unresolvedTerminals(m)

            end_broken = m.atom2chainIndices(m.chainBreaks(), breaks=1)

            capC = M.union(capC, end_broken)
            capN = M.union(capN, N0.array(end_broken) + 1)

        capN = self.filterProteinChains(m, capN, m.chainIndex(breaks=breaks))
        capC = self.filterProteinChains(m, capC,
                                        m.chainEndIndex(breaks=breaks))

        for i in capN:
            m = self.capACE(m, i, breaks=breaks, checkgap=checkgap)
            assert m.lenChains() == c_len, '%i != %i' % \
                (m.lenChains(), c_len)
            assert len(m.chainBreaks(force=True)) == len(i_breaks)
            assert m['serial_number'].dtype == N0.Int32, \
                'serial_number not int'

        for i in capC:
            m = self.capNME(m, i, breaks=breaks, checkgap=checkgap)
            assert m.lenChains() == c_len
            assert len(m.chainBreaks(force=True)) == len(i_breaks)

        self.model = m
        return self.model

    def process(self, keep_hetatoms=0, amber=0, keep_xaa=[]):
        """
        Remove Hetatoms, waters. Replace non-standard names.
        Remove non-standard atoms.

        :param keep_hetatoms: option
        :type  keep_hetatoms: 0||1
        :param amber: don't rename amber residue names (HIE, HID, CYX,..)
        :type  amber: 0||1
        :param keep_xaa: names of non-standard residues to be kept
        :type  keep_xaa: [ str ]

        :return: PDBModel (reference to internal)
        :rtype: PDBModel

        :raise CleanerError: if something doesn't go as expected ...
        """
        try:
            if not keep_hetatoms:
                self.model.remove(self.model.maskHetatm())
                self.model.remove(self.model.maskH2O())

            self.model.remove(self.model.maskH())

            self.remove_multi_occupancies()
            self.replace_non_standard_AA(amber=amber, keep=keep_xaa)
            self.remove_non_standard_atoms()

        except KeyboardInterrupt:
            ## pass the interrupt on unchanged (keeps the traceback)
            raise
        except Exception as why:
            self.logWrite('Error: ' + t.lastErrorTrace())
            raise CleanerError('Error cleaning model: %r' % why) from why

        return self.model
class AmberParmBuilder:
    """
    AmberParmBuilder
    ================
    Create Amber topology and coordinate file from PDB.

      - parmMirror():
         ...builds a fake parm that exactly mirrors a given PDB file.
         This parm can be used for ptraj but not for simulations.
         Currently, parmMirror only accepts amber-formatted PDBs as
         input. It should be possible to create topologies that have
         the same content and order of atoms as an xplor PDB but
         some atoms will have different names.

      - parmSolvated():
         ...builds a solvated system for PME simulations (incl. closing
         of S-S bonds, capping of chain breaks). parmSolvated accepts
         both xplor and amber-formatted PDBs as input.

    Requires the amber programs C{tleap} and C{ambpdb}.
    Requires leap template files in C{biskit/Biskit/data/amber/leap/}.

    Note on forcefields:

       The default forcefield used is specified in exe_tleap and currently
       is ff10. This translates to loading amber11/dat/leap/cmd/leaprc.ff10
       at the beginning of the leap run. As of 2011, ff10 is the recommended
       default forcefield for proteins and nucleic acids.
       Comment from Jason Swails on the Amber mailing list:

       " Try using ff99SB (which is the protein force field part of ff10,
       which is the version I would actually suggest using). Despite its
       label, it is actually a 2006 update of the ff99 force field which
       performs at least as well (if not better) as ff03."

       Unfortunately, ions are only "half" parameterized in ff10. Additional
       parameters need to be loaded from a frcmod file, typically
       frcmod.ionsjc_tip3p. There are additional versions of this file
       optimized for other water models than TIP3. frcmod.ionsjc_tip3p is
       set as the default frcmod file to include by parmSolvated and
       parmMirror. Please include it if you provide your own list of frcmod
       files.

    .. note::

       The design of AmberParmBuilder is less than elegant. It
       would make more sense to split it into two classes that
       are both derived from Executor.
    """

    ## script to create a parm that exactly mirrors a given PDB
    script_mirror_pdb = """
    logFile %(f_out)s
    source %(leaprc)s
    %(fmod)s
    %(fprep)s
    p = loadPdb %(in_pdb)s
    %(delete_atoms)s
    saveAmberParm p %(out_parm)s %(out_crd)s
    quit
    """

    ## tleap command to close a single S-S bond
    ss_bond = "bond p.%i.SG p.%i.SG\n"

    ## leap script for solvated topology
    F_leap_in = t.dataRoot() + '/amber/leap/solvate_box.leap'
    ## PDB with ACE capping residue
    F_ace_cap = t.dataRoot() + '/amber/leap/ace_cap.pdb'
    ## PDB with NME capping residue
    F_nme_cap = t.dataRoot() + '/amber/leap/nme_cap.pdb'

    def __init__( self, model,
                  leap_template=F_leap_in,
                  leaprc=None,
                  leap_out=None, leap_in=None,
                  leap_pdb=None,
                  log=None,
                  debug=0,
                  verbose=0,
                  **kw ):
        """
        :param model: model
        :type  model: PDBModel or str
        :param leap_template: path to template file for leap input
        :type  leap_template: str
        :param leaprc: forcefield parameter file or code (e.g. ff99)
        :type  leaprc: str
        :param leap_out: target file for leap.log (default: discard)
        :type  leap_out: str
        :param leap_in: target file for leap.in script (default: discard)
        :type  leap_in: str
        :param leap_pdb: target file for the cleaned PDB that is fed into
                         leap; if None, a temporary file name is used and the
                         file is removed again by parmSolvated (unless debug)
        :type  leap_pdb: str
        :param log: log for progress messages (default: a new StdLog)
        :param debug: keep temporary files (default: 0)
        :type  debug: 1|0
        :param verbose: report progress to log (default: 0)
        :type  verbose: 1|0
        :param kw: kw=value pairs for additional options in the leap_template
        :type  kw: key=value
        """
        self.m = PDBModel( model )

        self.leap_template = leap_template
        self.leaprc = leaprc

        self.leap_pdb = leap_pdb or tempfile.mktemp( '_leap_pdb' )
        ## only a PDB file requested by the caller survives the run
        self.keep_leap_pdb = leap_pdb is not None

        self.leap_in = leap_in
        self.leap_out = leap_out

        self.log = log or StdLog()

        self.output = None   # last output of leap

        self.debug = debug
        self.verbose = verbose

        ## additional options become plain attributes of this instance
        self.__dict__.update( kw )

    def __runLeap( self, in_script, in_pdb, norun=0, **kw ):
        """
        Create script file and run Leap.

        :param in_script: content of leap script with place holders
        :type  in_script: str
        :param in_pdb: PDB file to load into tleap
        :type  in_pdb: str
        :param norun: 1 - only create leap script (default: 0)
        :type  norun: 1|0
        :param kw: key=value pairs for filling place holders in script
        :type  kw: key=value

        :raise AmberError: if missing option for leap input file or
                           if could not create leap input file
        """
        x = AmberLeap( in_script,
                       in_pdb=in_pdb,
                       log=self.log, verbose=self.verbose, debug=self.debug,
                       catch_out=True,
                       f_in=self.leap_in,
                       f_out=self.leap_out,
                       **kw )
        if norun:
            x.generateInp()
        else:
            x.run()
            self.output = x.output

    def parm2pdb( self, f_parm, f_crd, f_out, aatm=0 ):
        """
        Use ambpdb to build PDB from parm and crd.

        :param f_parm: existing parm file
        :type  f_parm: str
        :param f_crd: existing crd file
        :type  f_crd: str
        :param f_out: target file name for PDB
        :type  f_out: str
        :param aatm: pass the -aatm switch to ambpdb (default: 0)
        :type  aatm: 1|0

        :return: f_out, target file name for PDB
        :rtype: str

        :raise AmberError: if ambpdb fail
        """
        ## equivalent shell call: ambpdb -p f_parm [-aatm] < f_crd > f_out
        args = '-p %s %s' % ( f_parm, '-aatm' if aatm else '' )

        x = Executor( 'ambpdb', args, f_in=f_crd, f_out=f_out,
                      log=self.log, verbose=self.verbose, catch_err=1 )
        x.run()

        ## the only success criterion is the existence of the output file
        if not os.path.exists( f_out ):
            raise AmberError('ambpdb failed.')

        return f_out

    def __ssBonds( self, model, cutoff=4. ):
        """
        Identify disulfide bonds.

        :param model: model
        :type  model: PDBModel
        :param cutoff: distance cutoff for S-S distance (default: 4.0)
        :type  cutoff: float

        :return: list with numbers of residue pairs forming S-S
        :rtype: [(int, int)]
        """
        m = model.compress( model.mask( ['SG'] ) )

        if len( m ) < 2:
            return []

        pw = MU.pairwiseDistances( m.xyz, m.xyz )
        pw = N0.less( pw, cutoff )

        res_numbers = m.atoms['residue_number']
        n = len( pw )

        ## upper triangle only: report each pair once and skip self-pairs
        return [ (res_numbers[i], res_numbers[j])
                 for i in range( n )
                 for j in range( i + 1, n )
                 if pw[i, j] ]

    def __cys2cyx( self, model, ss_residues ):
        """
        Rename all S-S bonded CYS into CYX.

        :param model: model (atoms are renamed in place)
        :type  model: PDBModel
        :param ss_residues: original residue numbers of S-S pairs
        :type  ss_residues: [(int, int)]
        """
        bonded = { number for pair in ss_residues for number in pair }

        for a in model:
            if a['residue_number'] in bonded:
                a['residue_name'] = 'CYX'

    def capACE( self, model, chain ):
        """
        Cap N-terminal of given chain.

        :param model: model
        :type  model: PDBModel
        :param chain: index of chain to be capped
        :type  chain: int

        :return: result of PDBCleaner.capACE
        """
        cleaner = PDBCleaner( model, log=self.log, verbose=self.verbose )
        return cleaner.capACE( model, chain, breaks=True )

    def capNME( self, model, chain ):
        """
        Cap C-terminal of given chain.

        :param model: model
        :type  model: PDBModel
        :param chain: index of chain to be capped
        :type  chain: int

        :return: result of PDBCleaner.capNME
        """
        cleaner = PDBCleaner( model, log=self.log, verbose=self.verbose )
        return cleaner.capNME( model, chain, breaks=True )

    def centerModel( self, model ):
        """
        Shift the model (in place) so that its geometric center is at the
        origin.

        :param model: model
        :type  model: PDBModel
        """
        center = N0.average( model.getXyz() )
        model.setXyz( model.xyz - center )

    def leapModel( self, hetatm=0, center=True ):
        """
        Get a clean PDBModel for input into leap.

        :param hetatm: keep HETATM records (default: 0)
        :type  hetatm: 1|0
        :param center: re-center coordinates (default: True)
        :type  center: bool

        :return: model
        :rtype: PDBModel
        """
        m = self.m.clone()
        m.xplor2amber()

        cleaner = PDBCleaner( m, log=self.log, verbose=self.verbose )
        m = cleaner.process( keep_hetatoms=hetatm, amber=1 )

        m.renumberResidues( addChainId=1 )

        if center:
            self.centerModel( m )

        return m

    def __fLines( self, template, values ):
        """
        Expand a one-line template once for every given value.

        :param template: string with place holder(s)
        :type  template: str
        :param values: single value or list of values (or of value tuples)
        :type  values: any OR [any]

        :return: concatenation of all filled-in templates
        :rtype: str
        """
        if not isinstance( values, list ):
            values = [ values ]

        return ''.join( [ template % v for v in values ] )

    def parmSolvated( self, f_out, f_out_crd=None, f_out_pdb=None,
                      hetatm=0, norun=0,
                      cap=0, capN=[], capC=[],
                      fmod=['frcmod.ionsjc_tip3p'], fprep=[],
                      box=10.0, center=True, **kw ):
        """
        Build a solvated topology: clean the model, optionally cap chain
        ends, detect S-S bonds, write the leap input PDB and run leap with
        the configured template.

        :param f_out: target file for parm (topology)
        :type  f_out: str
        :param f_out_crd: target file for crd (coordinates)
                          (default:|f_out_base|.crd)
        :type  f_out_crd: str
        :param f_out_pdb: target file for pdb
                          (default:|f_out_base|_leap.pdb)
        :type  f_out_pdb: str
        :param hetatm: keep hetero atoms (default: 0)
        :type  hetatm: 1|0
        :param norun: only create the leap input, do not run leap or ambpdb
                      (default: 0)
        :type  norun: 1|0
        :param cap: put ACE and NME capping residue on chain breaks
                    (default: 0)
        :type  cap: 1|0
        :param capN: indices of chains that should get ACE cap (default: [])
        :type  capN: [int]
        :param capC: indices of chains that should get NME cap (default: [])
        :type  capC: [int]
        :param box: minimal distance of solute from box edge (default: 10.0)
        :type  box: float
        :param center: re-center coordinates (default: True)
        :type  center: bool
        :param fmod: list of files with amber parameter modifications
                     to be loaded into leap with loadAmberParams
                     (default:['frcmod.ionsjc_tip3p'] ... mod file needed for
                     default Amber ff10 ions -- topology saving will fail if
                     this one is missing)
        :type  fmod: [str]
        :param fprep: list of files with amber residue definitions
                      (to be loaded into leap with loadAmberPrep)
                      (default: [])
        :type  fprep: [str]
        :param kw: additional key=value pairs for leap input template
        :type  kw: key=value

        :raise IOError: e.g. if the leap template cannot be opened
        """
        f_out = t.absfile( f_out )
        f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd'
        f_out_pdb = t.absfile( f_out_pdb ) or \
            t.stripSuffix( f_out ) + '_leap.pdb'

        ## fmod and fprep are deliberately NOT converted to absolute paths
        ## (bugfix 3434136)

        if self.verbose:
            self.log.add( '\nCleaning PDB file for Amber:' )
        m = self.leapModel( hetatm=hetatm, center=center )

        if cap:
            ## a break after chain i means: NME on chain i, ACE on chain i+1
            end_broken = m.atom2chainIndices( m.chainBreaks() )
            capC = MU.union( capC, end_broken )
            capN = MU.union( capN, N0.array( end_broken ) + 1 )

        for i in capN:
            if self.verbose:
                self.log.add( 'Adding ACE cap to chain %i' % i )
            m = self.capACE( m, i )

        for i in capC:
            if self.verbose:
                self.log.add( 'Adding NME cap to chain %i' % i )
            m = self.capNME( m, i )

        m.renumberResidues( addChainId=1 )  ## again, to accommodate capping

        ## context manager ensures the template file handle is closed
        with open( self.leap_template ) as f:
            template = f.read()

        leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod )
        leap_prep = self.__fLines( 'loadAmberPrep %s\n', fprep )

        ss = self.__ssBonds( m, cutoff=4. )
        self.__cys2cyx( m, ss )
        leap_ss = self.__fLines( self.ss_bond, ss )
        if self.verbose:
            self.log.add('Found %i disulfide bonds: %s' % (len(ss),str(ss)))

        if self.verbose:
            self.log.add( 'writing cleaned PDB to %s' % self.leap_pdb )
        m.writePdb( self.leap_pdb, ter=3 )

        self.__runLeap( template, in_pdb=self.leap_pdb,
                        out_parm=f_out, out_crd=f_out_crd,
                        ss_bonds=leap_ss,
                        fmod=leap_mod, fprep=leap_prep,
                        norun=norun,
                        box=box, **kw )

        if not norun:
            self.parm2pdb( f_out, f_out_crd, f_out_pdb )

        if not self.keep_leap_pdb and not self.debug:
            t.tryRemove( self.leap_pdb )

    def __deleteAtoms( self, m, i_atoms ):
        """
        Create leap statements that delete the given atoms.

        :param m: model
        :type  m: PDBModel
        :param i_atoms: atom index
        :type  i_atoms: [int]

        :return: leap statements for deleting given atoms, one per line
        :rtype: str
        """
        cmd = 'remove p.%(res)i p.%(res)i.%(atom)i'

        rM = m.resMap()
        rI = m.resIndex()

        s = []
        for i in i_atoms:
            res = m.atoms['residue_number'][i]
            ## subtract index of the first atom of this residue (1-based)
            atm = i - rI[ rM[ i ] ] + 1
            s.append( cmd % {'res':res, 'atom':atm} )

        return '\n'.join( s )

    def __inverseIndices( self, model, i_atoms ):
        """
        :param model: model
        :type  model: PDBModel
        :param i_atoms: atom index
        :type  i_atoms: [int]

        :return: remaining atom indices of m that are NOT in i_atoms
        :rtype: [int]
        """
        mask = N0.zeros( len( model ), N0.Int )
        N0.put( mask, i_atoms, 1 )
        return N0.nonzero( N0.logical_not( mask ) )

    def parmMirror( self, f_out, f_out_crd=None,
                    fmod=['frcmod.ionsjc_tip3p'], fprep=[], **kw ):
        """
        Create a parm7 file whose atom content (and order) exactly mirrors
        the given PDBModel. This requires two leap runs. First we get a
        temporary topology, then we identify all atoms added by leap and
        build a final topology where these atoms are deleted.
        This parm is hence NOT suited for simulations but can be used to parse
        e.g. a trajectory or PDB into ptraj.

        :param f_out: target parm file
        :type  f_out: str
        :param f_out_crd: target crd file (default: f_out but ending .crd)
        :type  f_out_crd: str
        :param fmod: list of amber Mod files (loaded with loadAmberParams)
        :type  fmod: [str]
        :param fprep: list of amber Prep files (loaded with loadAmberPrep)
        :type  fprep: [str]

        :raise AmberError: if leap renamed or deleted atoms of the input
        """
        f_out = t.absfile( f_out )
        f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd'

        ## if there are hydrogens, recast them to standard amber names
        aatm = 'HA' in self.m.atomNames()  ## 'HB2' in self.m.atomNames()

        ## First leap round ##
        m_ref = self.m.clone()
        m_ref.xplor2amber( aatm=aatm, parm10=True )
        tmp_in = tempfile.mktemp( 'leap_in0.pdb' )
        m_ref.writePdb( tmp_in, ter=3 )

        tmp_parm = tempfile.mktemp( '_parm0' )
        tmp_crd = tempfile.mktemp( '_crd0' )

        leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod )
        leap_prep = self.__fLines( 'loadAmberPrep %s\n', fprep )

        self.__runLeap( self.script_mirror_pdb,
                        leaprc=self.leaprc, fmod=leap_mod, fprep=leap_prep,
                        in_pdb=tmp_in, out_parm=tmp_parm, out_crd=tmp_crd,
                        delete_atoms='' )

        tmp_pdb = self.parm2pdb( tmp_parm, tmp_crd,
                                 tempfile.mktemp( 'leap_out.pdb' ), aatm=aatm )

        if not self.debug:
            t.tryRemove( tmp_parm )
            t.tryRemove( tmp_crd )
            t.tryRemove( tmp_in )

        ## load model with missing atoms added by leap
        m_leap = PDBModel( tmp_pdb )

        ## compare atom content
        iLeap, iRef = m_leap.compareAtoms( m_ref )

        ## check that ref model doesn't need any change
        if iRef != list(range( len( m_ref ))):
            _, uRef = m_leap.unequalAtoms( m_ref, iLeap, iRef )
            atms = m_ref.reportAtoms( uRef, n=6 )
            raise AmberError("Cannot create exact mirror of %s.\n" % tmp_in +\
                 "Leap has renamed/deleted original atoms in %s:\n"% tmp_pdb+\
                 atms)

        ## indices of atoms that were added by leap
        delStr = self.__deleteAtoms( m_leap,
                                     self.__inverseIndices( m_leap, iLeap ) )

        ## Second leap round ##
        self.__runLeap( self.script_mirror_pdb, leaprc=self.leaprc,
                        in_pdb=tmp_pdb, fmod=leap_mod, fprep=leap_prep,
                        out_parm=f_out, out_crd=f_out_crd,
                        delete_atoms=delStr )

        if not self.debug:
            t.tryRemove( tmp_pdb )
def __init__( self, model, template=None, topologies=None,
              f_charges=None, f_radii=None, f_map=None,
              addcharge=True, protonate=True, autocap=False,
              indi=4.0, exdi=80.0, salt=0.15, ionrad=2, prbrad=1.4,
              bndcon=4, scale=2.3, perfil=60,
              **kw ):
    """
    Set up a delphi run in a newly created temporary folder.

    @param model: structure for which potential should be calculated
    @type  model: PDBModel
    @param template: delphi command file template [None=use default]
    @type  template: str
    @param f_radii: alternative delphi atom radii file [None=use default]
    @type  f_radii: str
    @param topologies: alternative list of residue charge/topology files
                       [default: amber/residues/all*]
    @type  topologies: [ str ]
    @param f_charges: alternative delphi charge file
                      [default: create custom]
    @type  f_charges: str
    @param f_map: output file name for potential map [None= discard]
    @type  f_map: str
    @param addcharge: build atomic partial charges with AtomCharger
                      [default: True]
    @type  addcharge: bool
    @param protonate: (re-)build hydrogen atoms with reduce program (True)
                      see L{Biskit.Reduce}
    @type  protonate: bool
    @param autocap: add capping NME and ACE residues to any
                    (auto-detected) false N- or C-terminal and chain
                    breaks (default: False)
                    see L{Biskit.Reduce} and L{Biskit.PDBCleaner}
    @type  autocap: bool

    @param indi: interior dielectric (4.0)
    @param exdi: exterior dielectric (80.0)
    @param salt: salt conc. in M (0.15)
    @param ionrad: ion radius (2)
    @param prbrad: probe radius (1.4)
    @param bndcon: boundary condition (4, delphi default is 2)
    @param scale: grid spacing (2.3)
    @param perfil: grid fill factor in % (for automatic grid, 60)

    @param kw: additional key=value parameters for Executor:
    @type  kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local) NOT TESTED
                      (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STOUT) (default: None)
    """
    if not template:
        template = os.path.join( T.dataRoot(), 'delphi', self.F_PARAMS )

    ## every scratch file of this run lives in its own new temp folder
    tempdir = self.newtempfolder( tempdir=True )

    def scratch( suffix ):
        """Unused file name 'delphi_*<suffix>' inside the temp folder."""
        return tempfile.mktemp( suffix, 'delphi_', dir=tempdir )

    f_in = scratch( '.inp' )
    self.f_pdb = scratch( '.pdb' )

    ## a potential map is only kept if the caller named a target file
    self.keep_map = f_map is not None
    self.f_map = f_map if f_map else scratch( '_mapout.phi' )

    if f_radii:
        self.f_radii = f_radii
    else:
        self.f_radii = os.path.join( T.dataRoot(), 'delphi', self.F_RADII )

    self.topologies = topologies or self.F_RESTYPES
    self.f_charges = f_charges if f_charges else scratch( '.crg' )

    self.protonate = protonate
    self.autocap = autocap
    self.addcharge = addcharge

    ## DELPHI run parameters
    self.indi = indi        # interior dielectric (4.0)
    self.exdi = exdi        # exterior dielectric (80.0)
    self.salt = salt        # salt conc. in M (0.15)
    self.ionrad = ionrad    # ion radius (2)
    self.prbrad = prbrad    # probe radius (1.4)
    self.bndcon = bndcon    # boundary condition (4, delphi default is 2)

    ## DELPHI parameters for custom grid
    self.scale = scale      # grid spacing (2.3)
    self.perfil = perfil    # grid fill factor in % (automatic grid, 60)
    self.gsize = None
    self.acenter = None
    self.strcenter = '(0.0,0.0,0.0)'

    ## the temp folder doubles as working directory of the program
    kw.update( tempdir=tempdir, cwd=tempdir )

    Executor.__init__( self, 'delphi',
                       template=template,
                       f_in=f_in,
                       args=f_in,
                       catch_err=True,
                       **kw )

    self.model = model
    self.delphimodel = None
def __init__(self, model, template=None, topologies=None,
             f_charges=None, f_radii=None, f_map=None,
             addcharge=True, protonate=True, autocap=False,
             indi=4.0, exdi=80.0, salt=0.15, ionrad=2, prbrad=1.4,
             bndcon=4, scale=2.3, perfil=60,
             **kw):
    """
    Set up a delphi run in a newly created temporary folder.

    @param model: structure for which potential should be calculated
    @type  model: PDBModel
    @param template: delphi command file template [None=use default]
    @type  template: str
    @param f_radii: alternative delphi atom radii file
                    [None= 'radii.siz' inside the new temp folder]
    @type  f_radii: str
    @param topologies: alternative list of residue charge/topology files
                       [default: amber/residues/all*]
    @type  topologies: [ str ]
    @param f_charges: alternative delphi charge file
                      [default: create custom]
    @type  f_charges: str
    @param f_map: output file name for potential map [None= discard]
    @type  f_map: str
    @param addcharge: build atomic partial charges with AtomCharger
                      [default: True]
    @type  addcharge: bool
    @param protonate: (re-)build hydrogen atoms with reduce program (True)
                      see L{Biskit.Reduce}
    @type  protonate: bool
    @param autocap: add capping NME and ACE residues to any
                    (auto-detected) false N- or C-terminal and chain
                    breaks (default: False)
                    see L{Biskit.Reduce} and L{Biskit.PDBCleaner}
    @type  autocap: bool

    @param indi: interior dielectric (4.0)
    @param exdi: exterior dielectric (80.0)
    @param salt: salt conc. in M (0.15)
    @param ionrad: ion radius (2)
    @param prbrad: probe radius (1.4)
    @param bndcon: boundary condition (4, delphi default is 2)
    @param scale: grid spacing (2.3)
    @param perfil: grid fill factor in % (for automatic grid, 60)

    @param kw: additional key=value parameters for Executor:
    @type  kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local) NOT TESTED
                      (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STOUT) (default: None)
    """
    if not template:
        template = os.path.join(T.dataRoot(), 'delphi', self.F_PARAMS)

    ## every scratch file of this run lives in its own new temp folder
    tempdir = self.newtempfolder(tempdir=True)

    def scratch(suffix):
        """Unused file name 'delphi_*<suffix>' inside the temp folder."""
        return tempfile.mktemp(suffix, 'delphi_', dir=tempdir)

    f_in = scratch('.inp')
    self.f_pdb = scratch('.pdb')

    ## a potential map is only kept if the caller named a target file
    self.keep_map = f_map is not None
    self.f_map = f_map if f_map else scratch('_mapout.phi')

    ## packaged radii file; the radii file used for the run defaults to
    ## 'radii.siz' in the temp folder
    self.f_radiisrc = os.path.join(T.dataRoot(), 'delphi', self.F_RADII)
    self.f_radii = f_radii if f_radii else os.path.join(tempdir, 'radii.siz')

    self.topologies = topologies or self.F_RESTYPES
    self.f_charges = f_charges if f_charges else scratch('.crg')

    self.protonate = protonate
    self.autocap = autocap
    self.addcharge = addcharge

    ## DELPHI run parameters
    self.indi = indi        # interior dielectric (4.0)
    self.exdi = exdi        # exterior dielectric (80.0)
    self.salt = salt        # salt conc. in M (0.15)
    self.ionrad = ionrad    # ion radius (2)
    self.prbrad = prbrad    # probe radius (1.4)
    self.bndcon = bndcon    # boundary condition (4, delphi default is 2)

    ## DELPHI parameters for custom grid
    self.scale = scale      # grid spacing (2.3)
    self.perfil = perfil    # grid fill factor in % (automatic grid, 60)
    self.gsize = None
    self.acenter = None
    self.strcenter = '(0.0,0.0,0.0)'

    ## the temp folder doubles as working directory of the program
    kw.update(tempdir=tempdir, cwd=tempdir)

    Executor.__init__(self, 'delphi',
                      template=template,
                      f_in=f_in,
                      args=f_in,
                      catch_err=True,
                      **kw)

    self.model = model
    self.delphimodel = None
class ExeConfig( object ):
    """
    ExeConfig
    =========

    Manage the settings that Executor needs for calling an external
    program.

    ExeConfig is initialised with a program name and expects to find
    the configuration for this program in
    C{ ~/.biskit/exe_|name|.dat }. Only if nothing is found there, it looks
    for C{ Biskit/data/defaults/exe_|name|.dat } in the biskit installation
    folder.  If neither of the two files are found, an error is raised
    (strict=1) or the binary is assumed to be |name| and must be
    accessible in the search path (strict=0).

    Example
    -------

    The configuration file (exe_name.dat) should look like this::

      ---- start example configuration file ----
      [BINARY]

      comment=the emacs editor
      bin=emacs
      cwd=
      shell=0
      shellexe=
      pipes=0
      ## Use new environment containing only variables given below
      replaceEnv=0

      [ENVIRONMENT]

      HOME=
      EMACS_CONFIG=~/.emacs/config.dat
      ---- end of example file ----

    This example config would ask Executor to look for an executable
    called 'emacs' in the local search path. Before running it,
    executor should check that a variable $HOME exists in the local
    shell environment (or raise an error otherwise) and set the
    variable $EMACS_CONFIG to the given path.

    The other settings specify how the program call is done (see also
    Python 2.4 subprocess.Popen() ):

      - cwd      ... working directory (empty -- current working directory)
      - shell    ... wrap process in separate shell
      - shellexe ... which shell (empty -- sh)
      - pipes    ... paste input via STDIN, collect output at STDOUT

    Missing options are reset to their default value; see
    :meth:`ExeConfig.reset`. All entries in section BINARY are put into
    the name space of the ExeConfig object. That means an ExeConfig object
    x created from the above file can be used as follows:

      >>> x = ExeConfig( 'emacs' )
      >>> x.cwd is None
      True
      >>> print(x.comment)
      the emacs editor
    """

    ## default search path for exe_...config files
    CONFIG_PATH = [ os.path.expanduser('~/.biskit'),
                    os.path.join( T.dataRoot(), 'defaults' ) ]

    SECTION_BIN = 'BINARY'
    SECTION_ENV = 'ENVIRONMENT'

    def __init__( self, name, strict=True, configpath=None):
        """
        :param name: unique name of the program
        :type  name: str
        :param strict: insist on a config file exe_name.dat
                       and do not tolerate missing environment variables
                       (default: True)
        :type  strict: bool
        :param configpath: list of pathnames where configuration file should
                           be searched, None means use default:
                           ['~/.biskit', '.../biskit/data/defaults']
        :type  configpath: [str]

        :raise ExeConfigError: if strict==1 and config file
                               incomplete/missing
        """
        self.name = name      #: identifier
        self.strict = strict
        self.dat = ''         #: configuration file path

        searchpath = configpath or self.CONFIG_PATH  #: [str]

        ## work on a copy so that pop() never empties CONFIG_PATH itself;
        ## list() also accepts tuples and other sequences
        p = list( searchpath )

        if len(p) < 1:
            raise ExeConfigError('Path(s) for exe config files missing.')

        ## first existing exe_|name|.dat along the search path wins
        while p and not os.path.exists(self.dat):
            self.dat = os.path.join( p.pop(0), 'exe_%s.dat' % name )

        if self.strict and not os.path.exists(self.dat):
            raise ExeConfigError(
                'Could not find configuration file %s for program %s.\n'
                % (self.dat, self.name) +
                'Searching in: %r' % (searchpath,) )

        self.env_checked = 0  ## environment was verified

        self.conf = CaseSensitiveConfigParser()
        self.conf.read( self.dat )

        self.reset()
        self.update()

    def reset( self ):
        """
        Reset all required parameters. Called at creation
        """
        ## default values
        self.comment = 'no comment or missing configuration file'
        self.bin = self.name
        self.shell = 0
        self.shellexe = None
        self.pipes = 0
        self.cwd = None  #'./'

        self.replaceEnv = 0
        self.env = None

    def update( self ):
        """
        Load settings from associated configuration file (if available).
        Is automatically called at creation.

        :raise ExeConfigError: if section [BINARY] was not found in the file
        """
        ## get parameters from config file if available; type-cast values
        try:
            dconf = self.conf.items( self.SECTION_BIN )

            for key, value in dconf:

                ## cast to the type of the current (default) value;
                ## unknown options and None defaults are kept as string
                cast = type( self.__dict__.get( key, '' ) )
                if cast is type(None):
                    cast = str

                ## leave default value if nothing is given
                if value != '':
                    self.__dict__[ key ] = cast( value )

        except configparser.NoSectionError:
            if self.strict:
                raise ExeConfigError('Could not find BINARY section in %s.' % self.dat)

        ## the ENVIRONMENT section is optional; only parser errors are
        ## tolerated here, anything else is a real problem and propagates
        try:
            self.env = dict( self.conf.items( self.SECTION_ENV ) )
        except configparser.Error:
            pass

    def validate( self ):
        """
        Validate the path to the binary.

        :raise ExeConfigError: if environment is not fit for running
                               the program
        """
        try:
            self.bin = T.absbinary( self.bin )  ## raises IOError if not found

            missing = self.update_environment()
            report = '%s is missing environment variables: %r'\
                     % (self.name, missing )

            if missing and self.strict:
                raise ExeConfigError(report)

            if missing:
                EHandler.warning( report )

        except IOError as e:
            ## re-raise but silence reporting of IOError in stack trace
            raise ExeConfigError(str(e) + ' Check %s!' % self.dat) from None

    def environment( self ):
        """
        Get needed environment variables.

        :return: dictionary with environment for subprocess.Popen;
                 empty, if no environment was specified
        :rtype: {str:str} OR None

        :raise ExeConfigError: if env was not yet checked by
                               update_environment
        """
        if not self.env_checked:
            raise ExeConfigError('Environment not yet checked, validate()!')

        return self.env

    def update_environment( self ):
        """
        Check for missing environment settings. Variables that are listed
        without value in the configuration are filled in from the current
        process environment.

        :return: names of required but missing environment variables
        :rtype: [str]
        """
        missing = []

        if self.env:

            for key, value in self.env.items():

                ## an empty value means: take it from the local environment
                if value == '':

                    current = os.getenv( key )

                    if current is None:
                        missing.append( key )
                    else:
                        self.env[ key ] = current

        self.env_checked = 1

        return missing

    def __repr__( self ):
        lines = [ 'ExeConfig for %s' % self.name ]
        lines.extend( '%10s \t%r' % (k, v) for k, v in self.__dict__.items() )
        return '\n'.join( lines )

    def __str__( self ):
        return self.__repr__()
def prepare(self):
    """
    Define the file names used by the test: the input PDB below the
    test root, temporary names for the topology and the leap log, and the
    leap template for building a solvated box.
    """
    root = T.testRoot() + '/amber/'

    self.fpdb = root + '1HPT_0.pdb'

    ## temporary output names only; nothing is created here
    self.fparm = tempfile.mktemp('.parm', 'top_')
    self.flog = tempfile.mktemp('.log', 'leap_')

    self.template = T.dataRoot() + '/amber/leap/solvate_box.leap'