def setupJobs(self):
    """
    Prepare the job dictionary for 'AlignerSlave'.

    @return: input informations for aligner for each project
    @rtype: {{str}}
    """
    r = {}

    for folder in self.folders:
        inp = {}
        inp['outFolder'] = T.absfile(folder)
        inp['fastaTemplates'] = self.__dir_or_none(folder, self.fastaTemplates)
        inp['fastaSequences'] = self.__dir_or_none(folder, self.fastaSequences)
        inp['fastaTarget'] = self.__dir_or_none(folder, self.fastaTarget)

        ## collect all PDB files of this project, if a pdb folder is set
        if self.pdbFolder is None:
            inp['pdbFiles'] = None
        else:
            pdb_dir = folder + self.pdbFolder
            inp['pdbFiles'] = [pdb_dir + '/%s'%name
                               for name in os.listdir(pdb_dir)]

        r[T.absfile(folder)] = inp

    return r
def setupJobs(self):
    """
    Prepare the job dictionary for 'AlignerSlave'.

    @return: input informations for aligner for each project
    @rtype: {{str}}
    """
    r = {}

    for f in self.folders:
        aligner_input = {}
        aligner_input['outFolder'] = T.absfile(f)
        aligner_input['fastaTemplates'] = \
            self.__dir_or_none( f, self.fastaTemplates )
        aligner_input['fastaSequences'] = \
            self.__dir_or_none( f, self.fastaSequences )
        aligner_input['fastaTarget'] = \
            self.__dir_or_none( f, self.fastaTarget )

        pdb_list = []
        if self.pdbFolder is None:
            pdb_list = None
        else:
            ## plain concatenation -- assumes self.pdbFolder starts with
            ## a path separator; TODO confirm against callers
            pdbfiles = os.listdir(f + self.pdbFolder)
            for pdb in pdbfiles:
                pdb_list.append(f + self.pdbFolder + '/%s' % pdb)
        aligner_input['pdbFiles'] = pdb_list

        r[T.absfile(f)] = aligner_input

    return r
def __init__( self, fdcd, fref, box=0, pdbCode=None,
              log=StdLog(), verbose=0):
    """
    @param fdcd: path to input dcd file
    @type fdcd: str
    @param fref: PDB or pickled PDBModel or directly an open PDBModel
                 instance with same atom content and order
    @type fref: str or PDBModel
    @param box: expect line with box info at the end of each frame
                (default: 0)
    @type box: 1|0
    @param pdbCode: pdb code to be put into the model (default: None)
    @type pdbCode: str
    @param log: LogFile instance [Biskit.StdLog]
    @type log: Biskit.LogFile
    @param verbose: print progress to log [0]
    @type verbose: int
    """
    self.fdcd = T.absfile( fdcd )
    ## third argument 0 disables buffering on the dcd file handle
    self.dcd = open(self.fdcd, "r", 0)

    if isinstance(fref, str) :
        self.ref=PDBModel(T.absfile(fref), pdbCode=pdbCode)
    elif fref :
        ## NOTE(review): if fref is None or false-y, self.ref is never set
        ## and self.ref.lenAtoms() below will fail -- confirm intended usage
        self.ref = fref

    self.box = box
    self.n = self.ref.lenAtoms()

    self.log = log
    self.verbose = verbose

    self.readHeader()
    self.set_pointerInfo()
def __init__(self, f_parm, f_crd, f_template=None,
             s=0, e=None, step=1, **kw):
    """
    Set up a ptraj run on an Amber trajectory.

    @param f_parm: path to amber topology file
    @type f_parm: str
    @param f_crd: path to amber trajectory file
    @type f_crd: str
    @param f_template: alternative ptraj input template (default: None)
    @type f_template: str
    @param s: start frame (default: 0, first)
    @type s: int
    @param e: end frame (default: None, last)
    @type e: int
    @param step: frame offset (default: 1, no offset)
    @type step: int
    @param kw: additional key=value parameters passed through to Executor
               (debug, verbose, node, nice, log, ...)
    @type kw: key=value pairs
    """
    script = f_template or self.ptraj_script

    Executor.__init__(self, 'ptraj', template=script, push_inp=0, **kw)

    self.f_parm = T.absfile(f_parm)
    self.f_crd = T.absfile(f_crd)

    self.start = s
    self.stop = e or 10000000   # None -> practically "until last frame"
    self.step = step

    ## default (empty) result record
    self.result = dict.fromkeys(
        ['T', 'mass', 'vibes', 'S_total', 'S_trans', 'S_rot',
         'S_vibes', 'contributions', 'nframes'], None)
    self.result['version'] = self.version()
    self.result['node'] = self.node
def prepareOutFolder( self, fout ):
    """
    Set up an output folder (plus rec/ and lig/ sub-folders for HEX pdbs),
    creating it if necessary.

    @param fout: outfile name; may contain a '%s' placeholder that is
                 replaced by a month/day date stamp
    @type fout: str

    @return: out path
    @rtype: str
    """
    ## date stamp in folder name
    try:
        datestr = "%02i%02i" % (localtime()[1], localtime()[2])
        fout = fout % ( datestr )
        if not os.path.exists( t.absfile( fout ) ):
            os.mkdir( t.absfile( fout ) )
    ## user provided folder name without '%s' -> formatting fails;
    ## narrowed from a bare except so real mkdir/OS errors surface
    except (TypeError, ValueError):
        if not os.path.exists( t.absfile( fout ) ):
            os.mkdir( t.absfile( fout ) )

    ## create folders for rec / lig HEX pdbs
    if not os.path.exists( t.absfile( fout ) + '/rec' ):
        os.mkdir( t.absfile( fout ) + '/rec' )
    if not os.path.exists( t.absfile( fout ) + '/lig' ):
        os.mkdir( t.absfile( fout ) + '/lig' )

    return t.absfile( fout )
def createHexInp(self, nRec, nLig): """ Create a HEX macro file for docking a rec lig pair. @param nRec: model number rec in model dictionaries @type nRec: int @param nLig: model number lig in model dictionaries @type nLig: int @return: macro file name, future out file name @rtype: (str, str) @raise DockerError: if macro dock option is different from previous call """ ## fetch PDB file names for given model numbers recPdb, ligPdb = self.recHexPdbs[nRec], self.ligHexPdbs[nLig] ## fetch the according PDBModels rec, lig = self.recDic[nRec], self.ligDic[nLig] fout_base = self.out + '/%s_%i-%s_%i' % (rec.pdbCode, nRec, lig.pdbCode, nLig) if os.path.exists(t.absfile(fout_base + '_hex.mac')) and self.verbose: fmac = t.absfile(fout_base + '_hex.mac') fout = t.absfile(fout_base + '_hex.out') macro = self.macroDock print "Dock setup: using old ", os.path.split(fmac)[1] else: silent = 1 if self.verbose: silent = 0 fmac, fout, macro = hexTools.createHexInp(recPdb, rec, ligPdb, lig, self.comPdb, outFile=fout_base, macDock=self.macroDock, sol=self.soln, silent=silent) if self.macroDock is None: self.macroDock = macro else: if self.macroDock != macro: raise DockerError('MacroDock setting changed to %i: %i : %i' %\ ( macro, nRec, nLig) ) ## remember which models and which out file are used self.runDic[fmac] = (nRec, nLig, fout) return fmac, fout
def updateModelDic( f ):
    """
    Call update() on all models in a pickled model dict to make them
    aware of the new profiles, then write the dict back to the same file.

    @param f: file name of pickled model dictionary
    """
    print 'Updating ', f

    d = T.load( T.absfile( f ) )

    for m in d.values():
        m.update( updateMissing=1 )

    ## overwrite the original file with the updated dictionary
    T.dump( d, T.absfile( f ) )
def updateModelDic(f): """ Call update() on all models in a dict to make them aware of the new profiles.""" print 'Updating ', f d = T.load(T.absfile(f)) for m in d.values(): m.update(updateMissing=1) T.dump(d, T.absfile(f))
def sloppyload( f ):
    """
    Unpickle an object, falling back to PickleUpgrader when normal
    unpickling fails (e.g. for pickles written by older versions).

    @param f: file name
    @type f: str
    @return: unpickled object
    @rtype: any
    """
    try:
        T.flushPrint( "Loading " + str(f) + '\n' )
        return T.load( T.absfile( f ) )

    except cPickle.UnpicklingError:
        ## retry with the tolerant upgrader
        print "Trying to load %s in sloppy mode..." % f
        return PickleUpgrader(open(T.absfile(f))).load()
def createHexInp( self, nRec, nLig ): """ Create a HEX macro file for docking a rec lig pair. @param nRec: model number rec in model dictionaries @type nRec: int @param nLig: model number lig in model dictionaries @type nLig: int @return: macro file name, future out file name @rtype: (str, str) @raise DockerError: if macro dock option is different from previous call """ ## fetch PDB file names for given model numbers recPdb, ligPdb = self.recHexPdbs[nRec], self.ligHexPdbs[nLig] ## fetch the according PDBModels rec, lig = self.recDic[ nRec ], self.ligDic[ nLig ] fout_base = self.out+'/%s_%i-%s_%i' % (rec.pdbCode, nRec, lig.pdbCode, nLig) if os.path.exists( t.absfile( fout_base + '_hex.mac' ) ) and self.verbose: fmac = t.absfile( fout_base + '_hex.mac' ) fout = t.absfile( fout_base + '_hex.out' ) macro = self.macroDock print "Dock setup: using old ", os.path.split( fmac )[1] else: silent=1 if self.verbose: silent=0 fmac, fout, macro = hexTools.createHexInp(recPdb,rec, ligPdb,lig, self.comPdb, outFile=fout_base, macDock=self.macroDock, sol=self.soln, silent=silent) if self.macroDock == None: self.macroDock = macro else: if self.macroDock != macro: raise DockerError('MacroDock setting changed to %i: %i : %i' %\ ( macro, nRec, nLig) ) ## remember which models and which out file are used self.runDic[ fmac ] = ( nRec, nLig, fout ) return fmac, fout
def inputComplex(options):
    """
    Build the reference Complex, either unpickled from option 'c' or
    assembled from a PDB ('i') plus receptor/ligand chain lists ('r'/'l').
    """
    if 'c' in options:
        return T.load(T.absfile(options['c']))

    m = PDBModel(T.absfile(options['i']))

    ## extract rec and lig chains and build the Protein complex
    rec = m.takeChains(T.toIntList(options['r']))
    lig = m.takeChains(T.toIntList(options['l']))

    return Complex(rec, lig)
def saveProtocols( self ):
    """
    Save the protocol dictionary to '<fout without suffix>_protocols.dat'.
    """
    f_prot = T.stripSuffix( T.absfile(self.fout) ) + '_protocols.dat'

    self.log.write( 'Saving parameters to %s...' % f_prot )
    T.dump( self.protocols, f_prot )
def dumpMembers( self, traj, fname ):
    """
    Dump ensemble member trajectories, one file per member. Existing
    member files are reused, not overwritten.

    @param traj: Trajectory to dump
    @type traj: Trajectory
    @param fname: trajectory file name - used to derive names for members
    @type fname: str

    @return: list of trajectory files
    @rtype: [str]
    """
    fname = T.stripSuffix( T.absfile( fname, resolveLinks=0 ) )
    members = range( traj.n_members )

    r = []
    for n in members:
        f = fname + '_member_%02i.traj' % n

        if os.path.exists( f ):
            self.log.add('using existing ' + f )
        else:
            self.log.write('saving ' + f + '...')
            m = traj.takeMember( n )
            T.dump( m, f )
            self.log.add('done')

        r += [ f ]

    return r
def saveProtocols(self):
    """
    Pickle the protocol dictionary next to the output file, under
    '<fout without suffix>_protocols.dat'.
    """
    f_prot = T.stripSuffix(T.absfile(self.fout)) + '_protocols.dat'

    self.log.write('Saving parameters to %s...' % f_prot)

    T.dump(self.protocols, f_prot)
def __init__(self, f_in=None ):
    """
    @param f_in: amber "off" or "prep" file with residue definitions;
                 if not existing, we will look for a file with this name
                 in the Biskit data folder (data/amber/residues)
                 (default: 'all_amino03.in')
    @type f_in: str
    """
    f_in = f_in or self.F_DEFAULT

    ## fall back to the residue library bundled with Biskit
    if not osp.exists( T.absfile( f_in ) ):
        f_in = T.dataRoot() + '/amber/residues/' + f_in

    ## matches record headers such as 'db10.dat'
    self.firstrecord = re.compile( 'db[0-9]+\.dat' )

    self.s = open( T.absfile( f_in ), 'r' ).read()
    self.s = self.firstrecord.split( self.s )[-1] #skip until first residue
def __init__(self, f_in=None):
    """
    Read an amber "off" or "prep" file with residue definitions.

    @param f_in: residue definition file; if it does not exist, the same
                 name is looked up in the Biskit data folder
                 (data/amber/residues) (default: 'all_amino03.in')
    @type f_in: str
    """
    f_in = f_in or self.F_DEFAULT

    if not osp.exists(T.absfile(f_in)):
        f_in = T.dataRoot() + '/amber/residues/' + f_in

    ## record headers look like 'db10.dat'
    self.firstrecord = re.compile('db[0-9]+\.dat')

    raw = open(T.absfile(f_in), 'r').read()

    ## discard everything before the first residue record
    self.s = self.firstrecord.split(raw)[-1]
def test_Executor(self):
    """Executor test (run emacs ~/.biskit/settings.cfg)"""
    ExeConfigCache.reset()

    self.x = ExeConfigCache.get('emacs', strict=0)
    self.x.pipes = 1

    args = '.biskit/settings.cfg'
    ## in non-interactive runs, tell emacs to quit again right away
    if not self.local:
        args = '-kill ' + args

    self.e = Executor('emacs', args=args, strict=0,
                      f_in=None,
                      f_out=self.fout,
                      verbose=self.local,
                      cwd=t.absfile('~'),
                      tempdir=True,
                      debug=self.DEBUG)

    self.r = self.e.run()

    if self.local:
        print 'Emacs was running for %.2f seconds' % self.e.runTime

    self.assert_(self.e.pid is not None)

    if self.DEBUG:
        if self.verbose:
            self.log.add("DEBUGGING mode active")
    else:
        ## tempfolder must be cleaned up unless we are debugging
        self.assert_(not os.path.exists(self.e.tempdir),
                     'tempfolder not removed')
def test_Executor(self): """Executor test (run emacs ~/.biskit/settings.cfg)""" ExeConfigCache.reset() self.x = ExeConfigCache.get('emacs', strict=0) self.x.pipes = 1 args = '.biskit/settings.cfg' if not self.local: args = '-kill ' + args self.e = Executor('emacs', args=args, strict=0, f_in=None, f_out=self.fout, verbose=self.local, cwd=t.absfile('~'), tempdir=True, debug=self.debug) self.r = self.e.run() if self.local: print 'Emacs was running for %.2f seconds' % self.e.runTime self.assert_(self.e.pid != None)
def __init__(self, outFolder='.', clusterLimit=50, verbose=0, log=None):
    """
    @param outFolder: project folder (results are put into subfolder) ['.']
    @type outFolder: str
    @param clusterLimit: maximal number of returned sequence clusters
                         (default: 50)
    @type clusterLimit: int
    @param verbose: keep temporary files (default: 0)
    @type verbose: 1|0
    @param log: log file instance, if None, STDOUT is used (default: None)
    @type log: LogFile
    """
    self.outFolder = T.absfile(outFolder)

    self.verbose = verbose
    self.log = log or StdLog()

    self.record_dic = None     #: dict ID - Bio.Fasta.Record
    self.clusters = None       #: list of lists of ids
    self.bestOfCluster = None  #: list of non-redundant ids

    #: the maximal number of clusters to return
    self.clusterLimit = clusterLimit

    self.clustersCurrent = None  #: current number of clusters

    self.prepareFolders()
def dumpMembers(self, traj, fname):
    """
    Dump every ensemble member of a trajectory to its own file; member
    files that already exist are kept as they are.

    @param traj: Trajectory to dump
    @type traj: Trajectory
    @param fname: trajectory file name - used to derive member file names
    @type fname: str

    @return: list of trajectory files
    @rtype: [str]
    """
    base = T.stripSuffix(T.absfile(fname, resolveLinks=0))

    result = []
    for n in range(traj.n_members):
        member_file = base + '_member_%02i.traj' % n

        if os.path.exists(member_file):
            self.log.add('using existing ' + member_file)
        else:
            self.log.write('saving ' + member_file + '...')
            T.dump(traj.takeMember(n), member_file)
            self.log.add('done')

        result.append(member_file)

    return result
def __writeBlastResult( self, parsed_blast, outFile): """ Write the result from the blast search to file (similar to the output produced by a regular blast run). writeBlastResult( parsed_blast, outFile ) @param parsed_blast: Bio.Blast.Record.Blast @type parsed_blast: Bio.Blast.Record.Blast @param outFile: file to write the blast result to @type outFile: str """ try: f = open( T.absfile( outFile ), 'w' ) i=1 for alignment in parsed_blast.alignments: for hsp in alignment.hsps: s = string.replace(alignment.title,'\n',' ') s = string.replace(s, 'pdb|', '\npdb|') f.write('Sequence %i: %s\n'%(i,s)) f.write('Score: %3.1f \tE-value: %2.1e\n'\ %(hsp.score, hsp.expect)) f.write('Lenght/Identities: %r\tPositives: %r\tGaps: %r\n'\ %(hsp.identities, hsp.positives, hsp.gaps)) f.write( '%s\n'%hsp.query ) f.write( '%s\n'%hsp.match ) f.write( '%s\n\n'%hsp.sbjct ) i += 1 f.close() except Exception, why: EHandler.warning("Error while writing blast result to %s" %outFile) globals().update(locals()) EHandler.warning("function namespace published to globals")
def __init__(self, outFolder='.', clusterLimit=50, verbose=0, log=None ):
    """
    @param outFolder: project folder (results are put into subfolder) ['.']
    @type outFolder: str
    @param clusterLimit: maximal number of returned sequence clusters
                         (default: 50)
    @type clusterLimit: int
    @param verbose: keep temporary files (default: 0)
    @type verbose: 1|0
    @param log: log file instance, if None, STDOUT is used (default: None)
    @type log: LogFile
    """
    self.outFolder = T.absfile( outFolder )
    self.log = log or StdLog()
    self.verbose = verbose

    self.record_dic = None      # dict ID - Bio.Fasta.Record
    self.clusters = None        # list of lists of ids
    self.bestOfCluster = None   # list of non-redundant ids

    ## never return more than this many clusters
    self.clusterLimit = clusterLimit

    self.clustersCurrent = None  # current number of clusters

    self.prepareFolders()
def inputComplex( options ):
    """
    Build the reference Complex: either unpickled from option 'c', or
    assembled from a PDB file ('i') and receptor/ligand chain indices
    ('r' / 'l').
    """
    if 'c' in options:
        return T.load( T.absfile( options['c'] ) )

    m = PDBModel( T.absfile( options['i'] ) )

    ## extract rec and lig chains
    rec_chains = T.toIntList( options['r'] )
    lig_chains = T.toIntList( options['l'] )

    rec = m.takeChains( rec_chains )
    lig = m.takeChains( lig_chains )

    ## create Protein complex
    com = Complex( rec, lig )
    return com
def convertOptions(o):
    """
    Translate commandline options where needed.
    You may need to add some entries here if you want to override
    exotic Executor parameters from the commandline.

    @param o: raw commandline option dictionary
    @type o: dict
    @return: the same dict with translated/converted values
    @rtype: dict
    """
    ## pop() instead of get()+del -- del crashed when 'v'/'o' were absent
    o['verbose'] = int(o.pop('v', 1))

    o['outFolder'] = tools.absfile(o.pop('o', '.'))

    o['zfilter'] = float(o.get('zfilter', 0))
    o['idfilter'] = float(o.get('idfilter', 0))

    o['log'] = o.get('log', None)
    if o['log']:
        ## bugfix: was o['o'] (key already deleted above) and the
        ## undefined name 'options'
        o['log'] = LogFile(o['outFolder'] + '/' + o['log'], 'a')

    o['debug'] = int(o.get('debug', 0))
    o['nice'] = int(o.get('nice', 0))

    if 'ending_model' in o:
        o['ending_model'] = int(o['ending_model'])
    if 'starting_model' in o:
        o['starting_model'] = int(o['starting_model'])

    return o
def findLeaprc(self, ff):
    """
    Guess full path to an existing leaprc file name or fragment of it.
    We first take the file as is, then look in AMBERHOME, then look
    in AMBERHOME/dat/leap/cmd (including the 'oldff' sub-folder for
    retired forcefields).

    @param ff: forcefield code (leaprc file ending, e.g. 'ff99', 'ff01')
               OR leaprc file name (e.g, 'leaprc.f99')
               OR leaprc path witin $AMBERHOME
               OR leaprc path
    @type ff: str

    @return: full path to an existing leaprc file
    @rtype: str

    @raise LeapError: if no existing file can be found
    """
    if P.exists(T.absfile(ff)):
        return T.absfile(ff)

    amberhome = self.exe.env['AMBERHOME']

    ## interpret ff as file ending ('ff99', 'ff03', etc)
    r = P.join(amberhome, self.LEAPRC_PATH, self.LEAPRC + ff)
    if P.exists(r):
        return r

    ## interpret ff as file ending ('ff99', 'ff03', etc pointing to
    ## 'old' forcefield)
    r = P.join(amberhome, self.LEAPRC_PATH, 'oldff', self.LEAPRC + ff)
    if P.exists(r):
        return r

    ## interpret ff as file name (e.g. 'leaprc.ff99')
    r = P.join(amberhome, self.LEAPRC_PATH, ff)
    if P.exists(r):
        return r

    ## interpret ff as file name (e.g. 'leaprc.ff99') for 'old' forcefield
    r = P.join(amberhome, self.LEAPRC_PATH, 'oldff', ff)
    if P.exists(r):
        return r

    ## interpret ff as path within AMBERHOME ('dat/leap/cmd/leaprc.f99')
    r = P.join(amberhome, ff)
    if P.exists(r):
        return r

    raise LeapError,\
          'Could not find Forcefield definition %s. ' % ff +\
          'Check exe_tleap.dat or provide an explicit leaprc parameter!'
def __init__(self, fname, mode='w'):
    """
    @param fname: name of log file
    @type fname: str
    @param mode: file opening mode (default: w)
    @type mode: str
    """
    self.fname = T.absfile( fname )
    self.mode = mode
    self._f = None   # file handle -- not opened here
def __init__(self, fname, outPath='', chainIdOffset=0,
             capBreaks=0, chainMask=0, log=None):
    """
    @param fname: pdb filename
    @type fname: str
    @param outPath: path for log file
    @type outPath: str
    @param chainIdOffset: start chain numbering at this offset
    @type chainIdOffset: int
    @param capBreaks: add ACE and NME to N- and C-term. of chain breaks [0]
    @type capBreaks: 0|1
    @param chainMask: chain mask for overriding the default
                      sequence identity [None]
    @type chainMask: [1|0]
    @param log: LogFile object
    @type log: object
    """
    self.pdb = Structure(fname)
    self.fname = fname
    self.outPath = T.absfile(outPath)
    self.chainIdOffset = chainIdOffset
    self.capBreaks = capBreaks
    ## default log goes to <outPath>/<pdbname>.log unless one is given
    self.log = LogFile(T.absfile(outPath) + '/' + self.pdbname() + '.log')
    if log:
        self.log = log

    self.chains = self.pdb.peptide_chains
    self.counter = -1
    self.threshold = 0.9  # sequence identity between multiple copies in PDB

    ## warn about likely multimers mentioned in the PDB header text
    self._expressionCheck(
        "[^\n].*[Hh][Oo][Mm][Oo].?[Dd][Ii][Mm][eE][Rr].*\n", 'HOMODIMER')
    self._expressionCheck("[^\n].*[Tt][Rr][Ii][Mm][Ee][Rr].*\n", 'TRIMER')
    self._hetatomCheck()

    self.log.add("Separate chains: \n------------------")
    self._removeDuplicateChains(chainMask)  # keep only one copy of molecule
    self._separateChainBreaks()
    self._assign_seg_ids()  # new segment id for each chain
def __init__( self, f_parm, f_crd, f_template=None,
              s=0, e=None, step=1, **kw ):
    """
    @param f_parm: path to amber topology file
    @type f_parm: str
    @param f_crd: path to amber trajectory file
    @type f_crd: str
    @param f_template: alternative ptraj input template (default: None)
    @type f_template: str
    @param s: start frame (default: 0, first)
    @type s: int
    @param e: end frame (default: None, last)
    @type e: int
    @param step: frame offset (default: 1, no offset )
    @type step: int
    @param kw: additional key=value parameters for Executor:
    @type kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local)
                 NOT TESTED (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STOUT) (default: None)
    """
    template = f_template or self.ptraj_script

    Executor.__init__( self, 'ptraj', template=template, push_inp=0, **kw )

    self.f_parm = T.absfile( f_parm )
    self.f_crd = T.absfile( f_crd )

    self.start = s
    self.stop = e or 10000000   # None -> effectively "until the last frame"
    self.step = step

    ## default result
    self.result = { 'T':None, 'mass':None, 'vibes':None,
                    'S_total':None, 'S_trans':None, 'S_rot':None,
                    'S_vibes':None, 'contributions':None,
                    'nframes':None, 'version':self.version(),
                    'node':self.node }
def __init__(self, fname, mode='w'):
    """
    @param fname: name of log file
    @type fname: str
    @param mode: file opening mode (default: 'w')
    @type mode: str
    """
    self.fname = T.absfile(fname)
    self.mode = mode

    ## the file handle itself is created elsewhere
    self._f = None
def __init__( self, outFolder, log=None ):
    """
    @param outFolder: output folder
    @type outFolder: str
    @param log: None reports to STDOUT (default: None)
    @type log: LogFile instance or None
    """
    self.log = log
    self.outFolder = T.absfile( outFolder )

    self.prepareFolders()
def __init__(self, outFolder='.', verbose=1):
    """
    @param outFolder: folder for output (default: .)
    @type outFolder: str
    @param verbose: write intermediary files (default: 1)
    @type verbose: 1|0
    """
    self.outFolder = T.absfile( outFolder )
    self.verbose = verbose

    self.prepareFolders()
def loadTraj( f, trajIndex, start=0, end=None, step=1 ):
    """
    Load traj from file, add frame names, extract portion if requested.

    @param f: trajectory file name
    @param trajIndex: index passed to addFrameNames for labelling frames
    @param start: first frame to keep (default: 0)
    @param end: end frame (exclusive); None -> until last frame
    @param step: frame stepping (default: 1)
    """
    t = T.load( T.absfile( f ) )
    addFrameNames( t, trajIndex )

    ## NOTE(review): end=0 also falls back to len(t) here -- confirm
    e = end or len( t )

    if start or end or (step != 1):
        t = t.takeFrames( range( start, e, step ) )

    return t
def loadTraj(f, trajIndex, start=0, end=None, step=1):
    """
    Load a trajectory from file, label its frames, and optionally slice
    out a portion of it.
    """
    traj = T.load(T.absfile(f))
    addFrameNames(traj, trajIndex)

    stop = end or len(traj)

    if start or end or step != 1:
        traj = traj.takeFrames(range(start, stop, step))

    return traj
def __init__(self, outFolder='.', verbose=1):
    """
    @param outFolder: folder for output (default: .)
    @type outFolder: str
    @param verbose: write intermediary files (default: 1)
    @type verbose: 1|0
    """
    self.verbose = verbose
    self.outFolder = T.absfile(outFolder)

    self.prepareFolders()
def writeReceptor(self, fname, ter=0 ):
    """
    Write receptor to pdb without TER statement (unless ter != 0).

    @param fname: output file name
    @type fname: str
    @param ter: option for how to treat the TER statement::
      - 0, don't write any TER statements (default)
      - 1, restore original TER statements
      - 2, put TER between all detected chains
    @type ter: 0|1|2
    """
    receptor = self.rec()
    receptor.writePdb( t.absfile( fname ), ter )
def get_local(self, existing=0):
    """
    Return a valid, absolute path. Either the existing original or with
    all substitutions for which environment variables exist.
    This function is time consuming (absfile - os.realpath is the culprit).

    @param existing: don't return a non-existing path
    @type existing: 0|1

    @return: valid absolute path in current environment
    @rtype: str

    @raise LocalPathError: if existing==1 and no existing path can be
                           constructed via environment variables
    """
    ## first try: join the original fragments back into one path
    if self.fragments:
        result = os.path.join(*[f[0] for f in self.fragments])
    else:
        result = ''
    result = T.absfile(result)

    if os.path.exists(result):
        return result

    ## second try: substitute fragments by environment variable values
    substitutions = self.get_substitution_dict()
    result = ''
    for abs, env in self.fragments:
        if env:
            result += substitutions.get(env, abs)
        else:
            result += abs

    result = T.absfile(result)

    if existing and not os.path.exists(result):
        raise LocalPathError, "Can't construct existing path from %s."%\
              self.formatted()

    return result
def writeComplex(self, fname, ter=0):
    """
    Write single pdb containing both receptor and ligand
    separated by END but not TER (unless ter!=0).

    @param fname: filename of pdb
    @type fname: str
    @param ter: option for how to treat the TER statement::
      - 0, don't write any TER statements (default)
      - 1, restore original TER statements
      - 2, put TER between all detected chains
    @type ter: 0|1|2
    """
    self.model().writePdb( t.absfile( fname ), ter )
def writeFasta( self, frecords, fastaOut ):
    """
    Create fasta file for given set of records.

    @param frecords: list of Bio.Blast.Records
    @type frecords: [Bio.Blast.Record]
    @param fastaOut: file name
    @type fastaOut: str
    """
    f = open( T.absfile(fastaOut), 'w' )

    for r in frecords:
        f.write( r.format('fasta') )  ## note better use direct SeqIO

    f.close()
def writeFasta(self, frecords, fastaOut):
    """
    Write a fasta file for a given set of records.

    @param frecords: list of Bio.Blast.Records
    @type frecords: [Bio.Blast.Record]
    @param fastaOut: file name
    @type fastaOut: str
    """
    out = open(T.absfile(fastaOut), 'w')

    ## note: using SeqIO directly would be cleaner
    for record in frecords:
        out.write(record.format('fasta'))

    out.close()
def tofile( self, fname ):
    """
    Write the collected residue charges to a Delphi charge file.

    @param fname: output file name
    @type fname: str
    """
    if not self.resmap:
        self.prepare()

    out = open( T.absfile( fname ), 'w' )

    ## fixed two-line Delphi charge file header
    out.write('! charge file generated by Biskit.delphi.DelphiCharges\n')
    out.write('atom__resnumbc_charge_\n')

    try:
        for resname, akeys in self.resmap.items():
            out.write( self.manyres2delphi( akeys ) )
    finally:
        out.close()
def setupJobs(self):
    """
    Prepare the job dictionary for 'ModelSlave'.

    @return: input informations for modeller for each project
    @rtype: {{str}}
    """
    r = {}

    for f in self.folders:
        modeller_input = {}
        modeller_input['outFolder'] = T.absfile(f)
        modeller_input['fastaTarget'] = \
            self.__dir_or_none( f, self.fastaTarget )
        modeller_input['f_pir'] = self.__dir_or_none(f, self.f_pir)
        modeller_input['template_folder'] = \
            self.__dir_or_none( f, self.template_folder )
        modeller_input['starting_model'] = self.starting_model
        modeller_input['ending_model'] = self.ending_model

        r[T.absfile(f)] = modeller_input

    return r
def tofile(self, fname):
    """
    Write the collected residue charges to a Delphi charge file.

    @param fname: output file name
    @type fname: str
    """
    if not self.resmap:
        self.prepare()

    fname = T.absfile(fname)
    f = open(fname, 'w')

    ## fixed two-line header expected by Delphi charge files
    f.write('! charge file generated by Biskit.delphi.DelphiCharges\n')
    f.write('atom__resnumbc_charge_\n')

    try:
        for resname, akeys in self.resmap.items():
            f.write(self.manyres2delphi(akeys))
    finally:
        f.close()
def setupJobs(self):
    """
    Prepare the job dictionary for 'ModelSlave'.

    @return: input informations for modeller for each project
    @rtype: {{str}}
    """
    r = {}

    for folder in self.folders:
        inp = {}
        inp['outFolder'] = T.absfile(folder)
        inp['fastaTarget'] = self.__dir_or_none(folder, self.fastaTarget)
        inp['f_pir'] = self.__dir_or_none(folder, self.f_pir)
        inp['template_folder'] = self.__dir_or_none(folder,
                                                    self.template_folder)
        inp['starting_model'] = self.starting_model
        inp['ending_model'] = self.ending_model

        r[T.absfile(folder)] = inp

    return r
def findLeaprc( self, ff ): """ Guess full path to an existing leaprc file name or fragment of it. We first take the file as is, then look in AMBERHOME, then look in AMBERHOME/dat/leap/cmd. @param ff: forcefield code (leaprc file ending, e.g. 'ff99', 'ff01') OR leaprc file name (e.g, 'leaprc.f99') OR leaprc path witin $AMBERHOME OR leaprc path @type ff: str @return: full path to an existing leaprc file @rtype: str @raise: LeapError, if no existing file can be found """ if P.exists( T.absfile(ff) ): return T.absfile(ff) amberhome = self.exe.env['AMBERHOME'] ## interpret ff as file ending ('ff99', 'ff03', etc) r = P.join( amberhome, self.LEAPRC_PATH, self.LEAPRC + ff ) if P.exists( r ): return r ## interpret ff as file name (e.g. 'leaprc.ff99') r = P.join( amberhome, self.LEAPRC_PATH, ff ) if P.exists( r ): return r ## interpret ff as path within AMBERHOME ('dat/leap/cmd/leaprc.f99') r = P.join( amberhome, ff ) if P.exists( r ): return r raise LeapError,\ 'Could not find Forcefield definition %s. ' % ff +\ 'Check exe_tleap.dat or provide an explicit leaprc parameter!'
def __init__(self, fname, outPath='', chainIdOffset=0,
             capBreaks=0, chainMask=0, log=None ):
    """
    @param fname: pdb filename
    @type fname: str
    @param outPath: path for log file
    @type outPath: str
    @param chainIdOffset: start chain numbering at this offset
    @type chainIdOffset: int
    @param capBreaks: add ACE and NME to N- and C-term. of chain breaks [0]
    @type capBreaks: 0|1
    @param chainMask: chain mask for overriding the default
                      sequence identity [None]
    @type chainMask: [1|0]
    @param log: LogFile object
    @type log: object
    """
    self.pdb = Structure(fname)
    self.fname = fname
    self.outPath = T.absfile( outPath )
    self.chainIdOffset = chainIdOffset
    self.capBreaks = capBreaks
    ## default log goes to <outPath>/<pdbname>.log unless one is given
    self.log = LogFile( T.absfile(outPath)+'/' + self.pdbname()+'.log')
    if log:
        self.log = log

    self.chains = self.pdb.peptide_chains
    self.counter = -1
    self.threshold = 0.9 # sequence identity between multiple copies in PDB

    ## warn about likely multimers mentioned in the PDB header text
    self._expressionCheck(
        "[^\n].*[Hh][Oo][Mm][Oo].?[Dd][Ii][Mm][eE][Rr].*\n", 'HOMODIMER')
    self._expressionCheck("[^\n].*[Tt][Rr][Ii][Mm][Ee][Rr].*\n", 'TRIMER')
    self._hetatomCheck()

    self.log.add("Separate chains: \n------------------")
    self._removeDuplicateChains(chainMask) # keep only one copy of molecule
    self._separateChainBreaks()
    self._assign_seg_ids() # new segment id for each chain
def writeLigand(self, fname, ter=0 ):
    """
    Write the transformed ligand to a pdb file; TER records are removed
    for xplor usage (unless ter != 0).

    @param fname: output filename
    @type fname: str
    @param ter: option for how to treat the TER statement::
      - 0, don't write any TER statements (default)
      - 1, restore original TER statements
      - 2, put TER between all detected chains
    @type ter: 0|1|2
    """
    self.lig().writePdb( t.absfile( fname ), ter )
def get_local( self, existing=0 ): """ Return a valid, absolute path. Either the existing original or with all substitutions for which environment variables exist. This function is time consuming (absfile - os.realpath is the culprit). @param existing: don't return a non-existing path @type existing: 0|1 @return: valid absolute path in current environment @rtype: str @raise LocalPathError: if existing==1 and no existing path can be constructed via environment variables """ result = string.join( [ f[0] for f in self.fragments ], '' ) result = T.absfile( result ) if os.path.exists( result ): return result substitutions = self.get_substitution_dict() result = '' for abs, env in self.fragments: if env: result += substitutions.get( env, abs ) else: result += abs result = T.absfile( result ) if existing and not os.path.exists( result ): raise LocalPathError, "Can't construct existing path from %s."%\ self.formatted() return result
def saveXLog(self, fname):
    """
    Save the content of the XPlor log file to a new file.

    @param fname: target file name
    @type fname: str

    @raise XplorerError: if logLines is empty (Xplor hasn't been run)
    """
    ## docstring fixed to name the exception class actually raised
    if not self.logLines:
        raise XplorerError('Xplor log file is (yet) empty.')

    f = open(T.absfile(fname), 'w')
    try:
        f.writelines(self.logLines)
    finally:
        ## close even if writing fails
        f.close()
def createHexPdb_single(model, fout=None):
    """
    Write PDB of one structure for hex.

    @param model: model
    @type model: PDBModel
    @param fout: out file name (default: pdbCode + _hex.pdb)
    @type fout: str

    @return: file name of result PDB
    @rtype: str
    """
    fout = fout or model.pdbCode + '_hex.pdb'
    fout = t.absfile(fout)

    model.writePdb(fout)
    return fout
def createHexPdb_single( model, fout=None ):
    """
    Write the PDB of a single structure for hex.

    @param model: model
    @type model: PDBModel
    @param fout: out file name (default: pdbCode + _hex.pdb)
    @type fout: str

    @return: file name of result PDB
    @rtype: str
    """
    target = t.absfile( fout or model.pdbCode + '_hex.pdb' )

    model.writePdb( target )

    return target
def __init__(self, outFolder='.', alignment=None, verbose=1):
    """
    @param outFolder: base folder
    @type outFolder: str
    @param alignment: path to alignment file in non-default location
    @type alignment: str
    @param verbose: write intermediary files (default: 0)
    @type verbose: 1|0
    """
    self.outFolder = T.absfile(outFolder)
    self.verbose = verbose

    ## default alignment location inside the project folder
    self.f_aln = alignment or self.outFolder + self.F_INPUT_ALNS

    self.sequences_name = ["target"]

    self.result = None  # will hold result dict after go()
def processTrajs( self ): """ Extract reference model and member trajectories from rec, lig, and com trajectories. Identify outlier member trajectories, if requested. """ ## free rec self.ref_frec = self.nameRef( self.rec ) t, self.ex_frec, self.members_frec = self.loadTraj( self.rec, self.ex_frec, self.ref_frec ) n_rec_members = t.n_members self.cr = self.cr or range( t.ref.lenChains( breaks=0 ) ) del t ## free lig self.ref_flig = self.nameRef( self.lig ) t, self.ex_flig, self.members_flig = self.loadTraj( self.lig, self.ex_flig, self.ref_flig ) n_lig_members = t.n_members del t ## complex fname = T.stripSuffix( T.absfile( self.com, resolveLinks=0 ) ) self.ref_com = fname + '_ref.complex' self.ref_blig= fname + '_blig.model' self.ref_brec= fname + '_brec.model' t, self.ex_com, self.members_com = self.loadTraj( self.com, self.ex_com ) n_com_members = t.n_members self.cl = self.cl or MU.difference( range(t.ref.lenChains()), self.cr) rec = t.ref.takeChains( self.cr, breaks=0 ) lig = t.ref.takeChains( self.cl, breaks=0 ) del t self.dumpMissing( Complex( rec, lig ), self.ref_com ) self.dumpMissing( rec, self.ref_brec ) self.dumpMissing( lig, self.ref_blig ) self.equalizeMemberCount( n_rec_members, n_lig_members, n_com_members ) if self.jack: self.prepareJackknife()
def test_TemplateCleaner(self):
    """Mod.TemplateCleaner test"""
    import glob

    self.c = TemplateCleaner( self.outfolder, log=self.l)

    inp_dic = modUtils.parse_tabbed_file(
        T.absfile( self.outfolder + '/templates/nr/chain_index.txt' ) )

    self.c.process_all( inp_dic )

    if self.local:
        print 'TemplateCleaner log file written to: %s'% self.f_out
        globals().update( locals() )

    ## check that cleaned pdbs have been written
    f_cleaned = glob.glob(self.outfolder+ '/templates/nr_cleaned/*pdb')

    self.assert_( len(f_cleaned) > 0 )
def __init__( self, fdefault, fuser, createmissing=False, verbose=1 ):
    """
    @param fdefault: default configuration file
    @type fdefault: str
    @param fuser: user configuration file
    @type fuser: str
    @param createmissing: create user config file if missing
    @type createmissing: bool
    @param verbose: verbosity level (default: 1)
    @type verbose: 1|0
    """
    self.verbose = verbose

    self.fdefault = fdefault
    self.fuser = fuser
    self.createmissing = createmissing

    ## remember whether the user file was absent at construction time
    self.fusermissing = not os.path.exists( T.absfile(fuser) )

    self.settings = []  # will hold extracted Setting's
def __is_path( self, o, minLen=3 ):
    """
    Check whether an object is a path string (existing or not).

    @param minLen: minimal length of string o to be counted as path
    @type minLen: int

    @return: 1|0
    @rtype: int
    """
    looks_like_path = ( type( o ) == str and o.find('/') != -1
                        and len(o) >= minLen and o.find(':') == -1 )

    if not looks_like_path:
        return 0

    ## accept only strings that absfile can actually resolve
    try:
        T.absfile( o )
        return 1
    except:
        return 0
def __validPath( self, v ):
    """
    @param v: potential path name
    @type v: str

    @return: validated absolute Path
    @rtype : str

    @raise InvalidPath: if path is not found
    """
    try:
        v = T.absfile( v )
        ## empty or non-existing paths are rejected
        if not v or not os.path.exists( v ):
            raise InvalidPath, 'invalid path %r' % v

        return v

    except InvalidPath, e:
        raise
def createHexPdb( modelDic, fout=None ):
    """
    Write pdb for hex with models separated by MODEL%i/ENDMODEL.

    @param modelDic: dictionary mapping an integer to each model
    @type modelDic: dict {int:PCRModel}
    @param fout: output name, default is pdbCode + _hex.pdb
    @type fout: str

    @return: file name of result PDB
    @rtype: str
    """
    fout = fout or modelDic[1].pdbCode + '_hex.pdb'
    fout = t.absfile( fout )

    ## open new file
    out = open(fout, 'w')
    ## fetch name for temporary file
    ## NOTE(review): tempfile.mktemp is race-prone; consider mkstemp
    tmpFile = tempfile.mktemp('_pcr2hex.pdb')

    numbers = modelDic.keys()
    numbers.sort()
    for n in numbers:
        ## write temporary pdb and open it
        modelDic[ n ].writePdb( tmpFile )

        pdb_temp = open( tmpFile )

        out.write("MODEL%6i\n" % n)
        lines = pdb_temp.readlines()    # get all lines
        for line in lines:
            ## '<>' is the Python-2 inequality operator
            if line[:3] <> 'END':       # filter out END
                out.write(line)
        out.write("ENDMDL\n")

        ## remove temporary file
        pdb_temp.close()
        os.remove( tmpFile )

    out.write("END")
    out.close()
    return fout