def checkListStatus( cl, update=0, force_keys=None, version=-1 ):
    """
    If the list is to be only updated then check that the list/sublist
    has entries that can be updated. If an unupdatable list is sent to
    the contactMaster the calculation will halt.

    cl         - list of complexes; each entry must offer
                 values( keys=.., default=.. )
    update     - 1||0, are we in update mode? if not return whole list
    force_keys - [str], keys the calculation is restricted to
                 (default: None, treated as empty list)
    version    - int, if not -1, c[version] is taken from every entry
                 that needs an update (default: -1)
    -> cl if not in update mode or if at least one entry needs an update,
       otherwise None
    """
    if not update:
        return cl

    ## avoid a shared mutable default argument
    if force_keys is None:
        force_keys = []

    ## all complexes where force_keys are missing or where values are None
    todo = [ c for c in cl
             if None in c.values( keys=force_keys, default=None ) ]

    if version != -1:
        todo = [ c[version] for c in todo ]

    ## add missing force keys with value None
    for c in todo:
        for k in force_keys:
            if k not in c:
                c[ k ] = None

    if todo:
        msg = "\n%i out of %i complexes are to be updated\n\n"
        t.flushPrint( msg % (len(todo), len(cl)) )
        return cl

    t.flushPrint( "\nList contains no data that can be updated\n" )
    return None
def go(self, dict):
    """
    Run Modeller job.

    Every value of the job dictionary must provide the keys 'outFolder',
    'fastaTarget', 'f_pir', 'template_folder', 'starting_model' and
    'ending_model'. A Modeller.log is opened in each outFolder.
    Any exception is passed on to self.reportError together with the
    parameters of the job that was being processed.

    @param dict: dictionary with run parameters
    @type  dict: {param:value}
    """
    ## NOTE(review): d is filled but not returned in the code visible
    ## here -- confirm against the full method.
    d = {}
    val = None

    try:
        T.flushPrint( self.params['progress_str'] )

        for job_id, val in dict.items():

            modeller_log = LogFile( '%s/Modeller.log' % val["outFolder"] )

            d[job_id] = val

            m = M( outFolder=val["outFolder"],
                   fasta_target=val["fastaTarget"],
                   f_pir=val["f_pir"],
                   template_folder=val["template_folder"],
                   starting_model=val["starting_model"],
                   ending_model=val["ending_model"],
                   log=modeller_log )

            m.run()

    except Exception as why:
        ## val is the job that failed (None if nothing was started yet)
        self.reportError( 'ERROR ' + str(why), val )
def pairwise_cast( models ):
    """
    atom cast all models in list with each other

    Each pair of models is compared with equals(); if residues or atoms
    differ, both models are reduced in place (keep()) to the atoms
    reported by compareAtoms().

    models - list of models
    -> modified list (the same list object)
    """
    n_models = len( models )

    for i in range( n_models ):
        m1 = models[i]

        for j in range( i + 1, n_models ):

            T.flushPrint('.')

            m2 = models[j]

            eq_res, eq_atm = m1.equals( m2 )
            if eq_res and eq_atm:
                continue

            i1, i2 = m1.compareAtoms( m2 )

            delta_1 = len( m1 ) - len( i1 )
            delta_2 = len( m2 ) - len( i2 )

            m1.keep( i1 )
            m2.keep( i2 )

            f1 = T.stripFilename( m1.sourceFile() )
            f2 = T.stripFilename( m2.sourceFile() )

            ## single-argument print() behaves the same on Python 2 and 3
            print( "Removed %i atoms from %s" % (delta_1, f1) )
            print( "Removed %i atoms from %s" % (delta_2, f2) )

    return models
def go(self, dict):
    """
    Run Modeller job.

    Every value of the job dictionary must provide the keys 'outFolder',
    'fastaTarget', 'f_pir', 'template_folder', 'starting_model' and
    'ending_model'. A Modeller.log is opened in each outFolder.
    Any exception is passed on to self.reportError together with the
    parameters of the job that was being processed.

    @param dict: dictionary with run parameters
    @type  dict: {param:value}
    """
    ## NOTE(review): d is filled but not returned in the code visible
    ## here -- confirm against the full method.
    d = {}
    val = None

    try:
        T.flushPrint(self.params['progress_str'])

        for job_id, val in dict.items():

            modeller_log = LogFile('%s/Modeller.log' % val["outFolder"])

            d[job_id] = val

            m = M(outFolder=val["outFolder"],
                  fasta_target=val["fastaTarget"],
                  f_pir=val["f_pir"],
                  template_folder=val["template_folder"],
                  starting_model=val["starting_model"],
                  ending_model=val["ending_model"],
                  log=modeller_log)

            m.run()

    except Exception as why:
        ## val is the job that failed (None if nothing was started yet)
        self.reportError('ERROR ' + str(why), val)
def pairwise_cast(models):
    """
    atom cast all models in list with each other

    Each pair of models is compared with equals(); if residues or atoms
    differ, both models are reduced in place (keep()) to the atoms
    reported by compareAtoms().

    models - list of models
    -> modified list (the same list object)
    """
    n_models = len(models)

    for i in range(n_models):
        m1 = models[i]

        for j in range(i + 1, n_models):

            T.flushPrint('.')

            m2 = models[j]

            eq_res, eq_atm = m1.equals(m2)
            if eq_res and eq_atm:
                continue

            i1, i2 = m1.compareAtoms(m2)

            delta_1 = len(m1) - len(i1)
            delta_2 = len(m2) - len(i2)

            m1.keep(i1)
            m2.keep(i2)

            f1 = T.stripFilename(m1.sourceFile())
            f2 = T.stripFilename(m2.sourceFile())

            ## single-argument print() behaves the same on Python 2 and 3
            print("Removed %i atoms from %s" % (delta_1, f1))
            print("Removed %i atoms from %s" % (delta_2, f2))

    return models
def go(self, cmplxDic):
    """
    Calculate contacts and scores for all complexes.

    For every complex the contact, interface rms, reduced contact,
    pair score, FoldX and conservation ('cons_ent', 'cons_max',
    'cons_abs') calculations are run, the contacter version is stored
    in c['__version_contacter'] and the complex is slimmed.

    @param cmplxDic: dictionary of complexes::
                       {soln:Complex, soln:Complex, ...}
    @type  cmplxDic: {int:Complex}

    @return: dictionary with the same keys pointing to the processed
             complexes::
               { soln : Complex, soln : Complex ....}
    @rtype: {int:Complex}
    """
    result = {}

    startTime = time.time()

    for soln, c in cmplxDic.items():
        T.flushPrint("%i," % soln)

        self.calcContacts(soln, c)

        self.calcInterfaceRms(soln, c)

        self.calcReducedContacts(soln, c)

        ## TODO: Prosa will not run when called via contactSlave, runs as
        ## it should when called as c.prosa2003Energy() in the interpreter.
        ## What's wrong here? For now the Prosa calculation is skipped.
        ## self.calcProsa(soln, c)

        self.calcPairScore(soln, c)  ## uses res-contacts

        self.calcFoldX(soln, c)  ## uses rec/lig.info['foldX'] if available

        for method in ['cons_ent', 'cons_max', 'cons_abs']:
            self.calcConservation(soln, c, method)

        c['__version_contacter'] = self.version()

        result[soln] = c

        c.slim()

    ## an empty job would otherwise end in a division by zero
    n_complexes = len(cmplxDic)
    if n_complexes:
        print("\navg time for last %i complexes: %f s" %
              (n_complexes, (time.time() - startTime) / n_complexes))

    return result
def go(self, cmplxDic):
    """
    Calculate contacts and scores for all complexes.

    For every complex the contact, interface rms, reduced contact,
    pair score, FoldX and conservation ('cons_ent', 'cons_max',
    'cons_abs') calculations are run, the contacter version is stored
    in c['__version_contacter'] and the complex is slimmed.

    @param cmplxDic: dictionary of complexes::
                       {soln:Complex, soln:Complex, ...}
    @type  cmplxDic: {int:Complex}

    @return: dictionary with the same keys pointing to the processed
             complexes::
               { soln : Complex, soln : Complex ....}
    @rtype: {int:Complex}
    """
    result = {}

    startTime = time.time()

    for soln, c in cmplxDic.items():
        T.flushPrint( "%i," % soln )

        self.calcContacts( soln, c )

        self.calcInterfaceRms( soln, c )

        self.calcReducedContacts( soln, c )

        ## TODO: Prosa will not run when called via contactSlave, runs as
        ## it should when called as c.prosa2003Energy() in the interpreter.
        ## What's wrong here? For now the Prosa calculation is skipped.
        ## self.calcProsa( soln, c )

        self.calcPairScore( soln, c )  ## uses res-contacts

        self.calcFoldX( soln, c )  ## uses rec/lig.info['foldX'] if available

        for method in ['cons_ent', 'cons_max', 'cons_abs']:
            self.calcConservation( soln, c, method )

        c['__version_contacter'] = self.version()

        result[ soln ] = c

        c.slim()

    ## an empty job would otherwise end in a division by zero
    n_complexes = len( cmplxDic )
    if n_complexes:
        print( "\navg time for last %i complexes: %f s" %
               ( n_complexes, (time.time()-startTime)/n_complexes ) )

    return result
def sloppyload( f ):
    """
    Unpickle an object; fall back to PickleUpgrader if the normal
    T.load fails with an UnpicklingError.

    f - str, file name
    -> any, unpickled object
    """
    try:
        T.flushPrint( "Loading " + str(f) + '\n' )

        return T.load( T.absfile( f ) )

    except cPickle.UnpicklingError:

        print( "Trying to load %s in sloppy mode..." % f )

        ## close the file again, also if the upgrade itself fails
        with open( T.absfile( f ) ) as handle:
            return PickleUpgrader( handle ).load()
def go(self, dict):
    """
    Run alignment job.

    For every entry an Aligner is created on val['outFolder'] (logging
    to Aligner.log in that folder), prepared with
    align_for_modeller_inp() and started. If outFolder + TC.F_COFFEE
    does not exist, the pdb files come from self.prepareT_coffee(),
    otherwise from val['pdbFiles'].
    Any exception is passed on to self.reportError together with the
    parameters of the job that was being processed.

    @param dict: dictionary with run parameters
    @type  dict: {param:value}
    """
    ## NOTE(review): d is filled but not returned in the code visible
    ## here -- confirm against the full method.
    d = {}
    val = None

    try:
        T.flushPrint(self.progress_str)

        for job_id, val in dict.items():

            out_folder = val["outFolder"]
            aligner_log = LogFile('%s/Aligner.log' % out_folder)

            d[job_id] = val

            aligner_log.add('Slave aligns %s on %s' %
                            (job_id, os.uname()[1]))

            a = Aligner(outFolder=out_folder, log=aligner_log)

            ## For the cross validation
            if not os.path.exists(out_folder + TC.F_COFFEE):
                pdb_files = self.prepareT_coffee(out_folder + VS.F_TCOFFEE)

            ## For a classic project folder
            else:
                pdb_files = val["pdbFiles"]

            a.align_for_modeller_inp(
                pdbFiles=pdb_files,
                fasta_templates=val["fastaTemplates"],
                fasta_sequences=val["fastaSequences"],
                fasta_target=val["fastaTarget"])

            a.go()

    except Exception as why:
        ## val is the job that failed (None if nothing was started yet)
        self.reportError('ERROR ' + str(why), val)
def go(self, dict):
    """
    Run alignment job.

    For every entry an Aligner is created on val['outFolder'] (logging
    to Aligner.log in that folder), prepared with
    align_for_modeller_inp() and started. If outFolder + TC.F_COFFEE
    does not exist, the pdb files come from self.prepareT_coffee(),
    otherwise from val['pdbFiles'].
    Any exception is passed on to self.reportError together with the
    parameters of the job that was being processed.

    @param dict: dictionary with run parameters
    @type  dict: {param:value}
    """
    ## NOTE(review): d is filled but not returned in the code visible
    ## here -- confirm against the full method.
    d = {}
    val = None

    try:
        T.flushPrint( self.progress_str )

        for job_id, val in dict.items():

            out_folder = val["outFolder"]
            aligner_log = LogFile( '%s/Aligner.log' % out_folder )

            d[job_id] = val

            aligner_log.add( 'Slave aligns %s on %s' %
                             (job_id, os.uname()[1]) )

            a = Aligner( outFolder=out_folder, log=aligner_log )

            ## For the cross validation
            if not os.path.exists( out_folder + TC.F_COFFEE ):
                pdb_files = self.prepareT_coffee( out_folder + VS.F_TCOFFEE )

            ## For a classic project folder
            else:
                pdb_files = val["pdbFiles"]

            a.align_for_modeller_inp(
                pdbFiles=pdb_files,
                fasta_templates=val["fastaTemplates"],
                fasta_sequences=val["fastaSequences"],
                fasta_target=val["fastaTarget"] )

            a.go()

    except Exception as why:
        ## val is the job that failed (None if nothing was started yet)
        self.reportError( 'ERROR ' + str(why), val )
def go(self, dict):
    """
    Perform slave task.

    Every value is incremented by one. A short arithmetic loop and a
    pause of 0.5 s per entry are added (presumably to simulate a longer
    calculation).

    @param dict: job dictionary; values must support + 1
    @type  dict: dict

    @return: dictionary with the same keys and each value increased by 1
    @rtype: dict
    """
    d = {}

    T.flushPrint(self.params['progress_str'])

    for job_id, val in dict.items():

        d[job_id] = val + 1

        ## busy work only, the value of f is never used
        for i in range(1, 1000):
            f = 1.0 * i / 1200232112312.11

        T.flushPrint(str(job_id) + ' ')
        time.sleep(0.5)

    print("Done.")
    return d
def go(self, dict):
    """
    Perform slave task.

    Every value is incremented by one. A short arithmetic loop and a
    pause of 0.5 s per entry are added (presumably to simulate a longer
    calculation).

    @param dict: job dictionary; values must support + 1
    @type  dict: dict

    @return: dictionary with the same keys and each value increased by 1
    @rtype: dict
    """
    d = {}

    T.flushPrint( self.params['progress_str'] )

    for job_id, val in dict.items():

        d[job_id] = val+1

        ## busy work only, the value of f is never used
        for i in range(1, 1000):
            f = 1.0 * i / 1200232112312.11

        T.flushPrint( str(job_id) + ' ' )
        time.sleep(0.5)

    print( "Done." )
    return d
def setHexComplexes(self, com_lst, n=20 ):
    """
    add contact matrices of hex-generated (wrong) complexes for comparison

    com_lst is searched from the start for complexes with
    com['fractNatCont'] == 0.0. Hydrogens are removed from these, the
    models are sorted and reduced to self.i_free_rec / self.i_free_lig
    and their atom contacts are appended to self.hexContacts. The
    search stops after n such complexes or at the end of the list.
    Complexes that raise an exception are reported and skipped.

    @param com_lst: ComplexList with contacts calculated
    @type  com_lst: ComplexList
    @param n: number of complexes to collect (default: 20)
    @type  n: int
    """
    t.flushPrint('adding hex-generated complexes')

    self.hexContacts = []

    i = 0
    n_available = len( com_lst )

    ## n grows by one for every complex that can not be used; the second
    ## condition keeps the index inside the list
    while i < n and i < n_available:

        com = com_lst[i]
        ## advance first -- a failing complex must not stall the loop
        i += 1
        t.flushPrint('#')

        try:
            if com['fractNatCont'] == 0.0:  #com['fnac_10'] == 0.0:
                com.rec_model.remove( com.rec().maskH() )
                com.lig_model.remove( com.lig_model.maskH() )

                com.rec_model = com.rec_model.sort()
                com.lig_model = com.lig_model.sort()

                com.rec_model.keep( self.i_free_rec )
                com.lig_model.keep( self.i_free_lig )
                com.lig_transformed = None

                self.hexContacts += [ com.atomContacts() ]

            else:
                n += 1

        except Exception:
            ## treat a broken complex like an unusable one
            n += 1
            print( t.lastError() )

    self.hexSurfaces = self.__categorizeHexSurf( 0.2 )
def go(self, jobs):
    """
    The calculation.

    For every job an AmberEntropist is created from the protocol
    (extended by 'nice': self.nice) and run. A non-empty result gets
    the key '__version_AmberEntropist'; an empty result is stored as
    None. EntropistError and IOError are handed to self.reportError.

    @param jobs: dictionary with { int_id : str_protocol }
    @type  jobs: dict

    @return: result from AmberEntropist.run()
    @rtype: dict
    """
    ## NOTE(review): result and startTime are not used further in the
    ## code visible here and no return statement is visible although one
    ## is documented -- confirm against the full method.
    result = {}

    startTime = time.time()

    for job_id, protocol in jobs.items():

        try:
            T.flushPrint("%s " % str(job_id))

            protocol.update({'nice': self.nice})

            x = None  ## free memory from previous run

            x = AmberEntropist(**protocol)

            x.run()

            r = x.result

            if r:
                r['__version_AmberEntropist'] = x.version()
                result[job_id] = r
            else:
                result[job_id] = None

        except EntropistError as why:
            self.reportError(str(type(why)), job_id)

        except IOError as why:
            self.reportError(str(why), job_id)
def go(self, jobs):
    """
    The calculation.

    For every job an AmberEntropist is created from the protocol
    (extended by 'nice': self.nice) and run. A non-empty result gets
    the key '__version_AmberEntropist'; an empty result is stored as
    None. EntropistError and IOError are handed to self.reportError.

    @param jobs: dictionary with { int_id : str_protocol }
    @type  jobs: dict

    @return: result from AmberEntropist.run()
    @rtype: dict
    """
    ## NOTE(review): result and startTime are not used further in the
    ## code visible here and no return statement is visible although one
    ## is documented -- confirm against the full method.
    result = {}

    startTime = time.time()

    for job_id, protocol in jobs.items():

        try:
            T.flushPrint( "%s " % str(job_id) )

            protocol.update( {'nice':self.nice} )

            x = None  ## free memory from previous run

            x = AmberEntropist( **protocol )

            x.run()

            r = x.result

            if r:
                r['__version_AmberEntropist'] = x.version()
                result[ job_id ] = r
            else:
                result[ job_id ] = None

        except EntropistError as why:
            self.reportError( str(type(why)), job_id )

        except IOError as why:
            self.reportError( str(why), job_id )
def go(self, jobs):
    """
    Run job.

    For every job the two frame chunks are fetched with
    self.__getFrames and handed to self.calcRmsd. An IOError is
    reported via self.reportError together with the files of the job
    that was being processed.

    @param jobs: { ((int,int),(int,int)) : (str, str) }, maps start and
                 end position of two chunks of coordinate frames to the
                 files where the two chunks are pickled.
    @type  jobs: {((int,int),(int,int)) : (str, str)}

    @return: the rms between the frames
    @rtype: [float]
    """
    ## NOTE(review): result is filled but no return statement is visible
    ## here although one is documented -- confirm against the full method.
    result = {}
    frames = None

    startTime = time.time()

    try:
        for i, frames in jobs.items():

            T.flushPrint( str(i) )

            f1 = self.__getFrames( frames[0] )
            f2 = self.__getFrames( frames[1] )

            result[ i ] = self.calcRmsd( i, f1, f2 )

        ## an empty job would otherwise end in a division by zero
        n_jobs = len( jobs )
        if n_jobs:
            print( "\navg time for last %i complexes: %f s" %
                   ( n_jobs, (time.time()-startTime)/n_jobs ) )

    except IOError as why:
        self.reportError( "Cannot open temporary frame file " +
                          "(can happen if slave catches exit signal)",
                          frames )
def reduceComplexList( cl ):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.

    The list is left untouched if its length equals
    (number of rec models) * (number of lig models) * 512; otherwise it
    is filtered by 'soln' with the range (0,512).

    cl - ComplexList
    -> shortened or unchanged ComplexList
    """
    n_before = len( cl )
    n_expected = len( cl.models.recModels() ) \
                 * len( cl.models.ligModels() ) * 512

    ## nothing to do, list already has the expected size
    if n_before == n_expected:
        return cl

    T.flushPrint('\nRemoving HEX solutions greater than 512.\n')

    shortened = cl.filter( 'soln', (0,512) )
    n_after = len( shortened )

    T.flushPrint('%i solutions removed.\n' % (n_before - n_after) )
    T.flushPrint('%i solutions remain.\n' % n_after )

    return shortened
def reduceComplexList(cl):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.

    The list is left untouched if its length equals
    (number of rec models) * (number of lig models) * 512; otherwise it
    is filtered by 'soln' with the range (0, 512).

    cl - ComplexList
    -> shortened or unchanged ComplexList
    """
    n_before = len(cl)
    n_expected = len(cl.models.recModels()) \
        * len(cl.models.ligModels()) * 512

    ## nothing to do, list already has the expected size
    if n_before == n_expected:
        return cl

    T.flushPrint('\nRemoving HEX solutions greater than 512.\n')

    shortened = cl.filter('soln', (0, 512))
    n_after = len(shortened)

    T.flushPrint('%i solutions removed.\n' % (n_before - n_after))
    T.flushPrint('%i solutions remain.\n' % n_after)

    return shortened
def reduceComplexList(cl):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.

    The list is returned unchanged if its length equals
    (number of rec models) * (number of lig models) * 512 or if it is
    shorter than 512 (a note is printed in the latter case). Otherwise
    it is filtered by 'soln' with the range (0, 512).

    cl - ComplexList
    -> shortened or unchanged ComplexList
    """
    n_all = len(cl)

    if n_all == len(cl.recModels()) * len(cl.ligModels()) * 512:
        return cl

    if n_all < 512:
        ## single-argument print() behaves the same on Python 2 and 3
        print('\nNOTE: THE COMPLEX LIST IS SHORTER THAN 512 COMPLEXES')
        print(' COMPLEX LIST CONTAINS %i ONLY COMPLEXES\n' % n_all)
        return cl

    t.flushPrint('\nRemoving HEX solutions greater than 512.\n')

    r = cl.filter('soln', (0, 512))
    n_kept = len(r)

    t.flushPrint('%i solutions removed.\n' % (n_all - n_kept))
    t.flushPrint('%i solutions remain.\n' % n_kept)

    return r
def reduceComplexList( cl ):
    """
    If cl comes from a multiple model docking, and HEX macrodocking was
    switched on, the list contains more than 512 solutions per model
    combination. Keep only the first 512 solutions.

    The list is returned unchanged if its length equals
    (number of rec models) * (number of lig models) * 512 or if it is
    shorter than 512 (a note is printed in the latter case). Otherwise
    it is filtered by 'soln' with the range (0,512).

    cl - ComplexList
    -> shortened or unchanged ComplexList
    """
    n_all = len( cl )

    if n_all == len(cl.recModels()) * len(cl.ligModels()) * 512:
        return cl

    if n_all < 512:
        ## single-argument print() behaves the same on Python 2 and 3
        print( '\nNOTE: THE COMPLEX LIST IS SHORTER THAN 512 COMPLEXES' )
        print( ' COMPLEX LIST CONTAINS %i ONLY COMPLEXES\n'%n_all )
        return cl

    t.flushPrint('\nRemoving HEX solutions greater than 512.\n')

    r = cl.filter('soln', (0,512) )
    n_kept = len( r )

    t.flushPrint('%i solutions removed.\n' % (n_all - n_kept) )
    t.flushPrint('%i solutions remain.\n' % n_kept )

    return r
###########################
# MAIN
###########################

## show usage if there are too few command line arguments
if len(sys.argv) < 3:
    _use()

options = t.cmdDict(defaultOptions())

##
## current keys used for scoring
##
scoreKeys = ['eProsa', 'ePairScore', 'foldX', 'cons_ent', 'cons_max']

## load docking solutions
t.flushPrint("\nLoading complex list %s ... " % t.absfile(options['i']))
complex_lst = t.load(options['i'])
t.flushPrint("done\n")

## validate and expand list of keys to be calculated
force = []

## 'in' replaces dict.has_key, which no longer exists in Python 3
if 'f' in options:
    raw_force = t.toList(options['f'])

    ## check that the key is valid
    validKeys = [
        'fnac_4.5', 'fnac_10', 'fnrc_4.5', 'fnarc_9', 'fnarc_10',
        'c_ratom_9', 'c_ratom_10', 'eProsa', 'ePairScore', 'foldX',
        'cons_ent', 'cons_max', 'cons_abs', 'rms_if', 'rms_if_bb'
    ]
## Find homologues to the target sequence using blast against "seq_db"
## Cluster the sequences and write the result to nr.fasta
## input: target.fasta
##
## output: sequences/all.fasta
##                  /blast.out
##                  /cluster_result.out
##                  /nr.fasta (input for Aligner)
searcher = SequenceSearcher( outFolder=outFolder, clusterLimit=clustLim,
                             verbose=1, log=log )

if 'psi' in options:
    ## local PSIBlast - not fully implemented!!
    tools.flushPrint('Performing local PSI blast search\n')
    rounds = int( options['psi'] )
    searcher.localPSIBlast( f_target, seq_db, e=e, rounds=rounds,
                            alignments=aln, **ext_options )

else:
    ## if it looks like local Blast is installed
    ## ('in' replaces has_key, which no longer exists in Python 3)
    if 'BLASTDB' in os.environ and settings.blast_bin:
        ## announce the local search only if it is really performed
        tools.flushPrint('Performing local blast search\n')
        searcher.localBlast( f_target, seq_db, 'blastp',
                             alignments=aln, e=e, **ext_options )

    ## try remote Blast
    else:
        tools.flushPrint('Performing remote blast search\n')
        searcher.remoteBlast( f_target, seq_db, 'blastp',
                              alignments=aln, e=e, **ext_options )
pdb - PDB code to be stored in trajectory """ sys.exit(0) ########## ## MAIN ## use() o = T.cmdDict( {'n':10} ) f_in = T.absfile( o['i'] ) f_out = T.absfile( o.get('o', f_in) ) n = int( o['n'] ) T.flushPrint("Loading...") t = T.load( f_in ) T.flushPrint("Converting %i frames..." % len(t) ) if isinstance(t, EnsembleTraj ): T.flushPrint( "Nothing to be done!\n") sys.exit(0) t = traj2ensemble( t, n ) if 'pdb' in o: t.ref.pdbCode = o['pdb'] if f_in == f_out: os.rename( f_in, f_in + '_backup')
print 'Current directory is not a valid modeling project folder.' _use( options ) ## Try to add project folders ## look for default cross-validation projects d = [] if os.path.exists( f + VS.F_RESULT_FOLDER ): d = [ f ] if options.has_key('d'): folders = T.toList(options['d']) else: folders = d T.flushPrint("Starting job...\n") for f in folders: a = A(outFolder=f) a.go() T.flushPrint("Done.\n") ## show result in PyMol if options.has_key('s'): p=Pymoler() p.addPdb( folders[0] + a.F_FINAL_PDB ) p.add('color_b') p.add('select na, b<0') p.add('color grey, na') p.add('hide all')
'rdic': 'pcr_rec/models.dic' }) if len(sys.argv) < 2: syntax() fs = [T.absfile(f) for f in T.toList(options['i'])] result = ComplexList() rec_dic = T.load(T.absfile(options['rdic'])) lig_dic = T.load(T.absfile(options['ldic'])) for f in fs: T.flushPrint('Loading %s ...' % f) cl = T.load(f) cl = reduceComplexList(cl) result += cl T.flushPrint('done\n') T.flushPrint('correct model numbers...') correct_model_numbers(result, rec_dic, lig_dic) T.flushPrint('\ncasting all rec models...') pairwise_cast(result.models.recModels())
Default options: """ for key, value in o.items(): print "\t-",key, "\t",value sys.exit(0) if __name__ == '__main__': options = T.cmdDict({'o':[ os.getcwd() ]}) if '?' in options or 'help' in options: _use( options ) folders = T.toList( options['o'] ) if not os.path.exists( folders[0] +'/templates'): print 'Current directory is not a valid modeling folder.' _use( options ) T.flushPrint( "Creating folders and links...\n" ) for f in folders: sv = VS(outFolder=f) sv.go(f) T.flushPrint( "done\n" )
############### ## SequenceSearcher ## ## Find homologues to the target sequence using blast against "seq_db" ## Cluster the seuences and write the result to nr.fasta ## input: target.fasta ## ## output: sequences/all.fasta ## /blast.out ## /cluster_result.out ## /nr.fasta (input for Aligner) tools.flushPrint('Searching for homologues ...') try: # initiate searcher = SequenceSearcher( outFolder=outFolder, verbose=1, log=log ) # ## local PSIBlast - not fully implemented!! # searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000) ## local Blast searcher.localBlast( f_target, seq_db, 'blastp', alignments=500, e=0.0001 ) ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1 ## expects all.fasta # searcher.clusterFastaIterative( ) searcher.clusterFasta()
## without result folder and without explicit -d there is nothing to do
if not os.path.exists(f + VS.F_RESULT_FOLDER) and 'd' not in options:
    print('Current directory is not a valid modeling project folder.')
    _use(options)

## Try to add project folders
## look for default cross-validation projects
d = []

if os.path.exists(f + VS.F_RESULT_FOLDER):
    d = [f]

## 'in' replaces dict.has_key, which no longer exists in Python 3
if 'd' in options:
    folders = T.toList(options['d'])
else:
    folders = d

T.flushPrint("Starting job...\n")

for f in folders:
    a = A(outFolder=f)
    a.go()

T.flushPrint("Done.\n")

## show result in PyMol
## (needs at least one processed folder, otherwise a is undefined)
if 's' in options and folders:
    p = Pymoler()
    p.addPdb(folders[0] + a.F_FINAL_PDB)
    p.add('color_b')
    p.add('select na, b<0')
    p.add('color grey, na')
    p.add('hide all')
_use(options) if not os.path.exists(f + B.F_INPUT_FOLDER) and not options.has_key('d'): print 'Current directory is not a valid modeling project folder.' _use(options) ## Try to add project folders ## look for default cross-validation projects d = [] if os.path.exists(f + VS.F_RESULT_FOLDER): d = glob.glob(f + VS.F_RESULT_FOLDER + '/*') if options.has_key('d'): folders = T.toList(options['d']) else: folders = d reference = options.get('ref', None) model_list = options.get('modlist', None) T.flushPrint("Starting job...\n") for f in folders: T.flushPrint("\tWorking on %s\n" % os.path.split(f)[1]) b = B(outFolder=f) b.go(model_list=model_list, reference=reference) T.flushPrint("Done.\n")
if end: end = int( end ) step = int( o.get('step',1) ) ref = o.get('ref',None) if ref: ref = PDBModel( T.absfile( ref ) ) if 'prot' in o: ref = ref.compress( ref.maskProtein() ) result_xyz = [] result_frameNames = [] result_ref = None T.flushPrint("Loading and appending trajectories...") for i in range( len( inLst ) ): t = loadTraj( inLst[i], i, start, end, step ) if 'prot' in o: t.keepAtoms( N0.nonzero(t.ref.maskProtein()) ) result_ref = result_ref or ref or t.ref if t.ref.equals( result_ref ) != [1,1]: raise Exception( 'Incompatible reference structure.' ) for xyz in t.frames: result_xyz.append( xyz.astype('f') )
except cPickle.UnpicklingError: print "Trying to load %s in sloppy mode..." % f return PickleUpgrader(open(T.absfile(f))).load() ######### ## Main ######### __use() fs = sys.argv[1:] for f in fs: try: o = sloppyload( f ) ## don't slim PDBModels that are their own source if isinstance( o, PDBModel ) and str( o.source ) == T.absfile( f ): o.forcePickle = 1 T.flushPrint('Dumping %s\n' % f ) T.dump( o, T.absfile( f ) ) except: print "Error with ", f print T.lastError()
## input: target.fasta ## ## output: sequences/all.fasta ## /blast.out ## /cluster_result.out ## /nr.fasta (input for Aligner) searcher = SequenceSearcher(outFolder=outFolder, clusterLimit=clustLim, verbose=1, log=log) if 'psi' in options: ## local PSIBlast - not fully implemented!! tools.flushPrint('Performing local PSI blast search\n') rounds = int(options['psi']) searcher.localPSIBlast(f_target, seq_db, e=e, rounds=rounds, alignments=aln, **ext_options) else: ## if it looks like local Blast is installed tools.flushPrint('Performing local blast search\n') if os.environ.has_key('BLASTDB') and settings.blast_bin: searcher.localBlast(f_target, seq_db, 'blastp', alignments=aln,
def retrievePDBs(self, outFolder=None, pdbCodes=None):
    """
    Get PDB from local database if it exists, if not try to
    download the coordinates from the RCSB.
    Write PDBs for given fasta records. Add PDB infos to internal
    dictionary of fasta records. NMR structures get resolution 3.5.

    A file already present in outFolder is re-used (and only parsed);
    otherwise the parsed lines are written to outFolder/<code>.pdb.

    @param outFolder: folder to put PDB files into (default: L{F_ALL})
    @type  outFolder: str OR None
    @param pdbCodes: list of PDB codes [all previously found templates]
    @type  pdbCodes: [str]

    @return: list of PDB file names
    @rtype: [str]

    @raise BlastError: if can't write file
    """
    outFolder = outFolder or self.outFolder + self.F_ALL
    pdbCodes = pdbCodes or self.record_dic.keys()

    ## NOTE(review): result and i are not used further in the code
    ## visible here and no return statement is visible although one is
    ## documented -- confirm against the full method.
    result = []
    i = 0

    if not self.silent:
        T.flushPrint("fetching %i PDBs (l=local, r=remotely)..." %
                     len(pdbCodes))

    for c in pdbCodes:

        i += 1

        fname = '%s/%s.pdb' % (outFolder, c)

        try:
            if os.path.exists(fname):
                h = open(fname, 'r')
            else:
                h = self.getLocalPDBHandle(c)
            if not self.silent:
                T.flushPrint('l')
        except Exception:
            ## any problem with the local copy -> fall back to download
            h = self.getRemotePDBHandle(c)
            if not self.silent:
                T.flushPrint('r')

        try:
            try:
                lines, infos = self.parsePdbFromHandle(h, first_model_only=1)
            finally:
                ## close if it is a handle, also if parsing failed
                try:
                    h.close()
                except Exception:
                    pass

            infos['file'] = fname

            if c in self.record_dic:
                self.record_dic[c].__dict__.update(infos)

            if not os.path.exists(fname):
                with open(fname, 'w', 1) as f:
                    f.writelines(lines)

            result += [fname]

        except IOError:
            raise BlastError("Can't write file " + fname)
""" sys.exit(0) ########## ## MAIN ## use() o = T.cmdDict({'n': 10}) f_in = T.absfile(o['i']) f_out = T.absfile(o.get('o', f_in)) n = int(o['n']) T.flushPrint("Loading...") t = T.load(f_in) T.flushPrint("Converting %i frames..." % len(t)) if isinstance(t, EnsembleTraj): T.flushPrint("Nothing to be done!\n") sys.exit(0) t = traj2ensemble(t, n) if 'pdb' in o: t.ref.pdbCode = o['pdb'] if f_in == f_out: os.rename(f_in, f_in + '_backup')
Default options: """ for key, value in o.items(): print "\t-", key, "\t", value sys.exit(0) if __name__ == '__main__': options = T.cmdDict({'o': [os.getcwd()]}) if '?' in options or 'help' in options: _use(options) folders = T.toList(options['o']) if not os.path.exists(folders[0] + '/templates'): print 'Current directory is not a valid modeling folder.' _use(options) T.flushPrint("Creating folders and links...\n") for f in folders: sv = VS(outFolder=f) sv.go(f) T.flushPrint("done\n")
""" Restart a distributed calculation. Syntax: restartPVM.py -i |rst_file| [-a] Options: i .. restart file containing result of TrackingJobMaster.getRst() a .. add hosts to PVM """ sys.exit(0) ## MAIN ## use() cmd = T.cmdDict() T.flushPrint('Loading restart data...') rst = T.load( cmd['i'] ) hosts = [] if 'a' in cmd: hosts = [ h['host'] for h in rst['hosts'] ] master = restart( rst, hosts=hosts ) T.flushPrint('Master initialized for restart.') master.start()
## without input folder and without explicit -d there is nothing to do
if not os.path.exists( f + B.F_INPUT_FOLDER ) and 'd' not in options:
    print( 'Current directory is not a valid modeling project folder.' )
    _use( options )

## Try to add project folders
## look for default cross-validation projects
d = []

if os.path.exists( f + VS.F_RESULT_FOLDER ):
    d = glob.glob( f + VS.F_RESULT_FOLDER + '/*' )

## 'in' replaces dict.has_key, which no longer exists in Python 3
if 'd' in options:
    folders = T.toList(options['d'])
else:
    folders = d

reference = options.get('ref', None)
model_list = options.get('modlist', None)

T.flushPrint("Starting job...\n")

for f in folders:
    T.flushPrint("\tWorking on %s\n"%os.path.split(f)[1])
    b = B( outFolder=f )
    b.go(model_list = model_list, reference = reference)

T.flushPrint( "Done.\n")
end = o.get('e', None) if end: end = int(end) step = int(o.get('step', 1)) ref = o.get('ref', None) if ref: ref = PDBModel(T.absfile(ref)) if 'prot' in o: ref = ref.compress(ref.maskProtein()) result_xyz = [] result_frameNames = [] result_ref = None T.flushPrint("Loading and appending trajectories...") for i in range(len(inLst)): t = loadTraj(inLst[i], i, start, end, step) if 'prot' in o: t.keepAtoms(N.nonzero(t.ref.maskProtein())) result_ref = result_ref or ref or t.ref if t.ref.equals(result_ref) != [1, 1]: raise Exception('Incompatible reference structure.') for xyz in t.frames: result_xyz.append(xyz.astype('f'))
if len(sys.argv) < 2: print \ """ Restart a distributed calculation. Syntax: restartPVM.py -i |rst_file| [-a] Options: i .. restart file containing result of TrackingJobMaster.getRst() a .. add hosts to PVM """ sys.exit(0) ## MAIN ## use() cmd = T.cmdDict() T.flushPrint('Loading restart data...') rst = T.load(cmd['i']) hosts = [] if 'a' in cmd: hosts = [h['host'] for h in rst['hosts']] master = restart(rst, hosts=hosts) T.flushPrint('Master initialized for restart.') master.start()
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the average and SD

    For every key k of MOU.aaAtoms all *.pdb files in
    base_folder + 'GLY-k-GLY_pcr/pcr_00' are loaded, reduced to heavy
    protein atoms, given SurfaceRacer profiles (probe 1.4) and
    compressed with res2atomMask( mask ). The 'MS' and 'AS' profiles
    are then averaged over all files of the peptide. Files that fail
    are reported and skipped; a peptide without any usable file is
    reported and left out of the result.

    base_folder - str, prefix of the peptide folders (used as is)
    label       - not used by this function
    mask        - residue mask passed to res2atomMask
                  (removes the tailing GLY)
    -> MS, AS, MS_sd, AS_sd - four dictionaries { k : { atom name : value } }
       with the average MS / AS profile and their standard deviations
    """
    ## container for results and standard deviations
    MS, AS = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        folder = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( folder + '/*.pdb')

        msLst = []
        asLst = []

        ## last model that passed all steps; provides the atom names
        ref = None

        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%folder )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove tailing GLY
                m = m.compress( m.res2atomMask(mask) )

                ## collect surface data for each peptide; fetch both
                ## profiles first so that the two lists stay in sync
                ms_profile = m.profile('MS')
                as_profile = m.profile('AS')
                msLst += [ ms_profile ]
                asLst += [ as_profile ]
                ref = m

            except Exception:
                print( 'Failed calculating exposure for GLY-%s-GLY'%(k) )
                print( '\t and file %s'%f )

        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')

        if ref is None:
            print( 'No surface data collected for GLY-%s-GLY'%(k) )
            continue

        ## averages and SDs are the same for all atoms -- calculate once
        ms_avg = N0.average( msLst )
        as_avg = N0.average( asLst )
        ms_sd = MAU.SD( msLst )
        as_sd = MAU.SD( asLst )

        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        for j, n in enumerate( ref['name'] ):
            msDic[n] = ms_avg[j]
            asDic[n] = as_avg[j]
            msDic_sd[n] = ms_sd[j]
            asDic_sd[n] = as_sd[j]

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
tmp_db = 'pdbaa' ############### ## SequenceSearcher ## ## Find homologues to the target sequence using blast against "seq_db" ## Cluster the seuences and write the result to nr.fasta ## input: target.fasta ## ## output: sequences/all.fasta ## /blast.out ## /cluster_result.out ## /nr.fasta (input for Aligner) tools.flushPrint('Searching for homologues ...') try: # initiate searcher = SequenceSearcher(outFolder=outFolder, verbose=1, log=log) # ## local PSIBlast - not fully implemented!! # searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000) ## local Blast searcher.localBlast(f_target, seq_db, 'blastp', alignments=500, e=0.0001) ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1 ## expects all.fasta # searcher.clusterFastaIterative( ) searcher.clusterFasta()
def __init__( self, verbose=1, **options ):
    """
    Load free trajectories and the reference complex, strip hydrogens
    and reduce all of them to the atoms they have in common.

    @param verbose: verbosity level (default: 1)
    @type  verbose: 1|0
    @param options: needs::
                      rec,lig - file name, receptor, ligand trajectories
                      ref - file name, pickled reference complex
    @type  options: any

    @raise AnalyzeError: if atoms are not aligned
    """
    self.options = options

    if verbose:
        t.flushPrint("\nLoading...")

    self.t_lig = t.load( options['lig'] )
    self.t_rec = t.load( options['rec'] )
    self.com = t.load( options['ref'] )

    ## strip hydrogens from trajectories and from the reference complex
    self.t_lig.removeAtoms( self.t_lig.getRef().maskH() )
    self.t_rec.removeAtoms( self.t_rec.getRef().maskH() )

    self.com.rec_model.remove( self.com.rec_model.maskH() )
    self.com.lig_model.remove( self.com.lig_model.maskH() )
    self.com.lig_transformed = None

    ## make free (trajectory) and bound models contain the same atoms
    if verbose:
        t.flushPrint('\nCasting...')

    bound_rec = self.com.rec()
    bound_lig = self.com.lig_model

    self.t_rec.sortAtoms()
    self.t_lig.sortAtoms()

    bound_rec = bound_rec.sort()
    bound_lig = bound_lig.sort()

    ## indices of the atoms shared by bound and free model
    idx_bound_rec, idx_free_rec = bound_rec.compareAtoms( self.t_rec.getRef() )
    idx_bound_lig, idx_free_lig = bound_lig.compareAtoms( self.t_lig.getRef() )

    self.t_rec = self.t_rec.takeAtoms( idx_free_rec )
    self.t_lig = self.t_lig.takeAtoms( idx_free_lig )

    self.i_free_lig, self.i_free_rec = idx_free_lig, idx_free_rec

    bound_rec = bound_rec.take( idx_bound_rec )
    bound_lig = bound_lig.take( idx_bound_lig )

    self.i_bnd_rec, self.i_bnd_lig = idx_bound_rec, idx_bound_lig

    ## the reference complex now carries the 'equalized' models
    self.com.rec_model = bound_rec
    self.com.lig_model = bound_lig
    self.com.lig_transformed = None

    ## check: atom content of free and bound models must agree
    if not ( self.t_rec.getRef().equals( self.com.rec() )[1]
             and self.t_lig.getRef().equals( self.com.lig() )[1] ):
        raise AnalyzeError('Atoms are not aligned.')

    ## native contact matrix
    self.contacts = self.com.atomContacts()

    self.hexContacts = None
'ldic':'pcr_lig/models.dic', 'rdic':'pcr_rec/models.dic'} ) if len( sys.argv ) < 2: syntax() fs = [ T.absfile( f ) for f in T.toList( options['i'] ) ] result = ComplexList() rec_dic = T.load( T.absfile( options['rdic'] ) ) lig_dic = T.load( T.absfile( options['ldic'] ) ) for f in fs: T.flushPrint('Loading %s ...' % f ) cl = T.load( f ) cl = reduceComplexList( cl ) result += cl T.flushPrint('done\n') T.flushPrint('correct model numbers...') correct_model_numbers( result, rec_dic, lig_dic ) T.flushPrint( '\ncasting all rec models...' ) pairwise_cast( result.models.recModels() )
###########################
# MAIN
###########################

## show usage if there are too few command line arguments
if len(sys.argv) < 3:
    _use()

options = t.cmdDict( defaultOptions() )

##
## current keys used for scoring
##
scoreKeys = ['eProsa', 'ePairScore', 'foldX', 'cons_ent', 'cons_max']

## load docking solutions
t.flushPrint( "\nLoading complex list %s ... " % t.absfile( options['i'] ) )
complex_lst = t.load( options['i'] )
t.flushPrint( "done\n" )

## validate and expand list of keys to be calculated
force = []

## 'in' replaces dict.has_key, which no longer exists in Python 3
if 'f' in options:
    raw_force = t.toList( options['f'] )

    ## check that the key is valid
    validKeys = ['fnac_4.5', 'fnac_10', 'fnrc_4.5', 'fnarc_9', 'fnarc_10',
                 'c_ratom_9', 'c_ratom_10', 'eProsa', 'ePairScore', 'foldX',
                 'cons_ent', 'cons_max', 'cons_abs', 'rms_if', 'rms_if_bb']
def randomSurfaces( base_folder, label, mask ):
    """
    calculate surfaces for all peptides and return the average and SD

    For every key k of MOU.aaAtoms all *.pdb files in
    base_folder + 'GLY-k-GLY_pcr/pcr_00' are loaded, reduced to heavy
    protein atoms, given SurfaceRacer profiles (probe 1.4) and
    compressed with res2atomMask( mask ). The 'MS' and 'AS' profiles
    are then averaged over all files of the peptide. Files that fail
    are reported and skipped; a peptide without any usable file is
    reported and left out of the result.

    base_folder - str, prefix of the peptide folders (used as is)
    label       - not used by this function
    mask        - residue mask passed to res2atomMask
                  (removes the tailing GLY)
    -> MS, AS, MS_sd, AS_sd - four dictionaries { k : { atom name : value } }
       with the average MS / AS profile and their standard deviations
    """
    ## container for results and standard deviations
    MS, AS = {}, {}
    MS_sd, AS_sd = {}, {}

    ## loop over peptide directories
    for k in MOU.aaAtoms.keys():
        folder = base_folder + 'GLY-%s-GLY_pcr/pcr_00'%(k)
        fLst = glob.glob( folder + '/*.pdb')

        msLst = []
        asLst = []

        ## last model that passed all steps; provides the atom names
        ref = None

        ## loop over pdb files for each peptide
        T.flushPrint( '\nNow collecting data in %s'%folder )
        for f in fLst:

            ## load peptide and remove waters and hydrogens
            m = PDBModel( f )
            m = m.compress( m.maskProtein() * m.maskHeavy() )
            T.flushPrint( '.')

            ## add surface data
            try:
                d = PDBDope( m )
                d.addSurfaceRacer( probe=1.4 )

                ## remove tailing GLY
                m = m.compress( m.res2atomMask(mask) )

                ## collect surface data for each peptide; fetch both
                ## profiles first so that the two lists stay in sync
                ms_profile = m.profile('MS')
                as_profile = m.profile('AS')
                msLst += [ ms_profile ]
                asLst += [ as_profile ]
                ref = m

            except Exception:
                print( 'Failed calculating exposure for GLY-%s-GLY'%(k) )
                print( '\t and file %s'%f )

        ## get result dictionary for peptide
        T.flushPrint('\nCollecting data ...\n')

        if ref is None:
            print( 'No surface data collected for GLY-%s-GLY'%(k) )
            continue

        ## averages and SDs are the same for all atoms -- calculate once
        ms_avg = N.average( msLst )
        as_avg = N.average( asLst )
        ms_sd = MAU.SD( msLst )
        as_sd = MAU.SD( asLst )

        msDic = {}
        asDic = {}
        msDic_sd = {}
        asDic_sd = {}

        for j, n in enumerate( ref['name'] ):
            msDic[n] = ms_avg[j]
            asDic[n] = as_avg[j]
            msDic_sd[n] = ms_sd[j]
            asDic_sd[n] = as_sd[j]

        MS[ k ] = msDic
        AS[ k ] = asDic
        MS_sd[ k ] = msDic_sd
        AS_sd[ k ] = asDic_sd

    return MS, AS, MS_sd, AS_sd
def retrievePDBs( self, outFolder=None, pdbCodes=None ):
    """
    Get PDB from local database if it exists, if not try to
    download the coordinates from the RCSB.
    Write PDBs for given fasta records. Add PDB infos to internal
    dictionary of fasta records. NMR structures get resolution 3.5.

    A file already present in outFolder is re-used (and only parsed);
    otherwise the parsed lines are written to outFolder/<code>.pdb.

    @param outFolder: folder to put PDB files into (default: L{F_ALL})
    @type  outFolder: str OR None
    @param pdbCodes: list of PDB codes [all previously found templates]
    @type  pdbCodes: [str]

    @return: list of PDB file names
    @rtype: [str]

    @raise BlastError: if can't write file
    """
    outFolder = outFolder or self.outFolder + self.F_ALL
    pdbCodes = pdbCodes or self.record_dic.keys()

    ## NOTE(review): result and i are not used further in the code
    ## visible here and no return statement is visible although one is
    ## documented -- confirm against the full method.
    result = []
    i = 0

    if not self.silent:
        T.flushPrint("fetching %i PDBs (l=local, r=remotely)..." %
                     len( pdbCodes ) )

    for c in pdbCodes:

        i += 1

        fname = '%s/%s.pdb' % (outFolder, c)

        try:
            if os.path.exists( fname ):
                h = open( fname, 'r' )
            else:
                h = self.getLocalPDBHandle( c )
            if not self.silent:
                T.flushPrint('l')
        except Exception:
            ## any problem with the local copy -> fall back to download
            h = self.getRemotePDBHandle( c )
            if not self.silent:
                T.flushPrint('r')

        try:
            try:
                lines, infos = self.parsePdbFromHandle( h, first_model_only=1 )
            finally:
                ## close if it is a handle, also if parsing failed
                try:
                    h.close()
                except Exception:
                    pass

            infos['file'] = fname

            if c in self.record_dic:
                self.record_dic[ c ].__dict__.update( infos )

            if not os.path.exists( fname ):
                with open( fname, 'w', 1 ) as f:
                    f.writelines( lines )

            result += [ fname ]

        except IOError:
            raise BlastError( "Can't write file "+fname )