def finish( self ):
    """
    Overrides Executor method
    """
    Executor.finish( self )
    self.result = self.parse_result()

    ## if probe radius other than 1.4 A the relative surface exposure
    ## cannot be calculated, but allow this check to be a little flexible
    ## if we are forced to slightly increase the radii to escape round-off
    ## SurfaceRacer errors
    try:
        if round(self.probe, 1) == 1.4 and self.vdw_set == 1:
            self.__relExposure('MS')
            self.__relExposure('AS')
        else:
            EHandler.warning("No relative accessibilities calculated "+\
                             "when using a probe radius other than 1.4 A"+\
                             " or not using the Richards vdw radii set.")
    except KeyError, what:
        EHandler.warning("Missing standard accessibilities for some "+\
                         "atoms. No relative accessibilities calculated.")
        if 'relMS' in self.result: del self.result['relMS']
        if 'relAS' in self.result: del self.result['relAS']

def fastaFromIds( self, db, id_lst, remote=False ):
    """
    Use::
       fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }

    @param db: database name
    @type  db: str
    @param id_lst: list of dictionaries with pdb codes and chain IDs
    @type  id_lst: [{'pdb':str, 'chain':str}]

    @return: Dictionary mapping pdb codes to Bio.Fasta.Records. The
             returned records have an additional field: chain.
    @rtype: { str: Bio.Fasta.Record }
    """
    result = {}
    if self.verbose:
        s = 'from local %s using fastacmd' % db
        if remote:
            s = 'remotely from Entrez'
        self.log.add('Fetching %i fasta records %s...\n' % (len(id_lst), s))

    for i in id_lst:
        try:
            if remote:
                r = self.fastaRecordFromId_remote( i['gb'] )
                r.id = i['pdb']   ## clustering expects PDB, not gb ID
            else:
                r = self.fastaRecordFromId( db, i['pdb'], i['chain'] )
            r.chain = i['chain']
            result[ i['pdb'] ] = r
        except BlastError, why:
            EHandler.warning("ERROR (ignored): couldn't fetch " + str(i) )

def fastaRecordFromId( self, db, id ):
    """
    Use::
       fastaRecordFromId( db, id ) -> Bio.Fasta.Record

    @param db: database
    @type  db: str
    @param id: sequence database ID
    @type  id: str

    @return: fasta record
    @rtype: Bio.SeqRecord.SeqRecord

    @raise BlastError: if can't fetch fasta record from database
    """
    cmd = settings.fastacmd_bin + ' -d %s -s %s' % (db, id)

    err, o = commands.getstatusoutput( cmd )
    if err:
        EHandler.warning('%s returned error: %r' % (cmd, err) )
        raise BlastError( 'fastacmd failed. Error code: ' + str(err) )

    try:
        frecord = SeqIO.parse( cStringIO.StringIO(o), 'fasta').next()
        frecord.id = str(id)
    except StopIteration:
        raise InternalError, \
              "Couldn't fetch fasta record %s from database %s" % (id, db)

    return frecord

def fastaFromIds(self, db, id_lst, remote=False):
    """
    Use::
       fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }

    @param db: database
    @type  db: str
    @param id_lst: sequence database IDs
    @type  id_lst: [str]

    @return: dictionary mapping IDs to Bio.Fasta.Records
    @rtype: {str: Bio.Fasta.Record}

    @raise BlastError: if couldn't fetch record
    """
    result = {}
    if self.verbose:
        s = 'from local %s using fastacmd' % db
        if remote:
            s = 'remotely from Entrez'
        self.log.add('Fetching %i fasta records %s...\n' % (len(id_lst), s))

    for i in id_lst:
        try:
            if remote:
                r = self.fastaRecordFromId_remote(i)
            else:
                r = self.fastaRecordFromId(db, i)
            result[i] = r
        except BlastError, why:
            EHandler.warning("couldn't fetch %s" % str(i), trace=0)

def single2longAA( seq ):
    """
    Convert string of 1-letter AA code into list of 3-letter AA codes.

    @param seq: amino acid sequence in 1-letter code
    @type  seq: str

    @return: list with the amino acids in 3-letter code
    @rtype: [str]
    """
    ## invert AA dict
    invTab = {}
    for key in aaDicStandard:
        invTab[ aaDicStandard[key] ] = key

    result = []
    for aa in seq:
        try:
            aa = aa.upper()
            result += [ invTab[aa].upper() ]
        except:
            EHandler.warning("unknown residue: " + str(aa))
            result += ['Xaa']

    return result

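## Usage sketch (hedged): in the Biskit sources these snippets appear to come
## from, this helper and the aaDicStandard table live in Biskit.molUtils;
## adjust the import if your layout differs.
##
##     >>> from Biskit.molUtils import single2longAA
##     >>> single2longAA('ACD')
##     ['ALA', 'CYS', 'ASP']
##     >>> single2longAA('A?')    ## unknown letter -> EHandler warning + 'Xaa'
##     ['ALA', 'Xaa']
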
def concat( self, *traj ):
    """
    Concatenate this with other trajectories. The ref model of the
    new Trajectory is a 'semi-deep' copy of this trajectory's model
    (see L{PDBModel.take()} )::
       concat( traj [, traj2, traj3, ..] ) -> Trajectory

    @param traj: one or more Trajectory with identical atoms as this one
    @type  traj: Trajectories

    @return: concatenated trajectories
    @rtype: Trajectory
    """
    if len( traj ) == 0:
        return self

    r = self.__class__()

    r.frames = N.concatenate( (self.frames, traj[0].frames), 0 )

    r.setRef( self.ref.clone())

    if self.frameNames and traj[0].frameNames:
        r.frameNames = self.frameNames + traj[0].frameNames

    try:
        if self.pc is not None and traj[0].pc is not None:
            r.pc['p'] = N.concatenate( (self.pc['p'], traj[0].pc['p']), 0)
            r.pc['u'] = N.concatenate( (self.pc['u'], traj[0].pc['u']), 0)
    except TypeError, why:
        EHandler.error('cannot concat PC ' + str(why) )

def reportClustering(self, raw=None):
    """
    Report the clustering result.

    Writes:
     - clustering results to L{F_CLUSTER_LOG}
     - blast records to L{F_BLAST_OUT}
     - blast records of centers to L{F_CLUSTER_BLAST_OUT}
     - raw clustering results to L{F_CLUSTER_RAW} if raw not None

    @param raw: write raw clustering result to disk (default: None)
    @type  raw: 1|0
    """
    try:
        if self.verbose:
            f = open(self.outFolder + self.F_CLUSTER_LOG, 'w', 1)

            for cluster in self.clusters:
                f.write("%i\t%s\n" % (len(cluster), str(cluster)))
            f.close()

            ## write blast records of centers to disc
            centers = [c[0] for c in self.clusters]

            self.writeClusteredBlastResult( \
                self.outFolder + self.F_BLAST_OUT,
                self.outFolder + self.F_CLUSTER_BLAST_OUT, centers )

            self.copyClusterOut(raw=raw)

    except IOError, why:
        EHandler.warning("Can't write cluster report." + str(why))

def mergeHmmSeq( self, seq1, seq2 ):
    """
    Merges two sequences into one. Multiple hits with one profile
    cannot overlap!! Overlap == ERROR

    @param seq1: sequence
    @type  seq1: str
    @param seq2: sequence
    @type  seq2: str

    @return: merged sequence or None
    @rtype: str OR None
    """
    if len(seq1) != len(seq2):
        EHandler.warning( 'ERR in mergeHmmSeq:\n' +\
                          '\tSequences of different lengths cannot be merged')
        return None
    else:
        result = ''
        for i in range( len(seq1) ):

            ## no match in either
            if seq1[i] == seq2[i] == '.':
                result += '.'

            ## match in seq1
            if seq1[i] > seq2[i]:
                result += seq1[i]

            ## match in seq2
            if seq1[i] < seq2[i]:
                result += seq2[i]

        return result

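## Worked example (derived from the merge rule above, not from external docs):
## '.' marks an unmatched column and sorts below any residue letter, so the
## residue from whichever hit is matched wins at each position.
##
##     mergeHmmSeq('..AB..', 'CD....')   ## -> 'CDAB..'
##     mergeHmmSeq('ABC',    'AB')       ## -> None (length mismatch + warning)
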
def data3DList(c, key='fnac_10', inverse=0, rm=range(1, 12), soln=512):
    """
    Create a matrix: len(rec_model) * len(lig_model) * solutions
    containing the values of the info dic with given key.

    c - ComplexList
    """
    rm = range(1, max(c.valuesOf('model1')) + 1)
    lm = range(1, max(c.valuesOf('model2')) + 1)

    matrix = zeros((len(rm), len(lm), soln), 'f')

    try:
        for r in rm:
            rl = c.filter('model1', r)
            for l in lm:
                cl = rl.filter('model2', l)
                if inverse:
                    matrix[r - 1][l - 1] = (
                        1. / array(cl.valuesOf(key, default=0))).tolist()
                else:
                    matrix[r - 1][l - 1] = cl.valuesOf(key, default=0)
    except ValueError, why:
        try:
            lenM = len(matrix[r - 1][l - 1])
            lenV = len(cl.valuesOf(key, default=0))
        except:
            lenM = lenV = 0
        s = '%i : %i len(matrix)=%i <> len(values)=%i' % (r, l, lenM, lenV)
        EHandler.error('Cannot extract fnac data. ' + s)

def isnoise( self, score, n_samples=1000 ):
    """
    Use random sampling to test how well a given score performs at
    predicting items in the positive list compared to its 'performance'
    at predicting random elements. The result corresponds to a two-tailed
    P value. See L{utest} for the analytical solution.

    @param score: the score predicted for each item
    @type  score: [ float ]
    @param n_samples: number of random samples
    @type  n_samples: int

    @return: probability P that the prediction success of score is just
             a random effect (1.0 means it's just perfectly random).
    """
    from Biskit import EHandler

    ## list of random deviations from diagonal area 0.5
    a_rand = [ self.area(c) - 0.5
               for c in self.random_roccurves(score, n_samples) ]

    sd_rand = N.std( a_rand )
    av_rand = N.mean( a_rand )

    if round(av_rand, 2) != 0.0:
        EHandler.warning( 'random sampling is skewed by %f' % (av_rand - 0.0) )

    a = self.rocarea( score )
    z = a / sd_rand

    ## probability that a sample falls *within* z stdevs from the mean
    p = L.erf( z / N.sqrt(2) )

    ## probability that the score hits just at random
    return 1.0 - p

def __writeBlastResult( self, parsed_blast, outFile):
    """
    Write the result from the blast search to file (similar to the
    output produced by a regular blast run).

    writeBlastResult( parsed_blast, outFile )

    @param parsed_blast: Bio.Blast.Record.Blast
    @type  parsed_blast: Bio.Blast.Record.Blast
    @param outFile: file to write the blast result to
    @type  outFile: str
    """
    try:
        f = open( T.absfile( outFile ), 'w' )

        i = 1
        for alignment in parsed_blast.alignments:
            for hsp in alignment.hsps:
                s = string.replace(alignment.title, '\n', ' ')
                s = string.replace(s, 'pdb|', '\npdb|')
                f.write('Sequence %i: %s\n' % (i, s))
                f.write('Score: %3.1f \tE-value: %2.1e\n'\
                        % (hsp.score, hsp.expect))
                f.write('Length/Identities: %r\tPositives: %r\tGaps: %r\n'\
                        % (hsp.identities, hsp.positives, hsp.gaps))
                f.write( '%s\n' % hsp.query )
                f.write( '%s\n' % hsp.match )
                f.write( '%s\n\n' % hsp.sbjct )
                i += 1
        f.close()

    except Exception, why:
        EHandler.warning("Error while writing blast result to %s" % outFile)
        globals().update( locals() )
        EHandler.warning("function namespace published to globals")

def loadResContacts( self ):
    """
    Uncompress residue contact matrix if necessary.

    @return: dict with contact matrix and parameters OR None
    @rtype: dict OR None
    """
    ## Backwards compatibility
    if self.contacts != None and type( self.contacts ) == str:
        self.contacts = t.load( self.contacts )
        EHandler.warning("loading old-style pickled contacts.")
        return self.contacts

    ## New, uncompression from list of indices into raveled array
    if self.contacts != None and \
       len( N.shape( self.contacts['result'] )) == 1:

        try:
            lenRec, lenLig = self.contacts['shape']
        except:
            EHandler.warning("uncompressing contacts without shape")
            lenRec = self.rec().lenResidues()
            lenLig = self.lig().lenResidues()

        m = N.zeros( lenRec * lenLig )
        N.put( m, self.contacts['result'], 1 )

        self.contacts['result'] = N.reshape( m, (lenRec, lenLig) )

    return self.contacts

def __defaults( self ):
    self.models = getattr( self, 'models', ComplexModelRegistry() )

    if getattr( self, 'rec_models', 0) != 0:
        EHandler.warning(
            're-creating model registry..re-pickle this list!')

        for c in self.toList():
            self.models.addComplex( c )

        del self.rec_models
        del self.lig_models

def __getstate__(self):
    """
    Called before pickling the object.
    """
    try:
        if type(self.frames) == list or self.frames.dtype.char == 'd':
            EHandler.warning("Converting coordinates to float array.")
            self.frames = N.array(self.frames).astype(N.Float32)
    except:
        EHandler.warning('Could not convert frames to float array.', 1)

    return self.__dict__

def add(self, str):
    """
    Add string str and a line break to the xplor input file.

    @param str: string to add to file
    @type  str: str
    """
    try:
        self.fgenerate.write(str + '\n')
    except (IOError):
        EHandler.error(
            "XPlorInput.append(): Error adding str to xplor input file.")

def writePdb(self, index, fname):
    """
    Write (possibly transformed) coordinates back to pdb.

    @param index: frame index in trajectory
    @type  index: int
    @param fname: name of new file
    @type  fname: str
    """
    try:
        self.getPDBModel(index).writePdb(fname)
    except:
        EHandler.error('Error writing %s.' % fname)

def concat( self, *profiles ):
    """
    Concatenate all profiles in this with corresponding profiles in the
    given ProfileCollection(s). Profiles that are not found in all
    ProfileCollections are skipped::
      p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
      same number of profiles as p0 but with the length of p0+p1+p2..

    @param profiles: profile(s) to concatenate
    @type  profiles: ProfileCollection(s)

    @return: concatenated profile(s)
    @rtype: ProfileCollection / subclass
    """
    ## end recursion (no more arguments)
    if len( profiles ) == 0:
        return self

    next = profiles[0]

    r = self.__class__()

    ## special case: concat something to empty profile collection
    ## if self.profLength() == 0:
    ##     return next.clone().concat( *profiles[1:] )
    ##
    ## if next.profLength() == 0:
    ##     return self.clone().concat( *profiles[1:] )

    for k, p in self.profiles.items():
        try:
            if isinstance( p, N.ndarray ):
                r.set( k, N.concatenate( (p, next.get(k)) ),
                       **self.infos[k] )
            else:
                r.set( k, p + next.get(k), **self.infos[k] )
        except:
            EHandler.warning("Profile %s skipped during concat." % k,
                             error=0)
            r.remove( k )

    return r.concat( *profiles[1:] )

def __substitute( self, fragments, name, value ):
    """
    Look in all not yet substituted fragments for parts that can be
    substituted by value and, if successful, create a new fragment

    @param fragments: fragment tuples
    @type  fragments: [ (str, str) ]
    @param name: substitution variable name
    @type  name: str
    @param value: substitution value in current environment
    @type  value: str

    @return: fragment tuples
    @rtype: [ (str, str) ]
    """
    result = []

    try:
        for abs, subst in fragments:

            if not subst:   ## unsubstituted fragment

                ## pos = abs.find( value )
                pos = self.__find_subpath( abs, value )

                if pos != -1:
                    end = pos + len( value )

                    f1, f2, f3 = abs[0:pos], abs[pos:end], abs[end:]

                    if f1:
                        result += [ (f1, None) ]   ## unsubstituted head
                    result += [ (f2, name) ]       ## new substitution
                    if f3:
                        result += [ (f3, None) ]   ## unsubstituted tail

                else:
                    result += [ (abs, subst) ]
            else:
                result += [ (abs, subst) ]

    except OSError, why:
        EHandler.fatal("Substituting path fragments: \n" +
                       str( fragments ) + '\nname: ' + str( name ) +
                       '\nvalue:' + str( value ) )

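## Illustration (hypothetical values, derived from the splitting logic above):
## substituting name='projectRoot', value='/home/raik/biskit' into the single
## unsubstituted fragment ('/home/raik/biskit/test/com.pdb', None) yields
##     [('/home/raik/biskit', 'projectRoot'), ('/test/com.pdb', None)]
## i.e. the matched middle becomes a named substitution while any head and
## tail remain unsubstituted.
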
def __syncModel(self, new_model, old_model):
    """
    Connect new rec or lig model to old one, to minimize storage.

    @param new_model: PDBModel / PCRModel
    @type  new_model: PDBModel
    @param old_model: PDBModel / PCRModel
    @type  old_model: PDBModel

    @return: PDBModel / PCRModel, new model that only keeps changes
             relative to old, the old model becomes the source of the
             new, if possible
    @rtype: PDBModel
    """
    ## try to fix atom order of new_model so that it is identical to old
    if old_model.equals(new_model) != [1, 1]:
        i_new, i_old = new_model.compareAtoms(old_model)

        if len(i_new) == len(new_model):
            new_model.keep(i_new)

    ## create result model that only keeps difference of new and old
    if old_model.equals(new_model) == [1, 1]:

        ## stays compatible with PCRModel.__init__ and PDBModel.__init__
        r = old_model.__class__(source=old_model)

        r.setXyz(new_model.getXyz())

        ## check for profiles identical to source and adapt 'changed'
        r.update()

        if not MU.arrayEqual(r.xyz, old_model.xyz):
            r.removeProfile('relASA', 'ASA_sc', 'ASA_total', 'ASA_bb')

        return r

    EHandler.warning(
        'ComplexEvolving: Cannot connect new to old PDBModel.')

    new_model.disconnect()
    return new_model

def getResult(self, **arg):
    """
    Collapse the results for different values of the variable parameter
    into lists and put the results into a tree ala::
      r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

    @return: tree-like dict ordered by variable value, member, protocol
    @rtype: dict of dict of dict of lists
    """
    tree = self.dictionate(self.result)

    vvalues = tree.keys()
    vvalues.sort()

    keys = self.result.keys()
    sub_keys = [k for k in keys if k[0] == vvalues[0]]

    r = {}

    for v, member, protcl in sub_keys:
        try:
            if not member in r:
                r[member] = {}

            r[member][protcl] = {}

            run_dic = tree[v][member][protcl]

            for k in run_dic.keys():
                r[member][protcl][k] = [ tree[v][member][protcl][k] \
                                         for v in vvalues ]
        except:
            EHandler.warning('missing result: ' + str(T.lastError()))

    r['var'] = self.var
    r['vrange'] = self.vrange
    r['protocols'] = self.protocols

    self.result_tree = r
    return r

def validate(self):
    """
    Validate the path to the binary.

    @raise ExeConfigError: if environment is not fit for running
                           the program
    """
    try:
        self.bin = T.absbinary(self.bin)   ## raises IOError if not found

        missing = self.update_environment()
        report = '%s is missing environment variables: %r'\
                 % (self.name, missing)

        if missing and self.strict:
            raise ExeConfigError, report

        if missing:
            EHandler.warning(report)

    except IOError, why:
        raise ExeConfigError, str(why) + ' Check %s!' % self.dat

    # searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000)

    ## local Blast
    searcher.localBlast(f_target, seq_db, 'blastp', alignments=500,
                        e=0.0001)

    ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1
    ## expects all.fasta
    # searcher.clusterFastaIterative( )
    searcher.clusterFasta()

    searcher.writeFastaClustered()

    tools.flushPrint('Done.\n')

except:
    EHandler.error('Error while searching for homologues.')


###############
## TemplateSearcher
##
## Find modelling templates, blasting the target sequence against "tmp_db"
## Cluster the sequences and download the pdbs to templates/all

## input: target.fasta
##
## output: templates/blast.out
##         templates/all.fasta
##         templates/cluster_result.out
##         templates/nr.fasta              (input for Aligner)
##         templates/all/*.pdb
##         templates/nr/chain_index.txt    (input for TemplateCleaner)

## note 1: If there are more than approximately 50 sequences overall,
##         t_coffee will eat all the memory and the job will not finish.
##         This should be fixed in more recent versions of T-Coffee
##         (v > 3.2) where T-Coffee, according to the manual, "switches
##         to a heuristic mode, named DPA, where DPA stands for Double
##         Progressive Alignment."

## note 2: If there is only one template structure, step 2 of T-coffee
##         will not work. Solution: skip the structural alignment if
##         only one template structure is provided.

## note 3: In quite some cases the sequence retrieved from the nrpdb
##         sequence database is different from the sequence extracted
##         from the coordinates in the pdb-file. This will sometimes
##         cause t-coffee to terminate with an error (two sequences
##         with the same name but with different sequences). Temporary
##         solution: Choose another structure from the same cluster
##         as the troublemaker.

try:
    a = Aligner( outFolder, log, verbose=1, sap=sap )

    a.align_for_modeller_inp()

    a.go(host)

except:
    EHandler.error( 'Error while building alignments.')
    print "\nalign.py -? or align.py -help for help screen"

if '?' in options or 'help' in options:
    _use( defaultOptions() )

log = None
if options['log']:
    log = LogFile( outFolder + '/' + options['log'], 'a' )

###################
## TemplateCleaner
##
## Prepare pdb files in templates/nr for T-coffee and modeller
## (replace nonstandard residues, remove hydrogens,
##  remove atoms with multiple configurations, etc.)

## input: templates/nr/*.pdb
##        templates/nr/chain_index.txt
##
## output: templates/t_coffee/*.alpha    (input for Aligner)
##         templates/modeller/*.pdb      (input for Modeller)

try:
    cleaner = TemplateCleaner( outFolder, log )

    inp_dic = modUtils.parse_tabbed_file( chIndex )

    cleaner.process_all( inp_dic )

except:
    EHandler.error( 'Error while cleaning templates')

Protein-protein docking related modules
"""
from Biskit import EHandler

try:
    from Complex import Complex
    from ComplexVC import ComplexVC
    from ComplexVCList import ComplexVCList
    from ComplexList import ComplexList
    from ComplexTraj import ComplexTraj
    from ComplexModelRegistry import ComplexModelRegistry
    from ComplexRandomizer import ComplexRandomizer
    from Docker import Docker
    from FixedList import FixedList
    from HexParser import HexParser
    from delphiBindingEnergy import DelphiBindingEnergy

##    from Intervor import Intervor
##    from PatchGenerator import PatchGenerator
##    from PatchGeneratorFromOrbit import PatchGeneratorFromOrbit

except IOError, why:
    EHandler.warning("Couldn't import all Biskit.Dock modules.\n" + str(why))

## PVM-dependent modules
try:
    from ContactMaster import ContactMaster
    from ContactSlave import ContactSlave

except Exception, why:
    EHandler.warning("Couldn't import PVM-dependent modules of Biskit.Dock.\n"+\
                     str( why ) )

def concatEnsembles( self, *traj ):
    """
    Concatenate this with other trajectories in a zig zag manner,
    resulting in an ensembleTraj with additional members.
    The ref model of the new Trajectory is a 'semi-deep' copy of this
    trajectory's model (see L{PDBModel.take()} )::
      concat( traj [, traj2, traj3, ..] ) -> Trajectory

    @param traj: with identical atoms as this one
    @type  traj: one or more EnsembleTrajectory

    @todo: fix so that pc, and profiles are not lost
    """
    if len( traj ) == 0:
        return self

    r = self.__class__( n_members = self.n_members + traj[0].n_members )

    min_members = min( self.n_members, traj[0].n_members )
    min_frames = min( self.lenFrames(), traj[0].lenFrames() )

    steps = self.lenFrames()/self.n_members + \
            traj[0].lenFrames()/traj[0].n_members

    def __everyOther( traj_0, traj_1, list_0, list_1, minMembers,
                      minFrames, loops ):
        result = []
        for j in range( 0, minMembers/2 ):

            for i in range( j*loops, j*loops + minFrames*2/minMembers ):
                result += [ list_0[i] ]
                result += [ list_1[i] ]

            while i < j*traj_0.n_members:
                result += [ list_0[i] ]

            while i < j*traj_1.n_members:
                result += [ list_1[i] ]

        return result

    frames = __everyOther( self, traj[0], self.frames,
                           traj[0].frames, min_members,
                           min_frames, steps )

    r.frames = N.array(frames)
    r.setRef( self.ref.clone())

    if self.frameNames and traj[0].frameNames:
        r.frameNames = __everyOther( self, traj[0], self.frameNames,
                                     traj[0].frameNames, min_members,
                                     min_frames, steps )
    try:
        # NOT TESTED!!
        if self.pc and traj[0].pc:
            r.pc['p'] = __everyOther( self, traj[0], self.pc['p'],
                                      traj[0].pc['p'], min_members, steps )
            r.pc['u'] = __everyOther( self, traj[0], self.pc['u'],
                                      traj[0].pc['u'], min_members, steps )

#            r.pc['p'] = N.concatenate( (self.pc['p'], traj[0].pc['p']),0)
#            r.pc['u'] = N.concatenate( (self.pc['u'], traj[0].pc['u']),0)

    except TypeError, why:
        EHandler.error('cannot concat PC ' + str(why) )

Protein-protein docking related modules
"""
from Biskit import EHandler

try:
    from Complex import Complex
    from ComplexEvolving import ComplexEvolving
    from ComplexEvolvingList import ComplexEvolvingList
    from ComplexList import ComplexList
    from ComplexTraj import ComplexTraj
    from ComplexModelRegistry import ComplexModelRegistry
    from ComplexRandomizer import ComplexRandomizer
    from Docker import Docker
    from FixedList import FixedList
    from HexParser import HexParser
    from delphiBindingEnergy import DelphiBindingEnergy

##    from Intervor import Intervor
##    from PatchGenerator import PatchGenerator
##    from PatchGeneratorFromOrbit import PatchGeneratorFromOrbit

except IOError, why:
    EHandler.warning("Couldn't import all Biskit.Dock modules.\n" + str(why))

## PVM-dependent modules
try:
    from ContactMaster import ContactMaster
    from ContactSlave import ContactSlave

except Exception, why:
    EHandler.warning("Couldn't import PVM-dependent modules of Biskit.Dock.\n"+\
                     str( why ) )

##
## output: modeller/modeller.log
##         /*.B9999000??    <- models

try:
    if options['verbose'] > 0:
        print "\n"+\
              "Type model.py -? or model.py -help for a full list of options!"

    m8 = M(**options)

    if not 'dry' in options:
        r = m8.run()   ## comment out for testing

except:
    EHandler.error('Error while modelling.')

#####################
## Show output

## show result in PyMol
if options.has_key('s'):
    names = []

    ## fit backbone of all models to average until convergence
    models = glob.glob('%s/target.B*' % (m8.outFolder + m8.F_RESULT_FOLDER))

    traj = Trajectory(models)

    traj.blockFit2ref(mask=traj[0].maskBB())

    ## calculate and print rmsd matrix
    rmsHeavy = traj.pairwiseRmsd()

The compilation of PVM/pypvm can be tricky on some architectures. In order
to support installations that don't need parallelisation, only a warning is
issued if pypvm is missing and the public classes are exported as
Pseudo-classes. Pseudo classes are empty and raise an ImportError when you
try to initialize them. See also L{Biskit.tools.tryImport}.
"""
## import user   ## ensure that ~/.pythonrc.py is executed

##
## error-tolerant export of public classes
##
from Biskit import EHandler
import Biskit.tools as T

pvm_installed = True

pvm_installed = T.tryImport( 'TrackingJobMaster', 'TrackingJobMaster',
                             namespace=globals())
pvm_installed = T.tryImport( 'dispatcher', 'JobSlave',
                             namespace=globals() ) and pvm_installed

if not pvm_installed:
    EHandler.warning('Could not import PVM (Parallel Virtual Machine) modules.' +
                     ' Please check that PVM and pypvm are installed!\n' +
                     '\tParallelisation is not available.')

##
## clean up
##
del EHandler, T

def _cease( self, ref ):
    try:
        self.alive = False
    except:
        EHandler.warning('error in CrossView._cease')
        pass

def concat(self, *profiles):
    """
    Concatenate all profiles in this with corresponding profiles in the
    given ProfileCollection(s). Profiles that are not found in all
    ProfileCollections are skipped::
      p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
      same number of profiles as p0 but with the length of p0+p1+p2..

    @param profiles: profile(s) to concatenate
    @type  profiles: ProfileCollection(s)

    @return: concatenated profile(s)
    @rtype: ProfileCollection / subclass
    """
    ## end recursion (no more arguments)
    if len(profiles) == 0:
        return self

    next = profiles[0]

    r = self.__class__()

    ##!!! BIG FAT WARNING: empty profilecollection does not imply empty model
    ## an empty PC w/o any profiles currently doesn't know which length it is
    ## supposed to have. If profLength == 0 for real, then the next PC's
    ## profiles don't need to be skipped. Otherwise, this creates too-short
    ## profiles if the PC parent model has non-zero length and simply doesn't
    ## have any profiles registered.

    ## ## special case 1: concat something to empty profile collection
    ## if not self.keys():
    ##     return next.clone().concat( *profiles[1:] )
    ##
    ## ## special case 2: concat empty profile collection to this one
    ## if not next.keys():
    ##     return self.clone().concat( *profiles[1:] )

    allkeys = M.union(self.profiles.keys(), next.keys())

    ## for k, p in self.profiles.items():
    for k in allkeys:
        p = self.profiles.get(k, None)
        pnext = next.profiles.get(k, None)
        infos = {}

        if p is None:
            default = next[k, 'default']
            p = self.__clonedefault(pnext, self.profLength(), default)
            infos = next.infos[k]

        if pnext is None:
            default = self[k, 'default']
            pnext = self.__clonedefault(p, next.profLength(), default)
            infos = self.infos[k]

        try:
            if isinstance(p, N.ndarray):
                if len(pnext) == 0:
                    pnext = pnext.astype(p.dtype)

                r.set(k, N.concatenate((p, pnext)), **infos)
            else:
                r.set(k, p + pnext, **infos)
        except:
            EHandler.warning("Profile %s skipped during concat." % k,
                             error=1)
            r.remove(k)

    return r.concat(*profiles[1:])
