    def reportClustering( self, raw=None ):
        """
        Report the clustering result.

        Nothing is written unless C{self.verbose} is set. Otherwise:
         - clustering results to L{F_CLUSTER_LOG}
         - blast records to L{F_BLAST_OUT}
         - blast records of centers to L{F_CLUSTER_BLAST_OUT}
         - raw clustering results to L{F_CLUSTER_RAW} if raw not None

        An IOError raised while reporting is downgraded to a warning.

        @param raw: write raw clustering result to disk (default: None)
        @type  raw: 1|0
        """
        try:
            if self.verbose:
                ## line-buffered log file; closed even if a write fails
                f = open( self.outFolder + self.F_CLUSTER_LOG, 'w', 1 )
                try:
                    for cluster in self.clusters:
                        f.write( "%i\t%s\n" % ( len( cluster ),
                                                 str( cluster ) ) )
                finally:
                    f.close()

                ## write blast records of centers to disc
                ## (the first member of each cluster is taken as its center)
                centers = [ c[0] for c in self.clusters ]

                self.writeClusteredBlastResult(
                    self.outFolder + self.F_BLAST_OUT,
                    self.outFolder + self.F_CLUSTER_BLAST_OUT, centers )

                self.copyClusterOut( raw=raw )

        except IOError as why:
            EHandler.warning( "Can't write cluster report." + str(why) )
    def mergeHmmSeq(self, seq1, seq2):
        """
        Merge two equally long sequences position by position.
        Multilple hits with one profile cannot overlap!! Overlap == ERROR

        At each position the larger of the two characters is kept. The
        no-match marker '.' sorts below letters and digits, so a match in
        either sequence wins over '.'. If both sequences carry the same
        character it is kept once, so the result always has the same
        length as the input.

        @param seq1: sequence
        @type  seq1: str
        @param seq2: sequence
        @type  seq2: str

        @return: merged sequence or None (sequences differ in length)
        @rtype: str OR None
        """
        if len(seq1) != len(seq2):
            EHandler.warning( 'ERR in mergeHmmSeq:\n' +\
                         '\tSequences of different lengths cannot be merged')
            return None

        return ''.join( [ max( a, b ) for a, b in zip( seq1, seq2 ) ] )
    def fastaFromIds(self, db, id_lst, remote=False):
        """
        Fetch one fasta record per entry of id_lst::
           fastaFromIds( db, id_lst ) -> { str: Bio.Fasta.Record }

        Records that cannot be fetched (BlastError) are skipped with a
        warning.

        @param db: database name
        @type  db: str
        @param id_lst: list of dictionaries with pdb codes and chain IDs
                       (the 'gb' entry is read instead of 'pdb' for the
                       lookup if remote is set)
        @type  id_lst: [{'pdb':str, 'chain':str}]
        @param remote: fetch via L{fastaRecordFromId_remote} rather than
                       from the local database (default: False)
        @type  remote: bool

        @return: Dictionary mapping pdb codes to Bio.Fasta.Records. The
                 returned records have an additional field: chain.
        @rtype: { str: Bio.Fasta.Record }
        """
        result = {}

        if self.verbose:
            s = 'from local %s using fastacmd' % db
            if remote:
                s = 'remotely from Entrez'
            self.log.add('Fetching %i fasta records %s...\n' %
                         (len(id_lst), s))

        for i in id_lst:
            try:
                if remote:
                    r = self.fastaRecordFromId_remote(i['gb'])
                    r.id = i['pdb']  ## clustering expects PDB, not gb ID
                else:
                    r = self.fastaRecordFromId(db, i['pdb'], i['chain'])

                r.chain = i['chain']
                result[i['pdb']] = r

            except BlastError as why:
                EHandler.warning("ERROR (ignored): couldn't fetch " + str(i))

        return result
    def __writeBlastResult( self, parsed_blast, outFile):
        """
        Write the result from the blast search to file (similar to the
        output produced by a regular blast run).

        writeBlastResult( parsed_blast, outFile )

        Any error is reported as a warning, not raised.

        @param parsed_blast: Bio.Blast.Record.Blast
        @type  parsed_blast: Bio.Blast.Record.Blast
        @param outFile: file to write the blast result to
        @type  outFile: str
        """
        try:
            f = open( T.absfile( outFile ), 'w' )
            try:
                i = 1  ## running number over all HSPs of all alignments
                for alignment in parsed_blast.alignments:
                    ## one-line title, but each 'pdb|' entry on its own line
                    s = alignment.title.replace( '\n', ' ' )
                    s = s.replace( 'pdb|', '\npdb|' )

                    for hsp in alignment.hsps:
                        f.write('Sequence %i: %s\n'%(i,s))
                        f.write('Score: %3.1f \tE-value: %2.1e\n'\
                                %(hsp.score, hsp.expect))
                        f.write('Lenght/Identities: %r\tPositives: %r\tGaps: %r\n'\
                                %(hsp.identities, hsp.positives, hsp.gaps))

                        f.write( '%s\n'%hsp.query  )
                        f.write( '%s\n'%hsp.match )
                        f.write( '%s\n\n'%hsp.sbjct )
                        i += 1
            finally:
                f.close()

        except Exception as why:
            EHandler.warning("Error while writing blast result to %s" %outFile)
            ## NOTE(review): no visible statement publishes the namespace;
            ## confirm whether this second message is still accurate.
            EHandler.warning("function namespace published to globals")
    def isnoise( self, score, n_samples=1000 ):
        """
        Test sample how a given score performs at predicting items in the
        positive list compared to its 'performance' at  predicting random
        elements. The result corresponds to a two-tailed P value.
        See L{utest} for the analytical solution.

        @param score: the score predicted for each item
        @type  score: [ float ]
        @param n_samples: number of random samples
        @type  n_samples: int

        @return: probability P that the prediction success of score is just
        a random effect (1.0 means it's just perfectly random).
        """
        from Biskit import EHandler

        ## list of random deviations from diagonal area 0.5
        a_rand = [ self.area(c)-0.5
                   for c in self.random_roccurves(score,n_samples) ]

        sd_rand = N.std( a_rand )
        av_rand = N.mean(a_rand )

        ## the random deviations should average out to (about) zero
        if round(av_rand,2) != 0.0:
            EHandler.warning( 'random sampling is skewed by %f'% av_rand )

        ## express the area of the real score in stdevs of the random areas
        a = self.rocarea( score )
        z = a / sd_rand

        ## probability that a sample falls *within* z stdevs from the mean
        p = L.erf( z / N.sqrt(2) )

        ## probability that the score hits just at random
        return 1.0 - p
    def fastaRecordFromId( self, db, id ):
        """
        Use::
           fastaRecordFromId( db, id ) -> Bio.Fasta.Record

        Runs the fastacmd binary configured in settings and parses the
        first fasta record from its output.

        @param db: database
        @type  db: str
        @param id: sequence database ID
        @type  id: str

        @return: fasta record
        @rtype: Bio.SeqRecord.SeqRecord

        @raise BlastError: if fastacmd returns a non-zero exit status
        @raise InternalError: if the fastacmd output holds no fasta record
        """
        cmd = settings.fastacmd_bin + ' -d %s -s %s' % (db, id)

        err, o = commands.getstatusoutput( cmd )
        if err:
            EHandler.warning('%s returned error: %r' % (cmd, err) )
            raise BlastError( 'fastacmd failed. Error code: ' + str(err) )

        try:
            ## only the first record of the output is used
            frecord = next( SeqIO.parse( cStringIO.StringIO(o), 'fasta') )
            frecord.id = str(id)

        except StopIteration:
            raise InternalError(
                "Couldn't fetch fasta record %s from database %s" % (id,db) )

        return frecord
    def fastaFromIds( self, db, id_lst, remote=False ):
        """
        Use::
           fastaFromIds( db, id_lst ) -> { str: Bio.Fasta.Record }

        Records that cannot be fetched (BlastError) are skipped with a
        warning.

        @param db: database
        @type  db: str
        @param id_lst: sequence database IDs
        @type  id_lst: [str]
        @param remote: fetch via L{fastaRecordFromId_remote} rather than
                       from the local database (default: False)
        @type  remote: bool

        @return: dictionary mapping IDs to Bio.Fasta.Records
        @rtype: {str: Bio.Fasta.Record}
        """
        result = {}

        if self.verbose:
            ## fill in the database name (was logged as a literal '%s')
            s = 'from local %s using fastacmd' % db
            if remote:
                s = 'remotely from Entrez'
            self.log.add('Fetching %i fasta records %s...\n'% (len(id_lst), s))

        for i in id_lst:
            try:
                if remote:
                    r = self.fastaRecordFromId_remote( i )
                else:
                    r = self.fastaRecordFromId( db, i )

                result[i] = r

            except BlastError as why:
                EHandler.warning("couldn't fetch %s"%str(i),trace=0 )

        return result
    def finish( self ):
        """
        Overrides Executor method

        Stores the parsed program output in C{self.result}. If standard
        accessibilities are missing (KeyError), the 'relMS' and 'relAS'
        entries are removed from the result.
        """
        Executor.finish( self )

        self.result = self.parse_result()

        ## if probe radius other than 1.4 A the relative surface exposure
        ## cannot be calculated, but allow this check to be a little flexible
        ## if we are forced to slightly increase the radii to escape round off
        ## SurfaceRacer errors
        try:
            if round(self.probe, 1) == 1.4 and self.vdw_set == 1:
                ## NOTE(review): the statements that calculate the relative
                ## accessibilities ('relMS' / 'relAS') are not visible in
                ## this excerpt and must be restored here.
                pass
            else:
                EHandler.warning("No relative accessabilities calculated "+\
                                 "when using a prob radius other than 1.4 A"+\
                                 " or not using the Richards vdw radii set.")

        except KeyError as what:
            EHandler.warning("Missing standard accessibilities for some "+\
                             "atoms. No relative accesibilities calculated.")
            if 'relMS' in self.result: del self.result['relMS']
            if 'relAS' in self.result: del self.result['relAS']
    def loadResContacts( self ):
        """
        Uncompress residue contact matrix if necessary.

        Two stored forms are converted in place:
         - a string: loaded with t.load (old-style pickled contacts)
         - a dict whose 'result' is one-dimensional: taken as indices
           into the raveled matrix and expanded to a 2-D 0/1 matrix

        @return: dict with contact matrix and parameters OR None
        @rtype: dict OR None
        """
        ## Backwards compatibility
        if self.contacts is not None and isinstance( self.contacts, str ):
            self.contacts = t.load( self.contacts )
            EHandler.warning("loading old-style pickled contacts.")
            return self.contacts

        ## New, uncompression from list of indices into raveled array
        if self.contacts is not None and \
           len( N.shape( self.contacts['result'])) == 1:

            try:
                lenRec, lenLig = self.contacts['shape']
            except (KeyError, TypeError, ValueError):
                ## no usable shape stored -- fall back to the models
                ## NOTE(review): the original exception filter is not
                ## visible in this excerpt; confirm it.
                EHandler.warning("uncompressing contacts without shape")
                lenRec = self.rec().lenResidues()
                lenLig = self.lig().lenResidues()

            m = N.zeros( lenRec * lenLig )
            N.put( m, self.contacts['result'], 1 )

            self.contacts['result'] = N.reshape( m, (lenRec, lenLig) )

        return self.contacts
def single2longAA( seq ):
    """
    Convert string of 1-letter AA code into list of 3-letter AA codes.
    Letters that are not a value of aaDicStandard are reported with a
    warning and translated to 'Xaa'.

    @param seq: amino acid sequence in 1-letter code
    @type  seq: str

    @return: list with the amino acids in 3-letter code
    @rtype: [str]
    """
    ## invert AA dict
    invTab = dict( [ (aaDicStandard[key], key) for key in aaDicStandard ] )

    result = []
    for aa in seq:
        aa = aa.upper()
        try:
            result.append( invTab[aa].upper() )
        except KeyError:
            ## NOTE(review): the original exception filter is not visible
            ## in this excerpt; a failed lookup is the case handled here.
            EHandler.warning("unknown residue: " + str(aa))
            result.append( 'Xaa' )

    return result
 def __defaults( self ):
     """
     Make sure this object has a model registry. If the attribute
     rec_models is present (and not 0), all complexes of this list are
     added to the registry and the attributes rec_models and lig_models
     are deleted.
     """
     self.models = getattr( self, 'models', ComplexModelRegistry() )

     if getattr( self, 'rec_models', 0) != 0:
         ## NOTE(review): the opening of this call (and possibly the first
         ## part of the message) is missing from this excerpt; restore it
         ## from the original file.
         EHandler.warning(
             're-creating model registry..re-pickle this list!')

         for c in self.toList():
             self.models.addComplex( c )

         del self.rec_models
         del self.lig_models
 def __defaults( self ):
     """
     Make sure this object has a model registry. If the attribute
     rec_models is present (and not 0), all complexes of this list are
     added to the registry and the attributes rec_models and lig_models
     are deleted.
     """
     self.models = getattr( self, 'models', ComplexModelRegistry() )

     if getattr( self, 'rec_models', 0) != 0:
         ## NOTE(review): the opening of this call (and possibly the first
         ## part of the message) is missing from this excerpt; restore it
         ## from the original file.
         EHandler.warning(
             're-creating model registry..re-pickle this list!')

         for c in self.toList():
             self.models.addComplex( c )

         del self.rec_models
         del self.lig_models
    def __getstate__(self):
        """
        Called before pickling the object.

        Frames held as a list or as an array of type code 'd' are first
        converted to a Float32 array; a failed conversion only triggers
        a warning.

        @return: the instance dictionary
        @rtype: dict
        """
        try:
            if type( self.frames ) == list or self.frames.dtype.char == 'd':
                EHandler.warning("Converting coordinates to float array.")
                self.frames = N.array( self.frames ).astype(N.Float32)
        except Exception:
            ## best effort only -- pickling proceeds with unconverted frames
            ## NOTE(review): the original exception filter is not visible
            ## in this excerpt; confirm it.
            EHandler.warning('Could not convert frames to float array.', 1)

        return self.__dict__
    def __getstate__(self):
        """
        Called before pickling the object.

        Frames held as a list or as an array of type code 'd' are first
        converted to a Float32 array; a failed conversion only triggers
        a warning.

        @return: the instance dictionary
        @rtype: dict
        """
        try:
            if type(self.frames) == list or self.frames.dtype.char == 'd':
                EHandler.warning("Converting coordinates to float array.")
                self.frames = N.array(self.frames).astype(N.Float32)
        except Exception:
            ## best effort only -- pickling proceeds with unconverted frames
            ## NOTE(review): the original exception filter is not visible
            ## in this excerpt; confirm it.
            EHandler.warning('Could not convert frames to float array.', 1)

        return self.__dict__
    def __syncModel( self, new_model, old_model ):
        """
        Connect new rec or lig model to old one, to minimize storage.

        @param new_model: PDBModel / PCRModel
        @type  new_model: PDBModel
        @param old_model: PDBModel / PCRModel
        @type  old_model: PDBModel

        @return: PDBModel / PCRModel, new model that only keeps
                 changes relative to old, the old model becomes the
                 source of the new, if possible
        @rtype: PDBModel
        """
        ## try to fix atom order of new_model so that it is identical to old
        if old_model.equals( new_model ) != [1,1]:
            i_new, i_old = new_model.compareAtoms( old_model )

            ## only re-order if no atom of new_model would get lost
            if len( i_new ) == len( new_model ):
                new_model.keep( i_new )

        ## create result model that only keeps difference of new and old
        if old_model.equals( new_model ) == [1,1]:

            ## stays compatible with PCRModel.__init__ and PDBModel.__init
            r = old_model.__class__( source=old_model )

            r.setXyz( new_model.getXyz() )

            ## check for profiles identical to source and adapt 'changed'
            ## NOTE(review): the statement belonging to this comment is not
            ## visible in this excerpt; restore it from the original file.

            ## surface profiles are not valid for changed coordinates
            if not MU.arrayEqual( r.xyz, old_model.xyz ):
                r.removeProfile( 'relASA', 'ASA_sc', 'ASA_total', 'ASA_bb' )

            return r

        ## NOTE(review): the opening of this call is missing from this
        ## excerpt; EHandler.warning is assumed -- confirm.
        EHandler.warning(
            'ComplexEvolving: Cannot connect new to old PDBModel.')

        return new_model
    def getResult( self, **arg ):
        """
        Collapse the results for different values of the variable parameter
        into lists and put the results into a tree ala::
          r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

        The tree is also stored in C{self.result_tree}. Besides the
        members it carries the entries 'var', 'vrange' and 'protocols'.
        A (member, protocol) pair that cannot be collapsed is reported
        with a warning.

        @return: tree-like dict ordered by variable value, member, protocol
        @rtype: dict of dict of dict of lists
        """
        tree = self.dictionate( self.result )

        vvalues = list( tree.keys() )

        ## result keys are ( value, member, protocol ) tuples; the keys of
        ## the first value serve as template for all other values
        keys = self.result.keys()
        sub_keys = [ k for k in keys if k[0] == vvalues[0] ]

        r = {}
        for v, member, protcl in sub_keys:

            try:
                if not member in r:
                    r[member] = {}

                r[member][protcl] = {}

                run_dic = tree[v][member][protcl]

                for k in run_dic.keys():
                    r[member][protcl][k] = [ tree[x][member][protcl][k] \
                                             for x in vvalues ]
            except Exception:
                ## NOTE(review): the original exception filter is not
                ## visible in this excerpt; confirm it.
                EHandler.warning('missing result: ' + str(T.lastError()))

        r['var'] = self.var
        r['vrange']= self.vrange
        r['protocols'] = self.protocols

        self.result_tree = r
        return r
    def getResult(self, **arg):
        """
        Collapse the results for different values of the variable parameter
        into lists and put the results into a tree ala::
          r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

        The tree is also stored in C{self.result_tree}. Besides the
        members it carries the entries 'var', 'vrange' and 'protocols'.
        A (member, protocol) pair that cannot be collapsed is reported
        with a warning.

        @return: tree-like dict ordered by variable value, member, protocol
        @rtype: dict of dict of dict of lists
        """
        tree = self.dictionate(self.result)

        vvalues = list(tree.keys())

        ## result keys are ( value, member, protocol ) tuples; the keys of
        ## the first value serve as template for all other values
        keys = self.result.keys()
        sub_keys = [k for k in keys if k[0] == vvalues[0]]

        r = {}
        for v, member, protcl in sub_keys:

            try:
                if not member in r:
                    r[member] = {}

                r[member][protcl] = {}

                run_dic = tree[v][member][protcl]

                for k in run_dic.keys():
                    r[member][protcl][k] = [ tree[x][member][protcl][k] \
                                             for x in vvalues ]
            except Exception:
                ## NOTE(review): the original exception filter is not
                ## visible in this excerpt; confirm it.
                EHandler.warning('missing result: ' + str(T.lastError()))

        r['var'] = self.var
        r['vrange'] = self.vrange
        r['protocols'] = self.protocols

        self.result_tree = r
        return r
    def validate(self):
        """
        Validate the path to the binary.

        Replaces C{self.bin} by the result of T.absbinary and updates
        the environment. Missing environment variables raise an error
        if C{self.strict} is set and are otherwise only reported.

        @raise ExeConfigError: if environment is not fit for running
                               the program
        """
        try:
            self.bin = T.absbinary(self.bin)  ## raises IOError if not found

            missing = self.update_environment()
            report = '%s is missing environment variables: %r'\
                     % (self.name, missing )

            if missing and self.strict:
                raise ExeConfigError( report )

            if missing:
                ## NOTE(review): the body of this branch is missing from
                ## this excerpt; a warning with the report is assumed.
                EHandler.warning( report )

        except IOError as why:
            raise ExeConfigError( str(why) + ' Check %s!' % self.dat )
    def isnoise(self, score, n_samples=1000):
        """
        Test sample how a given score performs at predicting items in the
        positive list compared to its 'performance' at  predicting random
        elements. The result corresponds to a two-tailed P value.
        See L{utest} for the analytical solution.

        @param score: the score predicted for each item
        @type  score: [ float ]
        @param n_samples: number of random samples
        @type  n_samples: int

        @return: probability P that the prediction success of score is just
        a random effect (1.0 means it's just perfectly random).
        """
        from Biskit import EHandler

        ## list of random deviations from diagonal area 0.5
        a_rand = [
            self.area(c) - 0.5
            for c in self.random_roccurves(score, n_samples)
        ]

        sd_rand = N.std(a_rand)
        av_rand = N.mean(a_rand)

        ## the random deviations should average out to (about) zero
        if round(av_rand, 2) != 0.0:
            EHandler.warning('random sampling is skewed by %f' % av_rand)

        ## express the area of the real score in stdevs of the random areas
        a = self.rocarea(score)
        z = a / sd_rand

        ## probability that a sample falls *within* z stdevs from the mean
        p = L.erf(z / N.sqrt(2))

        ## probability that the score hits just at random
        return 1.0 - p
    def __writeBlastResult(self, parsed_blast, outFile):
        """
        Write the result from the blast search to file (similar to the
        output produced by a regular blast run).

        writeBlastResult( parsed_blast, outFile )

        Any error is reported as a warning, not raised.

        @param parsed_blast: Bio.Blast.Record.Blast
        @type  parsed_blast: Bio.Blast.Record.Blast
        @param outFile: file to write the blast result to
        @type  outFile: str
        """
        try:
            f = open(T.absfile(outFile), 'w')
            try:
                i = 1  ## running number over all HSPs of all alignments
                for alignment in parsed_blast.alignments:
                    ## one-line title, but each 'pdb|' entry on its own line
                    s = alignment.title.replace('\n', ' ')
                    s = s.replace('pdb|', '\npdb|')

                    for hsp in alignment.hsps:
                        f.write('Sequence %i: %s\n' % (i, s))
                        f.write('Score: %3.1f \tE-value: %2.1e\n'\
                                %(hsp.score, hsp.expect))
                        f.write('Lenght/Identities: %r\tPositives: %r\tGaps: %r\n'\
                                %(hsp.identities, hsp.positives, hsp.gaps))

                        f.write('%s\n' % hsp.query)
                        f.write('%s\n' % hsp.match)
                        f.write('%s\n\n' % hsp.sbjct)
                        i += 1
            finally:
                f.close()

        except Exception as why:
            EHandler.warning("Error while writing blast result to %s" %
                             outFile)
            ## NOTE(review): no visible statement publishes the namespace;
            ## confirm whether this second message is still accurate.
            EHandler.warning("function namespace published to globals")
"""
The compilation of PVM/pypvm can be tricky on some architectures. In order
to support installations that don't need parallelisation, only a warning is
issued if pypvm is missing and the public classes are exported as
Pseudo-classes. Pseudo classes are empty and raise an ImportError when you
try to initialize them. See also L{Biskit.tools.tryImport}.
"""
## import user  ## ensure that ~/.pythonrc.py is executed

## error-tolerant export of public classes
from Biskit import EHandler
import Biskit.tools as T

pvm_installed = True

## pvm_installed ends up true only if every import below succeeded
pvm_installed = T.tryImport( 'TrackingJobMaster', 'TrackingJobMaster',
                             namespace=globals() ) and pvm_installed
pvm_installed = T.tryImport( 'dispatcher', 'JobSlave',
                             namespace=globals() ) and pvm_installed

if not pvm_installed:
    EHandler.warning('Could not import PVM (Parallel Virtual Machine) modules.'+
        ' Please check that PVM and pypvm are installed!\n'+
        '\tParallelisation is not available.')

## clean up
del EHandler, T
    def concat(self, *profiles):
        """
        Concatenate all profiles in this with corresponding profiles in the
        given ProfileCollection(s). A profile that is missing from one of
        two collections is filled up with its default value there. A
        profile whose concatenation fails is skipped with a warning::
          p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
          same number of profiles as p0 but with the length of p0+p1+p2..

        @param profiles: profile(s) to concatenate
        @type  profiles: ProfileCollection(s)

        @return: concatenated profile(s)
        @rtype: ProfileCollection / subclass
        """
        ## end recursion (no more arguments)
        if len(profiles) == 0:
            return self

        other = profiles[0]

        r = self.__class__()

        ##!!! BIG FAT WARNING: empty profilecollection does not imply empty model
        ## an empty PC w/o any profiles currently doesn't know which length
        ## is is supposed to have. If profLength == 0 for real, then
        ## the next PC's profiles don't need to be skipped
        ## Otherwise,
        ## this creates too-short profiles if the PC parent model has
        ## non-zero length and simply doesn't have any profiles registered.

        ##        ## special case 1: concat something to empty profile collection
        ##        if not self.keys():
        ##            return next.clone().concat( *profiles[1:] )
        ##        ## special case 2: concat empty profile collection to this one
        ##        if not next.keys():
        ##            return self.clone().concat( *profiles[1:] )
        allkeys = M.union(self.profiles.keys(), other.keys())

        for k in allkeys:
            p = self.profiles.get(k, None)
            pnext = other.profiles.get(k, None)
            infos = {}

            ## profile only in other: fill this side with default values
            if p is None:
                default = other[k, 'default']
                p = self.__clonedefault(pnext, self.profLength(), default)
                infos = other.infos[k]

            ## profile only in self: fill the other side with default values
            if pnext is None:
                default = self[k, 'default']
                pnext = self.__clonedefault(p, other.profLength(), default)
                infos = self.infos[k]

            try:
                if isinstance(p, N.ndarray):

                    ## an empty array must not change the type of the result
                    if len(pnext) == 0:
                        pnext = pnext.astype(p.dtype)

                    r.set(k, N.concatenate((p, pnext)), **infos)

                else:
                    r.set(k, p + pnext, **infos)

            except Exception:
                ## NOTE(review): the exception filter and a second argument
                ## of this call are missing from this excerpt; restore both
                ## from the original file.
                EHandler.warning("Profile %s skipped during concat." % k)

        return r.concat(*profiles[1:])
 def _cease( self, ref ):
     """
     Mark this object as not alive any longer. A failure is only
     reported as a warning.

     @param ref: not used (presumably the weak reference that triggered
                 the call -- confirm against the caller)
     @type  ref: any
     """
     try:
         self.alive = False
     except Exception:
         ## NOTE(review): the try/except lines are missing from this
         ## excerpt; the exception filter is assumed -- confirm.
         EHandler.warning('error in CrossView._cease')
"""
Protein-protein docking related modules
"""
from Biskit import EHandler

## NOTE(review): only IOError is downgraded to a warning here, so an
## ImportError from one of these modules still propagates -- confirm that
## this is intended.
try:
    from Complex import Complex
    from ComplexEvolving import ComplexEvolving
    from ComplexEvolvingList import ComplexEvolvingList
    from ComplexList import ComplexList
    from ComplexTraj import ComplexTraj
    from ComplexModelRegistry import ComplexModelRegistry
    from ComplexRandomizer import ComplexRandomizer
    from Docker import Docker
    from FixedList import FixedList
    from HexParser import HexParser
    from delphiBindingEnergy import DelphiBindingEnergy
##     from Intervor import Intervor
##     from PatchGenerator import PatchGenerator
##     from PatchGeneratorFromOrbit import PatchGeneratorFromOrbit

except IOError as why:
    EHandler.warning("Couldn't import all Biskit.Dock modules.\n" + str(why))

## PVM-dependent modules
try:
    from ContactMaster import ContactMaster
    from ContactSlave import ContactSlave
except Exception as why:
    EHandler.warning("Couldn't import PVM-dependent modules of Biskit.Dock.\n"+\
                     str( why ) )
"""
The compilation of PVM/pypvm can be tricky on some architectures. In order
to support installations that don't need parallelisation, only a warning is
issued if pypvm is missing and the public classes are exported as
Pseudo-classes. Pseudo classes are empty and raise an ImportError when you
try to initialize them. See also L{Biskit.tools.tryImport}.
"""
## import user  ## ensure that ~/.pythonrc.py is executed

## error-tolerant export of public classes
from Biskit import EHandler
import Biskit.tools as T

pvm_installed = True

## pvm_installed ends up true only if every import below succeeded
pvm_installed = T.tryImport('TrackingJobMaster', 'TrackingJobMaster',
                            namespace=globals()) and pvm_installed
pvm_installed = T.tryImport('dispatcher', 'JobSlave',
                            namespace=globals()) and pvm_installed

if not pvm_installed:
    EHandler.warning(
        'Could not import PVM (Parallel Virtual Machine) modules.' +
        ' Please check that PVM and pypvm are installed!\n' +
        '\tParallelisation is not available.')

## clean up
del EHandler, T
"""
Protein-protein docking related modules
"""
from Biskit import EHandler

## NOTE(review): only IOError is downgraded to a warning here, so an
## ImportError from one of these modules still propagates -- confirm that
## this is intended.
try:
    from Complex import Complex
    from ComplexVC import ComplexVC
    from ComplexVCList import ComplexVCList
    from ComplexList import ComplexList
    from ComplexTraj import ComplexTraj
    from ComplexModelRegistry import ComplexModelRegistry
    from ComplexRandomizer import ComplexRandomizer
    from Docker import Docker
    from FixedList import FixedList
    from HexParser import HexParser
    from delphiBindingEnergy import DelphiBindingEnergy
##     from Intervor import Intervor
##     from PatchGenerator import PatchGenerator
##     from PatchGeneratorFromOrbit import PatchGeneratorFromOrbit

except IOError as why:
    EHandler.warning("Couldn't import all Biskit.Dock modules.\n" + str(why))

## PVM-dependent modules
try:
    from ContactMaster import ContactMaster
    from ContactSlave import ContactSlave
except Exception as why:
    EHandler.warning("Couldn't import PVM-dependent modules of Biskit.Dock.\n"+\
                     str( why ) )