Example #1
    def finish( self ):
        """
        Overrides Executor method
        """
        Executor.finish( self )

        self.result = self.parse_result()

        ## if probe radius other than 1.4 A the relative surface exposure
        ## cannot be calculated, but allow this check to be a little flexible
        ## if we are forced to slightly increase the radii to escape round-off
        ## SurfaceRacer errors
        try:
            if round(self.probe, 1) == 1.4 and self.vdw_set == 1:
                self.__relExposure('MS')
                self.__relExposure('AS')
            else:
                EHandler.warning("No relative accessabilities calculated "+\
                                 "when using a prob radius other than 1.4 A"+\
                                 " or not using the Richards vdw radii set.")
        except KeyError, what:
            EHandler.warning("Missing standard accessibilities for some "+\
                             "atoms. No relative accesibilities calculated.")
            if 'relMS' in self.result: del self.result['relMS']
            if 'relAS' in self.result: del self.result['relAS']
Example #2
    def fastaFromIds( self, db, id_lst, remote=False ):
        """
        Use::
           fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }

        @param db: database name
        @type  db: str
        @param id_lst: list of dictionaries with pdb codes and chain IDs
        @type  id_lst: [{'pdb':str, 'chain':str}]

        @return: Dictionary mapping pdb codes to Bio.Fasta.Records. The
                 returned records have an additional field: chain.
        @rtype: { str: Bio.Fasta.Record }        
        """
        result = {}
        if self.verbose:
            s = 'from local %s using fastacmd' % db
            if remote:
                s = 'remotely from Entrez'
            self.log.add('Fetching %i fasta records %s...\n'% (len(id_lst), s))

        for i in id_lst:
            try:
                if remote:
                    r = self.fastaRecordFromId_remote( i['gb'] )
                    r.id = i['pdb']  ## clustering expects PDB, not gb ID
                else:
                    r = self.fastaRecordFromId( db, i['pdb'], i['chain'] )
                r.chain = i['chain']
                result[ i['pdb'] ] = r
            except BlastError, why:
                EHandler.warning("ERROR (ignored): couldn't fetch "+ str(i) )
Example #3
    def fastaRecordFromId( self, db, id ):
        """
        Use::
           fastaRecordFromId( db, id ) -> Bio.Fasta.Record

        @param db: database
        @type  db: str
        @param id: sequence database ID
        @type  id: str

        @return: fasta record
        @rtype: Bio.SeqRecord.SeqRecord

        @raise BlastError: if can't fetch fasta record from database
        """
        cmd = settings.fastacmd_bin + ' -d %s -s %s' % (db, id)

        err, o = commands.getstatusoutput( cmd )
        if err:
            EHandler.warning('%s returned error: %r' % (cmd, err) )
            raise BlastError( 'fastacmd failed. Error code: ' + str(err) )

        try:
            frecord = SeqIO.parse( cStringIO.StringIO(o), 'fasta').next()
            frecord.id = str(id)

        except StopIteration:
            raise InternalError, \
                  "Couldn't fetch fasta record %s from database %s" % (id,db)

        return frecord
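
The parsing step above leans on Biopython's SeqIO plus the Python 2 idioms cStringIO and generator .next(); as a minimal self-contained sketch of just that step, where the fasta text is a made-up stand-in for fastacmd output:

    import cStringIO
    from Bio import SeqIO

    fasta_txt = ">1abc_A hypothetical example record\nMKTAYIAKQR\n"
    frecord = SeqIO.parse( cStringIO.StringIO( fasta_txt ), 'fasta' ).next()
    print frecord.id           ## -> 1abc_A
    print str( frecord.seq )   ## -> MKTAYIAKQR
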
Example #4
    def fastaFromIds(self, db, id_lst, remote=False):
        """
        Use::
           fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }

        @param db: database
        @type  db: str
        @param id_lst: sequence database IDs
        @type  id_lst: [str]

        @return: dictionary mapping IDs to Bio.Fasta.Records
        @rtype: {str: Bio.Fasta.Record}

        @raise BlastError: if couldn't fetch record
        """
        result = {}
        if self.verbose:
            s = 'from local %s using fastacmd' % db
            if remote:
                s = 'remotely from Entrez'
            self.log.add('Fetching %i fasta records %s...\n' %
                         (len(id_lst), s))

        for i in id_lst:
            try:
                if remote:
                    r = self.fastaRecordFromId_remote(i)
                else:
                    r = self.fastaRecordFromId(db, i)
                result[i] = r
            except BlastError, why:
                EHandler.warning("couldn't fetch %s" % str(i), trace=0)
Example #5
def single2longAA( seq ):
    """
    Convert string of 1-letter AA code into list of 3-letter AA codes.
    
    @param seq: amino acid sequence in 1-letter code
    @type  seq: str
    
    @return: list with the amino acids in 3-letter code
    @rtype: [str]
    """
    ## invert AA dict
    invTab = {}

    for key in aaDicStandard:
        invTab[ aaDicStandard[key] ] = key

    result = []
    for aa in seq:
        try:
            aa = aa.upper()
            result += [ invTab[aa].upper() ]
        except:
            EHandler.warning("unknown residue: " + str(aa))
            result += ['Xaa']

    return result
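
For illustration, a usage sketch; the expected output assumes aaDicStandard maps lower-case 3-letter codes to 1-letter codes (e.g. {'ala':'A', 'cys':'C', ...}), which is what the inversion above implies:

    ## single2longAA( 'ACD' )  ->  ['ALA', 'CYS', 'ASP']
    ## single2longAA( 'A?D' )  ->  ['ALA', 'Xaa', 'ASP']  (plus a warning for '?')
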
Example #6
    def concat( self, *traj ):
        """
        Concatenate this with other trajectories. The ref model of the
        new Trajectory is a 'semi-deep' copy of this trajectory's model.
        (see L{PDBModel.take()} )::
           concat( traj [, traj2, traj3, ..] ) -> Trajectory 

        @param traj: one or more Trajectory with identical atoms as this one
        @type  traj: Trajectories

        @return: concatenated trajectories
        @rtype: Trajectory
        """
        if len( traj ) == 0:
            return self

        r = self.__class__()

        r.frames = N.concatenate( (self.frames, traj[0].frames), 0 )

        r.setRef( self.ref.clone())

        if self.frameNames and traj[0].frameNames:
            r.frameNames = self.frameNames + traj[0].frameNames

        try:
            if self.pc is not None and traj[0].pc is not None:
                r.pc['p'] = N.concatenate( (self.pc['p'], traj[0].pc['p']),0)
                r.pc['u'] = N.concatenate( (self.pc['u'], traj[0].pc['u']),0)
        except TypeError, why:
            EHandler.error('cannot concat PC '+str(why) )
Example #7
    def reportClustering(self, raw=None):
        """
        Report the clustering result.

        Writes:
         - clustering results to L{F_CLUSTER_LOG}
         - blast records to L{F_BLAST_OUT}
         - blast records of centers to L{F_CLUSTER_BLAST_OUT}
         - raw clustering results to L{F_CLUSTER_RAW} if raw not None

        @param raw: write raw clustering result to disk (default: None)
        @type  raw: 1|0         
        """
        try:
            if self.verbose:
                f = open(self.outFolder + self.F_CLUSTER_LOG, 'w', 1)

                for cluster in self.clusters:

                    f.write("%i\t%s\n" % (len(cluster), str(cluster)))

                f.close()

                ## write blast records of centers to disc
                centers = [c[0] for c in self.clusters]

                self.writeClusteredBlastResult( \
                    self.outFolder + self.F_BLAST_OUT,
                    self.outFolder + self.F_CLUSTER_BLAST_OUT, centers )

                self.copyClusterOut(raw=raw)

        except IOError, why:
            EHandler.warning("Can't write cluster report." + str(why))
Example #8
    def fastaFromIds( self, db, id_lst, remote=False ):
        """
        Use::
           fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }

        @param db: database
        @type  db: str
        @param id_lst: sequence database IDs
        @type  id_lst: [str]

        @return: dictionary mapping IDs to Bio.Fasta.Records
        @rtype: {str: Bio.Fasta.Record}

        @raise BlastError: if couldn't fetch record
        """
        result = {}
        if self.verbose:
            s = 'from local %s using fastacmd' % db
            if remote:
                s = 'remotely from Entrez'
            self.log.add('Fetching %i fasta records %s...\n'% (len(id_lst), s))
            
        for i in id_lst:
            try:
                if remote:
                    r = self.fastaRecordFromId_remote( i )
                else:
                    r = self.fastaRecordFromId( db, i )
                result[i] = r
            except BlastError, why:
                EHandler.warning("couldn't fetch %s"%str(i),trace=0 )
Example #9
    def concat(self, *traj):
        """
        Concatenate this with other trajectories. The ref model of the
        new Trajectory is a 'semi-deep' copy of this trajectory's model.
        (see L{PDBModel.take()} )::
           concat( traj [, traj2, traj3, ..] ) -> Trajectory 

        @param traj: one or more Trajectory with identical atoms as this one
        @type  traj: Trajectories

        @return: concatenated trajectories
        @rtype: Trajectory
        """
        if len(traj) == 0:
            return self

        r = self.__class__()

        r.frames = N.concatenate((self.frames, traj[0].frames), 0)

        r.setRef(self.ref.clone())

        if self.frameNames and traj[0].frameNames:
            r.frameNames = self.frameNames + traj[0].frameNames

        try:
            if self.pc is not None and traj[0].pc is not None:
                r.pc['p'] = N.concatenate((self.pc['p'], traj[0].pc['p']), 0)
                r.pc['u'] = N.concatenate((self.pc['u'], traj[0].pc['u']), 0)
        except TypeError, why:
            EHandler.error('cannot concat PC ' + str(why))
Example #10
    def mergeHmmSeq( self, seq1, seq2 ):
        """
        Merges two sequences into one.
        Multiple hits with one profile cannot overlap!! Overlap == ERROR

        @param seq1: sequence
        @type  seq1: str
        @param seq2: sequence
        @type  seq2: str

        @return: merged sequence or None
        @rtype: str OR None 
        """
        if len(seq1) != len(seq2):
            EHandler.warning( 'ERR in mergeHmmSeq:\n' +\
                         '\tSequences of different lengths cannot be merged')
            return None
        else:
            result = ''
            for i in range( len(seq1) ):
                ## no match in either
                if seq1[i] == seq2[i] == '.':    
                    result += '.'
                ## match in seq1
                if seq1[i] > seq2[i]:
                    result += seq1[i]
                ## match in seq2
                if seq1[i] < seq2[i]:
                    result += seq2[i]

            return result
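
Because '.' (ASCII 46) sorts below every letter, the three per-position comparisons above keep the matched character of whichever sequence has one; a worked example with hypothetical profile hits:

    ## mergeHmmSeq( '..KLM....', 'AB....XYZ' )  ->  'ABKLM.XYZ'
    ## mergeHmmSeq( 'AB', 'ABC' )               ->  None (length mismatch)

Note that a true overlap, i.e. the same non-'.' character in both sequences at one position, falls through all three tests and is silently dropped from the result, which is why overlapping hits are declared an error.
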
Example #11
def data3DList(c, key='fnac_10', inverse=0, rm=range(1, 12), soln=512):
    """
    Create a matrix: len(rec_model) * len(lig_model) * solutions
    containing the values of the info dict with the given key.
    c - ComplexList
    """
    rm = range(1, max(c.valuesOf('model1')) + 1)
    lm = range(1, max(c.valuesOf('model2')) + 1)

    matrix = zeros((len(rm), len(lm), soln), 'f')

    try:

        for r in rm:
            rl = c.filter('model1', r)
            for l in lm:
                cl = rl.filter('model2', l)
                if inverse:
                    matrix[r - 1][l - 1] = (
                        1. / array(cl.valuesOf(key, default=0))).tolist()
                else:
                    matrix[r - 1][l - 1] = cl.valuesOf(key, default=0)

    except ValueError, why:
        try:
            lenM = len(matrix[r - 1][l - 1])
            lenV = len(cl.valuesOf(key, default=0))
        except:
            lenM = lenV = 0
        s = '%i : %i len(matrix)=%i <> len(values)=%i' % (r, l, lenM, lenV)
        EHandler.error('Cannot extract fnac data. ' + s)
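
For orientation, the result is indexed receptor x ligand x solution; a minimal shape sketch, assuming zeros comes from numpy as in the function above:

    from numpy import zeros

    ## 2 receptor models x 3 ligand models x 512 solutions per docking
    matrix = zeros( (2, 3, 512), 'f' )
    matrix[0][1] = 0.25    ## broadcast: all 512 solutions of docking (r=1, l=2)
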
Example #12
def single2longAA(seq):
    """
    Convert string of 1-letter AA code into list of 3-letter AA codes.
    
    @param seq: amino acid sequence in 1-letter code
    @type  seq: str
    
    @return: list with the amino acids in 3-letter code
    @rtype: [str]
    """
    ## invert AA dict
    invTab = {}

    for key in aaDicStandard:
        invTab[aaDicStandard[key]] = key

    result = []
    for aa in seq:
        try:
            aa = aa.upper()
            result += [invTab[aa].upper()]
        except:
            EHandler.warning("unknown residue: " + str(aa))
            result += ['Xaa']

    return result
Example #13
    def finish( self ):
        """
        Overrides Executor method
        """
        Executor.finish( self )

        self.result = self.parse_result()

        ## if probe radius other than 1.4 A the relative surface exposure
        ## cannot be calculated, but allow this check to be a little flexible
        ## if we are forced to slightly increase the radii to escape round-off
        ## SurfaceRacer errors
        try:
            if round(self.probe, 1) == 1.4 and self.vdw_set == 1:
                self.__relExposure('MS')
                self.__relExposure('AS')
            else:
                EHandler.warning("No relative accessabilities calculated "+\
                                 "when using a prob radius other than 1.4 A"+\
                                 " or not using the Richards vdw radii set.")
        except KeyError, what:
            EHandler.warning("Missing standard accessibilities for some "+\
                             "atoms. No relative accesibilities calculated.")
            if 'relMS' in self.result: del self.result['relMS']
            if 'relAS' in self.result: del self.result['relAS']
Example #14
    def fastaRecordFromId(self, db, id):
        """
        Use::
           fastaRecordFromId( db, id ) -> Bio.Fasta.Record

        @param db: database
        @type  db: str
        @param id: sequence database ID
        @type  id: str

        @return: fasta record
        @rtype: Bio.SeqRecord.SeqRecord

        @raise BlastError: if can't fetch fasta record from database
        """
        cmd = settings.fastacmd_bin + ' -d %s -s %s' % (db, id)

        err, o = commands.getstatusoutput(cmd)
        if err:
            EHandler.warning('%s returned error: %r' % (cmd, err))
            raise BlastError('fastacmd failed. Error code: ' + str(err))

        try:
            frecord = SeqIO.parse(cStringIO.StringIO(o), 'fasta').next()
            frecord.id = str(id)

        except StopIteration:
            raise InternalError, \
                  "Couldn't fetch fasta record %s from database %s" % (id,db)

        return frecord
Example #15
    def isnoise( self, score, n_samples=1000 ):
        """
        Test how well a given score performs at predicting items in the
        positive list compared to its 'performance' at predicting random
        elements. The result corresponds to a two-tailed P value.
        See L{utest} for the analytical solution.
        @param score: the score predicted for each item
        @type  score: [ float ]
        @param n_samples: number of random samples
        @type  n_samples: int

        @return: probability P that the prediction success of score is just
        a random effect (1.0 means it's just perfectly random).
        """
        from Biskit import EHandler

        ## list of random deviations from diagonal area 0.5
        a_rand = [ self.area(c)-0.5
                   for c in self.random_roccurves(score,n_samples) ]

        sd_rand = N.std( a_rand )
        av_rand = N.mean(a_rand )

        if round(av_rand,2) != 0.0:
            EHandler.warning( 'random sampling is skewed by %f'% (av_rand-0.0))

        a = self.rocarea( score )
        z = a / sd_rand

        ## probability that a sample falls *within* z stdevs from the mean
        p = L.erf( z / N.sqrt(2) )

        ## probability that the score hits just at random
        return 1.0 - p
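
The closing lines turn the z-score into a two-tailed P value; a standalone numeric check of that identity (math.erf needs Python >= 2.7; L above is assumed to provide the equivalent erf):

    import math

    def two_tailed_p( z ):
        ## probability that a sample falls *outside* z stdevs from the mean
        return 1.0 - math.erf( z / math.sqrt( 2 ) )

    print two_tailed_p( 1.96 )    ## ~0.05, the familiar 5% significance cutoff
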
Example #16
def data3DList( c, key='fnac_10', inverse=0,
                rm=range(1,12), soln=512 ):
    """
    Create a matrix: len(rec_model) * len(lig_model) * solutions
    containing the values of the info dict with the given key.
    c - ComplexList
    """
    rm = range( 1, max(c.valuesOf( 'model1'))+1 )
    lm = range( 1, max(c.valuesOf( 'model2'))+1 )

    matrix = zeros( ( len( rm ), len( lm ), soln ), 'f' )
    
    try:

        for r in rm:
            rl = c.filter( 'model1', r )
            for l in lm:
                cl = rl.filter( 'model2', l )
                if inverse:
                    matrix[r-1][l-1] = (1./array(
                        cl.valuesOf( key, default=0 ))).tolist()
                else:
                    matrix[r-1][l-1] = cl.valuesOf( key, default=0 )

    except ValueError, why:
        try:
            lenM = len( matrix[r-1][l-1] )
            lenV = len( cl.valuesOf( key, default=0 ) )
        except:
            lenM = lenV = 0
        s =  '%i : %i len(matrix)=%i <> len(values)=%i' % (r, l, lenM, lenV)
        EHandler.error('Cannot extract fnac data. '+ s )
Example #17
    def __writeBlastResult( self, parsed_blast, outFile):
        """
        Write the result from the blast search to file (similar to the
        output produced by a regular blast run).

        writeBlastResult( parsed_blast, outFile )

        @param parsed_blast: Bio.Blast.Record.Blast
        @type  parsed_blast: Bio.Blast.Record.Blast
        @param outFile: file to write the blast result to
        @type  outFile: str
        """
        try:
            f = open( T.absfile( outFile ), 'w' )

            i=1
            for alignment in parsed_blast.alignments:
                for hsp in alignment.hsps:
                    s = string.replace(alignment.title,'\n',' ')
                    s = string.replace(s, 'pdb|',  '\npdb|')
                    f.write('Sequence %i: %s\n'%(i,s))                
                    f.write('Score: %3.1f \tE-value: %2.1e\n'\
                            %(hsp.score, hsp.expect))
                    f.write('Length/Identities: %r\tPositives: %r\tGaps: %r\n'\
                            %(hsp.identities, hsp.positives, hsp.gaps))

                    f.write( '%s\n'%hsp.query  )
                    f.write( '%s\n'%hsp.match )
                    f.write( '%s\n\n'%hsp.sbjct )
                    i += 1
            f.close()
        except Exception, why:
            EHandler.warning("Error while writing blast result to %s" %outFile)
            globals().update(locals())
            EHandler.warning("function namespace published to globals")
Example #18
    def fastaFromIds(self, db, id_lst, remote=False):
        """
        Use::
           fastaFromIds( id_lst, fastaOut ) -> { str: Bio.Fasta.Record }

        @param db: database name
        @type  db: str
        @param id_lst: list of dictionaries with pdb codes and chain IDs
        @type  id_lst: [{'pdb':str, 'chain':str}]

        @return: Dictionary mapping pdb codes to Bio.Fasta.Records. The
                 returned records have an additional field: chain.
        @rtype: { str: Bio.Fasta.Record }        
        """
        result = {}
        if self.verbose:
            s = 'from local %s using fastacmd' % db
            if remote:
                s = 'remotely from Entrez'
            self.log.add('Fetching %i fasta records %s...\n' %
                         (len(id_lst), s))

        for i in id_lst:
            try:
                if remote:
                    r = self.fastaRecordFromId_remote(i['gb'])
                    r.id = i['pdb']  ## clustering expects PDB, not gb ID
                else:
                    r = self.fastaRecordFromId(db, i['pdb'], i['chain'])
                r.chain = i['chain']
                result[i['pdb']] = r
            except BlastError, why:
                EHandler.warning("ERROR (ignored): couldn't fetch " + str(i))
Example #19
    def reportClustering( self, raw=None ):
        """
        Report the clustering result.

        Writes:
         - clustering results to L{F_CLUSTER_LOG}
         - blast records to L{F_BLAST_OUT}
         - blast records of centers to L{F_CLUSTER_BLAST_OUT}
         - raw clustering results to L{F_CLUSTER_RAW} if raw not None

        @param raw: write raw clustering result to disk (default: None)
        @type  raw: 1|0         
        """
        try:
            if self.verbose:
                f = open( self.outFolder + self.F_CLUSTER_LOG, 'w', 1)

                for cluster in self.clusters:

                    f.write( "%i\t%s\n" % ( len( cluster ), str( cluster )))

                f.close()

                ## write blast records of centers to disc
                centers = [ c[0] for c in self.clusters ]

                self.writeClusteredBlastResult( \
                    self.outFolder + self.F_BLAST_OUT,
                    self.outFolder + self.F_CLUSTER_BLAST_OUT, centers )


                self.copyClusterOut( raw=raw )

        except IOError, why:
            EHandler.warning( "Can't write cluster report." + str(why) )
Example #20
    def loadResContacts( self ):
        """
        Uncompress residue contact matrix if necessary.
        
        @return: dict with contact matrix and parameters OR None
        @rtype: dict OR None
        """
        ## Backwards compatibility
        if self.contacts != None and type( self.contacts ) == str:
            self.contacts = t.load( self.contacts )
            EHandler.warning("loading old-style pickled contacts.") 
            return self.contacts

        ## New, uncompression from list of indices into raveled array
        if self.contacts != None and \
           len( N.shape( self.contacts['result'])) == 1:

            try:
                lenRec, lenLig = self.contacts['shape']
            except:
                EHandler.warning("uncompressing contacts without shape")
                lenRec = self.rec().lenResidues()
                lenLig = self.lig().lenResidues()

            m = N.zeros( lenRec * lenLig )
            N.put( m, self.contacts['result'], 1 )

            self.contacts['result'] = N.reshape( m, (lenRec, lenLig) )

        return self.contacts
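
The decompression above rebuilds the 0/1 contact map from the flat indices of its non-zero cells; a minimal round trip on a made-up 2 x 3 matrix, in plain numpy (Biskit's N wraps the equivalent calls):

    import numpy as N

    contacts = N.array( [[0, 1, 0],
                         [1, 0, 1]] )

    indices = N.flatnonzero( contacts )     ## compressed form: array([1, 3, 5])

    m = N.zeros( 2 * 3 )
    N.put( m, indices, 1 )
    restored = N.reshape( m, (2, 3) )       ## same 0/1 pattern as 'contacts'
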
Example #21
    def mergeHmmSeq(self, seq1, seq2):
        """
        Merges two sequences into one.
        Multiple hits with one profile cannot overlap!! Overlap == ERROR

        @param seq1: sequence
        @type  seq1: str
        @param seq2: sequence
        @type  seq2: str

        @return: merged sequence or None
        @rtype: str OR None 
        """
        if len(seq1) != len(seq2):
            EHandler.warning( 'ERR in mergeHmmSeq:\n' +\
                         '\tSequences of different lengths cannot be merged')
            return None
        else:
            result = ''
            for i in range(len(seq1)):
                ## no match in either
                if seq1[i] == seq2[i] == '.':
                    result += '.'
                ## match in seq1
                if seq1[i] > seq2[i]:
                    result += seq1[i]
                ## match in seq2
                if seq1[i] < seq2[i]:
                    result += seq2[i]

            return result
Example #22
    def __defaults( self ):
        self.models = getattr( self, 'models', ComplexModelRegistry() )
        if getattr( self, 'rec_models', 0) != 0:
            EHandler.warning(
                're-creating model registry..re-pickle this list!')
            for c in self.toList():
                self.models.addComplex( c )
            del self.rec_models
            del self.lig_models
Example #23
    def __defaults( self ):
        self.models = getattr( self, 'models', ComplexModelRegistry() )
        if getattr( self, 'rec_models', 0) != 0:
            EHandler.warning(
                're-creating model registry..re-pickle this list!')
            for c in self.toList():
                self.models.addComplex( c )
            del self.rec_models
            del self.lig_models
Example #24
    def __getstate__(self):
        """
        Called before pickling the object.
        """
        try:
            if type(self.frames) == list or self.frames.dtype.char == 'd':
                EHandler.warning("Converting coordinates to float array.")
                self.frames = N.array(self.frames).astype(N.Float32)
        except:
            EHandler.warning('Could not convert frames to float array.', 1)

        return self.__dict__
Example #25
    def add(self, str):
        """
        Add string str and a line break to the xplor input file.

        @param str: string to add to file
        @type  str: str        
        """
        try:
            self.fgenerate.write(str + '\n')
        except (IOError):
            EHandler.error(
                "XPlorInput.add(): Error adding str to xplor input file.")
Example #26
    def add(self, str):
        """
        Add string str and a line break to the xplor input file.

        @param str: string to add to file
        @type  str: str        
        """
        try:
            self.fgenerate.write(str + '\n')
        except (IOError):
            EHandler.error(
                "XPlorInput.add(): Error adding str to xplor input file.")
Example #27
    def __getstate__(self):
        """
        Called before pickling the object.
        """
        try:
            if type( self.frames ) == list or self.frames.dtype.char == 'd':
                EHandler.warning("Converting coordinates to float array.")
                self.frames = N.array( self.frames ).astype(N.Float32)
        except:
            EHandler.warning('Could not convert frames to float array.', 1)

        return self.__dict__
Example #28
    def writePdb(self, index, fname):
        """
        Write (possibly transformed) coordinates back to pdb.

        @param index: frame index in trajectory
        @type  index: int
        @param fname: name of new file
        @type  fname: str 
        """
        try:
            self.getPDBModel(index).writePdb(fname)
        except:
            EHandler.error('Error writing %s.' % fname)
Example #29
    def writePdb( self, index, fname):
        """
        Write (possibly transformed) coordinates back to pdb.

        @param index: frame index in trajectory
        @type  index: int
        @param fname: name of new file
        @type  fname: str 
        """
        try:
            self.getPDBModel( index ).writePdb( fname )
        except:
            EHandler.error('Error writing %s.' % fname)
Example #30
    def concat( self, *profiles ):
        """
        Concatenate all profiles in this with corresponding profiles in the
        given ProfileCollection(s). Profiles that are not found in all
        ProfileCollections are skipped::
          p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
          same number of profiles as p0 but with the length of p0+p1+p2..

        @param profiles: profile(s) to concatenate
        @type  profiles: ProfileCollection(s)
        
        @return: concatenated profile(s)  
        @rtype: ProfileCollection / subclass
        """
        ## end recursion (no more arguments)
        if len( profiles ) == 0:
            return self

        next = profiles[0]

        r = self.__class__()
        
        ## special case: concat something to empty profile collection
##        if self.profLength() == 0:
##            return next.clone().concat( *profiles[1:] )
##
##        if next.profLength() == 0:
##            return self.clone().concat( *profiles[1:] )
        

        for k, p in self.profiles.items():

            try:
                if isinstance( p, N.ndarray ):
                    r.set( k, N.concatenate( (p, next.get(k)) ),
                           **self.infos[k] )
                else:
                    r.set( k, p + next.get(k), **self.infos[k] )
            except:
                EHandler.warning("Profile %s skipped during concat." % k, 
                                 error=0)
                r.remove( k )

        return r.concat( *profiles[1:] )
Example #31
    def __substitute( self, fragments, name, value ):
        """
        Look in all not yet substituted fragments for parts that can be
        substituted by value and, if successful, create a new fragment
        
        @param fragments: fragment tuples
        @type  fragments: [ (str, str) ]
        @param name: substitution variable name
        @type  name: str
        @param value: substitution value in current environment
        @type  value: str
        
        @return: fragment tuples
        @rtype: [ (str, str) ]
        """
        result = []

        try:
            for abs, subst in fragments:

                if not subst:   ## unsubstituted fragment

##                     pos = abs.find( value )
                    pos = self.__find_subpath( abs, value )

                    if pos != -1:
                        end = pos + len( value )

                        f1, f2, f3 = abs[0:pos], abs[pos:end], abs[end:]

                        if f1:
                            result += [ (f1, None) ] ## unsubstituted head
                        result += [ (f2, name) ]     ## new substitution
                        if f3:
                            result += [ (f3, None) ] ## unsubstituted tail

                    else:
                        result += [ (abs, subst) ]
                else:
                    result += [ (abs, subst ) ]
        except OSError, why:
            EHandler.fatal("Substituting path fragments: \n" +
                                 str( fragments ) + '\nname: ' + str( name ) +
                                 '\nvalue:' + str( value ) )
Example #32
    def __substitute(self, fragments, name, value):
        """
        Look in all not yet substituted fragments for parts that can be
        substituted by value and, if successful, create a new fragment
        
        @param fragments: fragment tuples
        @type  fragments: [ (str, str) ]
        @param name: substitution variable name
        @type  name: str
        @param value: substitution value in current environment
        @type  value: str
        
        @return: fragment tuples
        @rtype: [ (str, str) ]
        """
        result = []

        try:
            for abs, subst in fragments:

                if not subst:  ## unsubstituted fragment

                    ##                     pos = abs.find( value )
                    pos = self.__find_subpath(abs, value)

                    if pos != -1:
                        end = pos + len(value)

                        f1, f2, f3 = abs[0:pos], abs[pos:end], abs[end:]

                        if f1:
                            result += [(f1, None)]  ## unsubstituted head
                        result += [(f2, name)]  ## new substitution
                        if f3:
                            result += [(f3, None)]  ## unsubstituted tail

                    else:
                        result += [(abs, subst)]
                else:
                    result += [(abs, subst)]
        except OSError, why:
            EHandler.fatal("Substituting path fragments: \n" + str(fragments) +
                           '\nname: ' + str(name) + '\nvalue:' + str(value))
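
A plain-string sketch of the head/match/tail split performed in the loop; str.find stands in for the private __find_subpath (which, per the commented-out line, once was a plain find and presumably also respects path-component boundaries), and the path and value are invented:

    path  = '/home/user/projects/data'
    value = '/home/user'

    pos = path.find( value )
    end = pos + len( value )
    f1, f2, f3 = path[0:pos], path[pos:end], path[end:]
    ## f1 == ''                -> empty head, dropped
    ## f2 == '/home/user'      -> tagged with the substitution variable name
    ## f3 == '/projects/data'  -> kept as unsubstituted tail
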
Example #33
    def __syncModel(self, new_model, old_model):
        """
        Connect new rec or lig model to old one, to minimize storage.
        
        @param new_model: PDBModel / PCRModel
        @type  new_model: PDBModel
        @param old_model: PDBModel / PCRModel
        @type  old_model: PDBModel
        
        @return: PDBModel / PCRModel, new model that only keeps
                 changes relative to old, the old model becomes the
                 source of the new, if possible
        @rtype: PDBModel
        """
        ## try to fix atom order of new_model so that it is identical to old
        if old_model.equals(new_model) != [1, 1]:
            i_new, i_old = new_model.compareAtoms(old_model)

            if len(i_new) == len(new_model):
                new_model.keep(i_new)

        ## create result model that only keeps difference of new and old
        if old_model.equals(new_model) == [1, 1]:

            ## stays compatible with PCRModel.__init__ and PDBModel.__init__
            r = old_model.__class__(source=old_model)

            r.setXyz(new_model.getXyz())

            ## check for profiles identical to source and adapt 'changed'
            r.update()

            if not MU.arrayEqual(r.xyz, old_model.xyz):
                r.removeProfile('relASA', 'ASA_sc', 'ASA_total', 'ASA_bb')

            return r

        EHandler.warning(
            'ComplexEvolving: Cannot connect new to old PDBModel.')

        new_model.disconnect()
        return new_model
Example #34
    def __syncModel( self, new_model, old_model ):
        """
        Connect new rec or lig model to old one, to minimize storage.
        
        @param new_model: PDBModel / PCRModel
        @type  new_model: PDBModel
        @param old_model: PDBModel / PCRModel
        @type  old_model: PDBModel
        
        @return: PDBModel / PCRModel, new model that only keeps
                 changes relative to old, the old model becomes the
                 source of the new, if possible
        @rtype: PDBModel
        """
        ## try to fix atom order of new_model so that it is identical to old
        if old_model.equals( new_model ) != [1,1]:
            i_new, i_old = new_model.compareAtoms( old_model )

            if len( i_new ) == len( new_model ):
                new_model.keep( i_new )

        ## create result model that only keeps difference of new and old
        if old_model.equals( new_model ) == [1,1]:

            ## stays compatible with PCRModel.__init__ and PDBModel.__init__
            r = old_model.__class__( source=old_model )

            r.setXyz( new_model.getXyz() )

            ## check for profiles identical to source and adapt 'changed'
            r.update()  

            if not MU.arrayEqual( r.xyz, old_model.xyz ):
                r.removeProfile( 'relASA', 'ASA_sc', 'ASA_total', 'ASA_bb' )

            return r

        EHandler.warning(
            'ComplexEvolving: Cannot connect new to old PDBModel.')

        new_model.disconnect()
        return new_model
Example #35
    def getResult(self, **arg):
        """
        Collapse the results for different values of the variable parameter
        into lists and put the results into a tree a la::
          r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

        @return: tree-like dict ordered by variable value, member, protocol
        @rtype: dict of dict of dict of lists
        """
        tree = self.dictionate(self.result)

        vvalues = tree.keys()
        vvalues.sort()

        keys = self.result.keys()
        sub_keys = [k for k in keys if k[0] == vvalues[0]]

        r = {}
        for v, member, protcl in sub_keys:

            try:
                if not member in r:
                    r[member] = {}

                r[member][protcl] = {}

                run_dic = tree[v][member][protcl]

                for k in run_dic.keys():
                    r[member][protcl][k] = [ tree[v][member][protcl][k] \
                                             for v in vvalues ]
            except:
                EHandler.warning('missing result: ' + str(T.lastError()))

        r['var'] = self.var
        r['vrange'] = self.vrange
        r['protocols'] = self.protocols

        self.result_tree = r
        return r
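
To make the collapse concrete, a toy tree with two values of the variable parameter (0.5 and 1.0), one member (0) and one protocol ('min'); all names and numbers invented:

    tree = { 0.5: {0: {'min': {'E': -10.0}}},
             1.0: {0: {'min': {'E': -12.0}}} }

    ## after the loop above:  r[0]['min']['E'] == [-10.0, -12.0]
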
Example #36
    def getResult( self, **arg ):
        """
        Collapse the results for different values of the variable parameter
        into lists and put the results into a tree a la::
          r[ member_index ][ protocol_name ][ result_field ] -> [ values ]

        @return: tree-like dict ordered by variable value, member, protocol
        @rtype: dict of dict of dict of lists
        """
        tree = self.dictionate( self.result )

        vvalues = tree.keys()
        vvalues.sort()

        keys = self.result.keys()
        sub_keys = [ k for k in keys if k[0] == vvalues[0] ]

        r = {}
        for v, member, protcl in sub_keys:

            try:
                if not member in r:
                    r[member] = {}

                r[member][protcl] = {}

                run_dic = tree[v][member][protcl]

                for k in run_dic.keys():
                    r[member][protcl][k] = [ tree[v][member][protcl][k] \
                                             for v in vvalues ]
            except:
                EHandler.warning('missing result: ' + str(T.lastError()))

        r['var'] = self.var
        r['vrange']= self.vrange
        r['protocols'] = self.protocols

        self.result_tree = r
        return r
Example #37
    def validate(self):
        """
        Validate the path to the binary.
        
        @raise ExeConfigError: if environment is not fit for running
                               the program
        """
        try:
            self.bin = T.absbinary(self.bin)  ## raises IOError if not found

            missing = self.update_environment()
            report = '%s is missing environment variables: %r'\
                     % (self.name, missing )

            if missing and self.strict:
                raise ExeConfigError, report

            if missing:
                EHandler.warning(report)

        except IOError, why:
            raise ExeConfigError, str(why) + ' Check %s!' % self.dat
Example #38
    def isnoise(self, score, n_samples=1000):
        """
        Test how well a given score performs at predicting items in the
        positive list compared to its 'performance' at predicting random
        elements. The result corresponds to a two-tailed P value.
        See L{utest} for the analytical solution.
        @param score: the score predicted for each item
        @type  score: [ float ]
        @param n_samples: number of random samples
        @type  n_samples: int

        @return: probability P that the prediction success of score is just
        a random effect (1.0 means it's just perfectly random).
        """
        from Biskit import EHandler

        ## list of random deviations from diagonal area 0.5
        a_rand = [
            self.area(c) - 0.5
            for c in self.random_roccurves(score, n_samples)
        ]

        sd_rand = N.std(a_rand)
        av_rand = N.mean(a_rand)

        if round(av_rand, 2) != 0.0:
            EHandler.warning('random sampling is skewed by %f' %
                             (av_rand - 0.0))

        a = self.rocarea(score)
        z = a / sd_rand

        ## probability that a sample falls *within* z stdevs from the mean
        p = L.erf(z / N.sqrt(2))

        ## probability that the score hits just at random
        return 1.0 - p
Example #39
    def __writeBlastResult(self, parsed_blast, outFile):
        """
        Write the result from the blast search to file (similar to the
        output produced by a regular blast run).

        writeBlastResult( parsed_blast, outFile )

        @param parsed_blast: Bio.Blast.Record.Blast
        @type  parsed_blast: Bio.Blast.Record.Blast
        @param outFile: file to write the blast result to
        @type  outFile: str
        """
        try:
            f = open(T.absfile(outFile), 'w')

            i = 1
            for alignment in parsed_blast.alignments:
                for hsp in alignment.hsps:
                    s = string.replace(alignment.title, '\n', ' ')
                    s = string.replace(s, 'pdb|', '\npdb|')
                    f.write('Sequence %i: %s\n' % (i, s))
                    f.write('Score: %3.1f \tE-value: %2.1e\n'\
                            %(hsp.score, hsp.expect))
                    f.write('Length/Identities: %r\tPositives: %r\tGaps: %r\n'\
                            %(hsp.identities, hsp.positives, hsp.gaps))

                    f.write('%s\n' % hsp.query)
                    f.write('%s\n' % hsp.match)
                    f.write('%s\n\n' % hsp.sbjct)
                    i += 1
            f.close()
        except Exception, why:
            EHandler.warning("Error while writing blast result to %s" %
                             outFile)
            globals().update(locals())
            EHandler.warning("function namespace published to globals")
Example #40
issued if pypvm is missing and the public classes are exported as
Pseudo-classes. Pseudo classes are empty and raise an ImportError when you
try to initialize them. See also L{Biskit.tools.tryImport}.
"""
## import user  ## ensure that ~/.pythonrc.py is executed

##
## error-tolerant export of public classes
##
from Biskit import EHandler
import Biskit.tools as T

pvm_installed = True

pvm_installed = T.tryImport('TrackingJobMaster',
                            'TrackingJobMaster',
                            namespace=globals())
pvm_installed = T.tryImport('dispatcher', 'JobSlave',
                            namespace=globals()) and pvm_installed

if not pvm_installed:
    EHandler.warning(
        'Could not import PVM (Parallel Virtual Machine) modules.' +
        ' Please check that PVM and pypvm are installed!\n' +
        '\tParallelisation is not available.')

##
## clean up
##
del EHandler, T
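
A minimal sketch of the tryImport pattern used above; the real helper lives in Biskit.tools and additionally exports an empty pseudo-class that raises ImportError on instantiation, as described in the module docstring:

    def try_import( module, cls, namespace ):
        ## return True on success; swallow the ImportError on failure
        try:
            m = __import__( module, globals(), locals(), [cls] )
            namespace[cls] = getattr( m, cls )
            return True
        except ImportError:
            return False
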
Example #41
    #    searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000)

    ## local Blast
    searcher.localBlast(f_target, seq_db, 'blastp', alignments=500, e=0.0001)

    ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1
    ## expects all.fasta
    #    searcher.clusterFastaIterative( )
    searcher.clusterFasta()

    searcher.writeFastaClustered()

    tools.flushPrint('Done.\n')

except:
    EHandler.error('Error while searching for homologues.')

###############
## TemplateSearcher
##
## Find modelling templates, blasting the target sequence against "tmp_db"
## Cluster the sequences and download the pdbs to templates/all

## input: target.fasta
##
## output: templates/blast.out
##         templates/all.fasta
##         templates/cluster_result.out
##         templates/nr.fasta              (input for Aligner)
##         templates/all/*.pdb
##         templates/nr/chain_index.txt    (input for TemplateCleaner)
Example #42
## note 1: If there are more than approximately 50 sequences overall
##         t_coffee will eat all the memory and the job will not finish
##         This should be fixed in more recent versions of T-Coffee
##         (v > 3.2) where T-Coffee, according to the manual "switches
##         to a heuristic mode, named DPA, where DPA stands for Double
##         Progressive Alignment."

## note 2: If there is only one template structure step 2 of T-coffee
##         will not work. Solution, skip the structural alignment if
##         only one template structure is provided.

## note 3: In quite some cases the sequence retrieved from the nrpdb
##         sequence database is different from the sequence extracted
##         from the coordinates in the pdb-file. This will sometimes
##         cause t-coffee to terminate with an error (two sequences
##         with the same name but with different sequences). Temporary
##         solution: Choose another structure from the same cluster
##         as the troublemaker.

try:
    a = Aligner(outFolder, log, verbose=1, sap=sap)

    a.align_for_modeller_inp()

    a.go(host)

except:
    EHandler.error('Error while building alignments.')
    print "\nalign.py -? or align.py -help for help screen"
Example #43
if '?' in options or 'help' in options:
    _use( defaultOptions() )

log = None
if options['log']:
    log = LogFile( outFolder + '/' + options['log'], 'a' ) 

###################
## TemplateCleaner
##
## Prepare pdb files in templates/nr for T-coffee and modeller
## (replace nonstandard residues, remove hydrogens,
##   remove atoms with multiple configurations, etc.)

## input: templates/nr/*.pdb
##        templates/nr/chain_index.txt
##
## output: templates/t_coffee/*.alpha    (input for Aligner)
##         templates/modeller/*.pdb      (input for Modeller)

try:
    cleaner = TemplateCleaner( outFolder, log )

    inp_dic = modUtils.parse_tabbed_file( chIndex )

    cleaner.process_all( inp_dic )

except:
    EHandler.error( 'Error while cleaning templates')

Example #44
Protein-protein docking related modules
"""
from Biskit import EHandler

try:
    from Complex import Complex
    from ComplexVC import ComplexVC
    from ComplexVCList import ComplexVCList
    from ComplexList import ComplexList
    from ComplexTraj import ComplexTraj
    from ComplexModelRegistry import ComplexModelRegistry
    from ComplexRandomizer import ComplexRandomizer
    from Docker import Docker
    from FixedList import FixedList
    from HexParser import HexParser
    from delphiBindingEnergy import DelphiBindingEnergy
##     from Intervor import Intervor
##     from PatchGenerator import PatchGenerator
##     from PatchGeneratorFromOrbit import PatchGeneratorFromOrbit

except IOError, why:
    EHandler.warning("Couldn't import all Biskit.Dock modules.\n" + str(why))

## PVM-dependent modules
try:
    from ContactMaster import ContactMaster
    from ContactSlave import ContactSlave
except Exception, why:
    EHandler.warning("Couldn't import PVM-dependent modules of Biskit.Dock.\n"+\
                     str( why ) )
Example #45
    def concatEnsembles( self, *traj ):
        """
        Concatenate this with other trajectories in a zigzag manner,
        resulting in an ensembleTraj with additional members.
        The ref model of the new Trajectory is a 'semi-deep' copy of this
        trajectory's model (see L{PDBModel.take()} )::
          concat( traj [, traj2, traj3, ..] ) -> Trajectory
        
        @param traj: with identical atoms as this one
        @type  traj: one or more EnsembleTrajectory

        @todo: fix so that pc, and profiles are not lost
        """
        if len( traj ) == 0:
            return self

        r = self.__class__( n_members = self.n_members + traj[0].n_members )

        min_members = min( self.n_members, traj[0].n_members )
        min_frames = min( self.lenFrames(), traj[0].lenFrames() )

        steps = self.lenFrames()/self.n_members + \
                traj[0].lenFrames()/traj[0].n_members

        def __everyOther( traj_0, traj_1, list_0, list_1, minMembers,
                          minFrames, loops ):
            result = []
            for j in range( 0, minMembers/2 ):

                for i in range( j*loops , j*loops + minFrames*2/minMembers ):
                    result += [ list_0[i] ]
                    result += [ list_1[i] ]

                while i < j*traj_0.n_members:
                    result += [ list_0[i] ]

                while i < j*traj_1.n_members:
                    result += [ list_1[i] ]

            return result

        frames = __everyOther( self, traj[0], self.frames,
                               traj[0].frames, min_members,
                               min_frames, steps )

        r.frames = N.array(frames) 
        r.setRef( self.ref.clone())

        if self.frameNames and traj[0].frameNames:
            r.frameNames =  __everyOther( self, traj[0], self.frameNames,
                                          traj[0].frameNames, min_members,
                                          min_frames, steps )
        try:
            # NOT TESTED!!
            if self.pc and traj[0].pc:
                r.pc['p'] =  __everyOther( self, traj[0], self.pc['p'],
                                           traj[0].pc['p'], min_members,
                                           min_frames, steps )

                r.pc['u'] =  __everyOther( self, traj[0], self.pc['u'],
                                           traj[0].pc['u'], min_members,
                                           min_frames, steps )

#                r.pc['p'] = N.concatenate( (self.pc['p'], traj[0].pc['p']),0)
#                r.pc['u'] = N.concatenate( (self.pc['u'], traj[0].pc['u']),0)
        except TypeError, why:
            EHandler.error('cannot concat PC '+str(why) )
Example #46
Protein-protein docking related modules
"""
from Biskit import EHandler

try:
    from Complex import Complex
    from ComplexEvolving import ComplexEvolving
    from ComplexEvolvingList import ComplexEvolvingList
    from ComplexList import ComplexList
    from ComplexTraj import ComplexTraj
    from ComplexModelRegistry import ComplexModelRegistry
    from ComplexRandomizer import ComplexRandomizer
    from Docker import Docker
    from FixedList import FixedList
    from HexParser import HexParser
    from delphiBindingEnergy import DelphiBindingEnergy
##     from Intervor import Intervor
##     from PatchGenerator import PatchGenerator
##     from PatchGeneratorFromOrbit import PatchGeneratorFromOrbit

except IOError, why:
    EHandler.warning("Couldn't import all Biskit.Dock modules.\n" + str(why))

## PVM-dependent modules
try:
    from ContactMaster import ContactMaster
    from ContactSlave import ContactSlave
except Exception, why:
    EHandler.warning("Couldn't import PVM-dependent modules of Biskit.Dock.\n"+\
                     str( why ) )
Example #47
#    searcher.localPSIBlast( target, seq_db, e=0.1, alignments=1000)

    ## local Blast
    searcher.localBlast( f_target, seq_db, 'blastp', alignments=500, e=0.0001 )

    ## cluster blast results. Defaults: simCut=1.75, lenCut=0.9, ncpu=1
    ## expects all.fasta
#    searcher.clusterFastaIterative( )
    searcher.clusterFasta() 

    searcher.writeFastaClustered()
    
    tools.flushPrint('Done.\n')
    
except:
    EHandler.error( 'Error while searching for homologues.')
    

###############
## TemplateSearcher
##
## Find modelling templates, blasting the target sequence against "tmp_db"
## Cluster the sequences and download the pdbs to templates/all

## input: target.fasta
##
## output: templates/blast.out
##         templates/all.fasta
##         templates/cluster_result.out
##         templates/nr.fasta              (input for Aligner)
##         templates/all/*.pdb
Example #48
if '?' in options or 'help' in options:
    _use(defaultOptions())

log = None
if options['log']:
    log = LogFile(outFolder + '/' + options['log'], 'a')

###################
## TemplateCleaner
##
## Prepare pdb files in templates/nr for T-coffee and modeller
## (replace nonstandard residues, remove hydrogens,
##   remove atoms with multiple configurations, etc.)

## input: templates/nr/*.pdb
##        templates/nr/chain_index.txt
##
## output: templates/t_coffee/*.alpha    (input for Aligner)
##         templates/modeller/*.pdb      (input for Modeller)

try:
    cleaner = TemplateCleaner(outFolder, log)

    inp_dic = modUtils.parse_tabbed_file(chIndex)

    cleaner.process_all(inp_dic)

except:
    EHandler.error('Error while cleaning templates')
Example #49
##
## output: modeller/modeller.log
##                 /*.B9999000??   <- models
try:

    if options['verbose'] > 0:
        print "\n"+\
              "Type model.py -? or model.py -help for a full list of options!"

    m8 = M(**options)

    if not 'dry' in options:
        r = m8.run()  ## comment out for testing

except:
    EHandler.error('Error while modelling.')

#####################
## Show output

## show result in PyMol
if options.has_key('s'):
    names = []

    ## fit backbone of all models to average until convergence
    models = glob.glob('%s/target.B*' % (m8.outFolder + m8.F_RESULT_FOLDER))
    traj = Trajectory(models)
    traj.blockFit2ref(mask=traj[0].maskBB())

    ## calculate and print rmsd matrix
    rmsHeavy = traj.pairwiseRmsd()
Example #50
    def concatEnsembles(self, *traj):
        """
        Concatenate this with other trajectories in a zigzag manner,
        resulting in an ensembleTraj with additional members.
        The ref model of the new Trajectory is a 'semi-deep' copy of this
        trajectory's model (see L{PDBModel.take()} )::
          concat( traj [, traj2, traj3, ..] ) -> Trajectory
        
        @param traj: with identical atoms as this one
        @type  traj: one or more EnsembleTrajectory

        @todo: fix so that pc, and profiles are not lost
        """
        if len(traj) == 0:
            return self

        r = self.__class__(n_members=self.n_members + traj[0].n_members)

        min_members = min(self.n_members, traj[0].n_members)
        min_frames = min(self.lenFrames(), traj[0].lenFrames())

        steps = self.lenFrames()/self.n_members + \
                traj[0].lenFrames()/traj[0].n_members

        def __everyOther(traj_0, traj_1, list_0, list_1, minMembers, minFrames,
                         loops):
            result = []
            for j in range(0, minMembers / 2):

                for i in range(j * loops,
                               j * loops + minFrames * 2 / minMembers):
                    result += [list_0[i]]
                    result += [list_1[i]]

                while i < j * traj_0.n_members:
                    result += [list_0[i]]

                while i < j * traj_1.n_members:
                    result += [list_1[i]]

            return result

        frames = __everyOther(self, traj[0], self.frames, traj[0].frames,
                              min_members, min_frames, steps)

        r.frames = N0.array(frames)
        r.setRef(self.ref.clone())

        if self.frameNames and traj[0].frameNames:
            r.frameNames = __everyOther(self, traj[0], self.frameNames,
                                        traj[0].frameNames, min_members,
                                        min_frames, steps)
        try:
            # NOT TESTED!!
            if self.pc and traj[0].pc:
                r.pc['p'] = __everyOther(self, traj[0], self.pc['p'],
                                         traj[0].pc['p'], min_members,
                                         min_frames, steps)

                r.pc['u'] = __everyOther(self, traj[0], self.pc['u'],
                                         traj[0].pc['u'], min_members,
                                         min_frames, steps)

#                r.pc['p'] = N0.concatenate( (self.pc['p'], traj[0].pc['p']),0)
#                r.pc['u'] = N0.concatenate( (self.pc['u'], traj[0].pc['u']),0)
        except TypeError, why:
            EHandler.error('cannot concat PC ' + str(why))
Example #51
The compilation of PVM/pypvm can be tricky on some architectures. In order
to support installations that don't need parallelisation, only a warning is
issued if pypvm is missing and the public classes are exported as
Pseudo-classes. Pseudo classes are empty and raise an ImportError when you
try to initialize them. See also L{Biskit.tools.tryImport}.
"""
## import user  ## ensure that ~/.pythonrc.py is executed

##
## error-tolerant export of public classes
##
from Biskit import EHandler
import Biskit.tools as T

pvm_installed = True

pvm_installed = T.tryImport( 'TrackingJobMaster', 'TrackingJobMaster',
                             namespace=globals())
pvm_installed = T.tryImport( 'dispatcher', 'JobSlave',
                             namespace=globals() ) and pvm_installed

if not pvm_installed:
    EHandler.warning('Could not import PVM (Parallel Virtual Machine) modules.'+
        ' Please check that PVM and pypvm are installed!\n'+
        '\tParallelisation is not available.')

##
## clean up
##
del EHandler, T
Example #52
    def _cease( self, ref ):
        try:
            self.alive = False
        except:
            EHandler.warning('error in CrossView._cease')
            pass
Example #53
    def concat(self, *profiles):
        """
        Concatenate all profiles in this with corresponding profiles in the
        given ProfileCollection(s). Profiles that are not found in all
        ProfileCollections are skipped::
          p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
          same number of profiles as p0 but with the length of p0+p1+p2..

        @param profiles: profile(s) to concatenate
        @type  profiles: ProfileCollection(s)
        
        @return: concatenated profile(s)  
        @rtype: ProfileCollection / subclass
        """
        ## end recursion (no more arguments)
        if len(profiles) == 0:
            return self

        next = profiles[0]

        r = self.__class__()

        ##!!! BIG FAT WARNING: empty profilecollection does not imply empty model
        ## an empty PC w/o any profiles currently doesn't know which length
        ## it is supposed to have. If profLength == 0 for real, then
        ## the next PC's profiles don't need to be skipped
        ## Otherwise,
        ## this creates too-short profiles if the PC parent model has
        ## non-zero length and simply doesn't have any profiles registered.

        ##        ## special case 1: concat something to empty profile collection
        ##        if not self.keys():
        ##            return next.clone().concat( *profiles[1:] )
        ##
        ##        ## special case 2: concat empty profile collection to this one
        ##        if not next.keys():
        ##            return self.clone().concat( *profiles[1:] )
        ##
        allkeys = M.union(self.profiles.keys(), next.keys())

        ##        for k, p in self.profiles.items():
        for k in allkeys:
            p = self.profiles.get(k, None)
            pnext = next.profiles.get(k, None)
            infos = {}

            if p is None:
                default = next[k, 'default']
                p = self.__clonedefault(pnext, self.profLength(), default)
                infos = next.infos[k]

            if pnext is None:
                default = self[k, 'default']
                pnext = self.__clonedefault(p, next.profLength(), default)
                infos = self.infos[k]

            try:
                if isinstance(p, N.ndarray):

                    if len(pnext) == 0:
                        pnext = pnext.astype(p.dtype)

                    r.set(k, N.concatenate((p, pnext)), **infos)

                else:
                    r.set(k, p + pnext, **infos)
            except:
                EHandler.warning("Profile %s skipped during concat." % k,
                                 error=1)
                r.remove(k)

        return r.concat(*profiles[1:])
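
The pnext.astype(p.dtype) cast above matters because concatenating even an empty default-dtype array onto an integer profile silently upcasts it; in plain numpy:

    import numpy as N

    p     = N.array( [1, 2, 3] )
    pnext = N.zeros( 0 )                   ## empty, dtype float64

    N.concatenate( (p, pnext) ).dtype                    ## float64 - upcast
    N.concatenate( (p, pnext.astype(p.dtype)) ).dtype    ## stays integer
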
Example #54
##         t_coffee will eat all the memory and the job will not finish
##         This should be fixed in more recent versions of T-Coffee
##         (v > 3.2) where T-Coffee, according to the manual "switches
##         to a heuristic mode, named DPA, where DPA stands for Double
##         Progressive Alignment."
    
## note 2: If there is only one template structure step 2 of T-coffee
##         will not work. Solution, skip the structural alignment if
##         only one template structure is provided.

## note 3: In quite some cases the sequence retrieved from the nrpdb
##         sequence database is different from the sequence extracted
##         from the coordinates in the pdb-file. This will sometimes
##         cause t-coffee to terminate with an error (two sequences
##         with the same name but with different sequences). Temporary
##         solution: Choose another structure from the same cluster
##         as the troublemaker.


try:
    a = Aligner( outFolder, log, verbose=1, sap=sap )

    a.align_for_modeller_inp()

    a.go(host)

except:
    EHandler.error( 'Error while building alignments.')
    print "\nalign.py -? or align.py -help for help screen"