Ejemplo n.º 1
0
    def test_ChainSeparator( self ):
        """ChainSeparator test

        Create a ChainSeparator from C{self.fname} / C{self.outPath}, fetch
        chains from it with next() until it returns None, concatenate the
        residue sequences of all fetched chains and compare their
        single-letter form with C{self.EXPECTED}.
        """
        self.sep = ChainSeparator( self.fname, self.outPath, 1)

        self.chain = self.sep.next()

        i = 1
        all_chains = []
        ## identity test against None: '<>' is deprecated and an equality
        ## test could be intercepted by the chain object's own comparison
        while self.chain is not None:
            if self.local:
                ## single-argument print gives the same output whether
                ## print is a statement or a function
                print('Chain %i: %s' % (
                    i, ''.join(singleAA(self.chain.sequence() ) )))
            all_chains += self.chain.sequence()
            self.chain = self.sep.next()
            i += 1

        if self.local:
            print('ChainSeparator log file written to: %s'%self.sep.log.fname)

        r = ''.join( singleAA( all_chains ) )
        self.assertEqual(r, self.EXPECTED)
Ejemplo n.º 2
0
    def test_ChainSeparator(self):
        """ChainSeparator test

        Build a ChainSeparator for C{self.fname} / C{self.outPath}, pull
        chains from it via next() until None comes back, collect the
        residue sequences of all chains and check that their single-letter
        form equals C{self.EXPECTED}.
        """
        self.sep = ChainSeparator(self.fname, self.outPath, 1)

        self.chain = self.sep.next()

        i = 1
        all_chains = []
        ## compare with None by identity; the '<>' operator is deprecated
        while self.chain is not None:
            if self.local:
                seq = ''.join(singleAA(self.chain.sequence()))
                ## one pre-formatted argument prints identically whether
                ## print is a statement or a function
                print('Chain %i: %s' % (i, seq))
            all_chains += self.chain.sequence()
            self.chain = self.sep.next()
            i += 1

        if self.local:
            print('ChainSeparator log file written to: %s' %
                  self.sep.log.fname)

        r = ''.join(singleAA(all_chains))
        self.assertEqual(r, self.EXPECTED)
Ejemplo n.º 3
0
    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains pairwise.

        The single-letter sequence of every chain is compared with that of
        every following chain (and with itself) through
        C{self._compareSequences}; the results fill the upper triangle of a
        chain x chain matrix which is written to the log.

        Without a chain mask, the off-diagonals of this matrix are scanned
        with growing offset. The first offset whose average value reaches
        C{self.threshold} is taken as the begin of the duplicates and only
        the chains before it are kept. With a chain mask of matching length
        the mask alone selects the chains to keep. A mask of any other
        length only produces an error entry in the log and no chain is
        removed. An empty mask is treated like no mask.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]

        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        matrix = 1.0 * N.zeros((chainCount,chainCount))

        ## collect for log file
        chain_ids = [ chain.chain_id for chain in self.chains ]

        ## convert each 3-letter-code res list into 1-letter-code once,
        ## rather than again for every compared pair
        ## (assumes sequence() and singleAA() are free of side effects)
        sequences = [ singleAA( chain.sequence() ) for chain in self.chains ]

        ## create identity matrix for all chains against all chains
        ## (only the upper triangle, j >= i, is filled)
        for i in range(chainCount):
            for j in range(i, chainCount):
                matrix[i,j] = self._compareSequences( sequences[i],
                                                      sequences[j] )

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: "+str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n"+str(matrix))
        self.log.add("  Identity threshold used: "+str(self.threshold))

        ## The length is tested instead of the truth value so that array
        ## masks (whose truth value may be ambiguous) are accepted as well.
        useMask = chainMask is not None and len(chainMask) > 0

        if useMask:
            ## override the automatic chain deletion by the supplied mask
            if len(chainMask) == chainCount:
                ## NOTE(review): N.compress returns an array, the other
                ## branch a slice of self.chains -- confirm callers cope
                self.chains = N.compress(chainMask, self.chains)
                self.log.add("NOTE: chain mask %s used for removing chains.\n"%chainMask)

            else:
                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long"%len(chainMask))
                self.log.add("# when a mask of length %i is needed"%chainCount)
                self.log.add("# No cleaning will be performed.\n")

        else:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = chainCount
            for offset in range(1,chainCount):
                diag = N.diagonal(matrix, offset ,0,1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N.sum(diag)/len(diag)
                if avg >= self.threshold:
                    duplicate = offset
                    break
            self.chains = self.chains[:duplicate]
            self.log.add("NOTE: Identity matrix will be used for removing identical chains.")

        # how many chains have been removed?
        removed = chainCount - len(self.chains)

        ## report activity
        self.log.add(str(removed) + " chains have been removed.\n")

        return removed
Ejemplo n.º 4
0
    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains pairwise.

        Each chain's single-letter sequence is compared with the sequence
        of itself and of every later chain using
        C{self._compareSequences}. The values end up in the upper triangle
        of a chain x chain matrix, which is reported to the log.

        If no chain mask is given, the off-diagonals of the matrix are
        inspected with increasing offset; the first offset whose mean value
        is at least C{self.threshold} marks the begin of the duplicates and
        all chains from that position on are dropped. If a chain mask of
        matching length is given, it alone decides which chains stay. A
        mask of a different length is reported as an error in the log and
        leaves the chains untouched. An empty mask counts as no mask.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]

        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        matrix = 1.0 * N.zeros((chainCount, chainCount))

        ## collect for log file
        chain_ids = [chain.chain_id for chain in self.chains]

        ## convert every 3-letter-code res list into 1-letter-code a single
        ## time instead of once per compared pair
        ## (assumes sequence() and singleAA() are free of side effects)
        sequences = [singleAA(chain.sequence()) for chain in self.chains]

        ## create identity matrix for all chains against all chains
        ## (only the upper triangle, j >= i, is filled)
        for i in range(chainCount):
            seq1 = sequences[i]
            for j in range(i, chainCount):
                matrix[i, j] = self._compareSequences(seq1, sequences[j])

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: " + str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n" + str(matrix))
        self.log.add("  Identity threshold used: " + str(self.threshold))

        ## Test the length, not the truth value, so that array masks (whose
        ## truth value may be ambiguous) can be passed in as well.
        useMask = chainMask is not None and len(chainMask) > 0

        if useMask:
            ## override the automatic chain deletion by the supplied mask
            if len(chainMask) == chainCount:
                ## NOTE(review): N.compress returns an array, the other
                ## branch a slice of self.chains -- confirm callers cope
                self.chains = N.compress(chainMask, self.chains)
                self.log.add(
                    "NOTE: chain mask %s used for removing chains.\n" %
                    chainMask)

            else:
                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long" %
                             len(chainMask))
                self.log.add("# when a mask of length %i is needed" %
                             chainCount)
                self.log.add("# No cleaning will be performed.\n")

        else:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = chainCount
            for offset in range(1, chainCount):
                diag = N.diagonal(matrix, offset, 0, 1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N.sum(diag) / len(diag)
                if avg >= self.threshold:
                    duplicate = offset
                    break
            self.chains = self.chains[:duplicate]
            self.log.add(
                "NOTE: Identity matrix will be used for removing identical chains."
            )

        # how many chains have been removed?
        removed = chainCount - len(self.chains)

        ## report activity
        self.log.add(str(removed) + " chains have been removed.\n")

        return removed