Exemple #1
0
    def __pairwiseDistances(self, u, v):
        """
        Pairwise Euclidean distances between two arrays of atom coordinates.

        Both arguments are expected to be 2-D arrays with one coordinate
        vector per row (the matrix products below require this).

        @param u: coordinates
        @type  u: array
        @param v: coordinates
        @type  v: array

        @return: array len(u) x len(v); element [i,j] is the distance
                 between u[i] and v[j]
        @rtype: array

        @raise ComplexError: if u or v is not an instance of arraytype

        @author: Wolfgang Rieping.
        """
        ## check input
        if not isinstance( u, arraytype ) or not isinstance( v, arraytype ):
            raise ComplexError('unsupported argument type ' + \
                               str( type(u) ) + ' or ' + str( type(v) ) )

        ## squared length of every row vector: |u_i|^2 and |v_j|^2
        diag1 = N.diagonal(N.dot(u, N.transpose(u)))
        diag2 = N.diagonal(N.dot(v, N.transpose(v)))

        ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 * u_i.v_j, evaluated for all
        ## pairs at once (the former map()-based row loop only worked with
        ## the list-returning map() of Python 2)
        dist = N.add.outer(diag1, diag2) - 2 * N.dot(u, N.transpose(v))

        ## rounding errors can push a zero distance slightly below 0, which
        ## would turn into NaN under sqrt -> clip at 0
        return N.sqrt(N.maximum(dist, 0))
Exemple #2
0
def squared_distance_matrix(x, y):
    """
    Squared Euclidean distances between the rows of x and the rows of y.

    Evaluates |a - b|^2 = |a|^2 + |b|^2 - 2 * a.b for every pair of rows.
    Assumes x and y are 2-D with one point per row and the same number of
    columns (required by the matrix products below).

    Returns an array of shape (len(x), len(y)); element [i,j] belongs to
    the pair x[i], y[j].
    """
    y_t = N.transpose(y)

    # mixed term a.b for all pairs
    cross = N.dot(x, y_t)

    # squared length of each row, read off the diagonal of the Gram matrix
    sq_x = N.diagonal(N.dot(x, N.transpose(x)))
    sq_y = N.diagonal(N.dot(y, y_t))

    return N.add.outer(sq_x, sq_y) - 2 * cross
Exemple #3
0
def squared_distance_matrix(x, y):
    """
    Return the matrix of squared Euclidean distances between two point sets.

    x and y are treated as 2-D arrays holding one point per row (the
    matrix products below need matching column counts). The result has
    shape (len(x), len(y)) and is computed as
    |x_i|^2 + |y_j|^2 - 2 * x_i.y_j.
    """

    def _squared_row_norms(m):
        # diagonal of m * m^T == sum of squares of every row of m
        return N.diagonal(N.dot(m, N.transpose(m)))

    norm_sums = N.add.outer(_squared_row_norms(x), _squared_row_norms(y))
    dot_products = N.dot(x, N.transpose(y))

    return norm_sums - 2 * dot_products
Exemple #4
0
 def __init__(self, points, k, normalization=NORM_NORM_T0_1, force=False):
     """
     Calculate k polynomials of degree 0 to k-1 that are orthogonal on a
     set of distinct points. The points are first mapped with self._map
     to the interval [xMin, xMax] = [-1, 1].

     INPUT:  points: array of distinct points where polynomials are orthogonal
             k: number of polynomials of degree 0 to k-1
             normalization: handed to self.setNormalization at the end
             force: only stored as self._force here; per the original
                    documentation, True creates the basis even if
                    orthogonality is not satisfied due to numerical error
     SETS:   points, pointsMin, pointsMax: the input points and their range
             x: array of points mapped to [-1,1]
             n: number of points = len(points)
             a, b: (k,1) coefficient arrays of the three-term recurrence
                   (2k-4 of them different from 0, i.e. 6 for k=5)
             _T0: polynomial values at x, shape (k, n), not normalized
             sc1, _T1: scaling sqrt(sum of squares) and rows scaled by it
             sc2, _T2: scaling sqrt(1/n * sum of squares) and rows scaled by it
             sc3, _T3: scaling by the last column of _T0, so _T3[:,-1] == 1
     """
     self.k = k  # number of basis polynomials of order 0 to k-1
     self._force = force
     self.points = Numeric.asarray(points, Numeric.Float)
     self.pointsMin = min(points)
     self.pointsMax = max(points)
     # scaling x to [-1,1] results in smaller a and b, T is not affected; overflow is NOT a problem!
     self.xMin, self.xMax = -1, 1
     self.x = self._map(self.points, self.pointsMin, self.pointsMax, self.xMin, self.xMax)
     self.n = len(points)  # the number of approximation points

     # basis polynomials via the three-term recurrence
     #   t[i+1] = (x - a[i+1]) * t[i] - b[i] * t[i-1]
     t = Numeric.zeros((k, self.n), Numeric.Float)
     a = Numeric.zeros((k, 1), Numeric.Float)
     b = Numeric.zeros((k, 1), Numeric.Float)
     t[0, :] = Numeric.ones(self.n, Numeric.Float)
     if k > 1:
         # degree 1: x shifted by its mean
         t[1, :] = self.x - sum(self.x) / self.n
     for i in range(1, k - 1):
         cur = t[i, :]
         prev = t[i - 1, :]
         curNorm = Numeric.innerproduct(cur, cur)
         a[i + 1] = Numeric.innerproduct(self.x, cur * cur) / curNorm
         b[i] = curNorm / Numeric.innerproduct(prev, prev)
         t[i + 1, :] = (self.x - a[i + 1]) * cur - b[i] * prev
     self.a = a
     self.b = b

     # prepare for approximation
     self._T0 = t
     _TT0 = Numeric.matrixmultiply(self._T0, Numeric.transpose(self._T0))
     # per-polynomial sum of squares as a (k,1) column
     sumSquares = Numeric.reshape(Numeric.diagonal(_TT0), (self.k, 1))

     # orthonormal: scaling factors = sqrt sum squared self._T0
     self.sc1 = Numeric.sqrt(sumSquares)
     self._T1 = self._T0 / self.sc1

     # orthonormal and T[0] == 1: scaling factors = sqrt 1/n * sum squared self._T0
     self.sc2 = Numeric.sqrt(sumSquares / self.n)
     self._T2 = self._T0 / self.sc2

     # T[:,-1] == 1: scaling factors = self._T0[:,-1]
     self.sc3 = Numeric.take(self._T0, (-1,), 1)
     self._T3 = self._T0 / self.sc3

     # set the variables according to the chosen normalization
     self.setNormalization(normalization)
Exemple #5
0
def pairwiseDistances(u, v):
    """
    Pairwise Euclidean distances between the rows of two arrays.

    Both arguments are expected to be 2-D with one vector per row and the
    same number of columns (the matrix products below require this).

    @param u: first array
    @type  u: array
    @param v: second array
    @type  v: array

    @return: array( len(u) x len(v) ) of double; element [i,j] is the
             distance between u[i] and v[j]
    @rtype: array
    """
    ## squared length of every row vector: |u_i|^2 and |v_j|^2
    diag1 = N.diagonal(N.dot(u, N.transpose(u)))
    diag2 = N.diagonal(N.dot(v, N.transpose(v)))

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 * u_i.v_j for all pairs at once;
    ## the former map()-based row loop depended on the list-returning map()
    ## of Python 2 and broke with the iterator returned by Python 3
    squared = N.add.outer(diag1, diag2) - 2 * N.dot(u, N.transpose(v))

    ## rounding can leave tiny negative values where the distance is 0;
    ## clip them so that sqrt does not return NaN
    return N.sqrt(N.maximum(squared, 0))
Exemple #6
0
def pairwiseDistances(u, v):
    """
    Pairwise distances between two arrays.

    u and v are expected to be 2-D arrays holding one vector per row with
    equal numbers of columns (needed by the matrix products below).

    @param u: first array
    @type  u: array
    @param v: second array
    @type  v: array

    @return: array( len(u) x len(v) ) of double, [i,j] being the
             Euclidean distance between u[i] and v[j]
    @rtype: array
    """
    ## row-wise squared norms, taken from the diagonals of the Gram matrices
    diag1 = N.diagonal( N.dot( u, N.transpose(u) ) )
    diag2 = N.diagonal( N.dot( v, N.transpose(v) ) )

    ## mixed term u_i.v_j for every pair, shape len(u) x len(v)
    cross = N.dot( u, N.transpose(v) )

    ## squared distance = |u_i|^2 + |v_j|^2 - 2 * u_i.v_j; computed by
    ## broadcasting instead of map(), whose result is an iterator (not a
    ## list of rows) under Python 3
    dist = N.add.outer( diag1, diag2 ) - 2 * cross

    ## guard against slightly negative values caused by rounding, which
    ## sqrt would turn into NaN
    return N.sqrt( N.maximum( dist, 0 ) )
Exemple #7
0
    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains with each other.

        Every pair of chain sequences is scored with
        self._compareSequences (Blast2seq according to the original
        documentation) and the scores are logged. Without a chain mask,
        the first off-diagonal of the score matrix whose mean reaches
        self.threshold marks the start of the duplicates: only the chains
        before that offset are kept. With a chain mask of the right length
        the mask alone decides which chains are kept; a mask of the wrong
        length is reported in the log and no chain is removed.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]

        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        matrix = 1.0 * N.zeros((chainCount,chainCount))

        ## collect for log file
        chain_ids = [ chain.chain_id for chain in self.chains ]

        ## convert each 3-letter-code res list into a 1-letter-code String
        ## once per chain rather than once per compared pair
        sequences = [ singleAA( chain.sequence() ) for chain in self.chains ]

        ## create identity matrix for all chains against all chains
        ## (only the upper triangle incl. the diagonal is filled)
        for i in range(chainCount):
            for j in range(i, chainCount):
                matrix[i,j] = self._compareSequences( sequences[i],
                                                      sequences[j] )

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: "+str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n"+str(matrix))
        self.log.add("  Identity threshold used: "+str(self.threshold))

        if chainMask:
            ## override the automatic chain deletion by supplying a
            ## chain mask to this function
            if len(chainMask) == chainCount:
                self.chains = N.compress(chainMask, self.chains)
                self.log.add("NOTE: chain mask %s used for removing chains.\n"%chainMask)

            else:
                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long"%len(chainMask))
                self.log.add("# when a mask of length %i is needed"%chainCount)
                self.log.add("# No cleaning will be performed.\n")

        else:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = len(self.chains)
            for offset in range(1,chainCount):
                diag = N.diagonal(matrix, offset ,0,1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N.sum(diag)/len(diag)
                if (avg >= self.threshold):
                    duplicate = offset
                    break
            self.chains = self.chains[:duplicate]
            self.log.add("NOTE: Identity matrix will be used for removing identical chains.")

        ## report activity
        removed = chainCount - len(self.chains)
        self.log.add(str(removed)+\
                     " chains have been removed.\n")

        # how many chains have been removed?
        return removed
Exemple #8
0
    def _removeDuplicateChains(self, chainMask=None):
        """
        Get rid of identical chains by comparing all chains with each other.

        All chain sequences are compared pairwise with
        self._compareSequences (Blast2seq according to the original
        documentation); the resulting matrix is written to the log.
        If no chain mask is given, the smallest offset whose off-diagonal
        has a mean score >= self.threshold is taken as the number of
        unique chains and everything after it is dropped. If a chain mask
        with one entry per chain is given, it selects the chains to keep
        instead. A mask of any other length is logged as an error and
        leaves the chains untouched.

        @param chainMask: chain mask for overriding the
                          chain identity checking (default: None)
        @type  chainMask: [int]

        @return: number of chains removed
        @rtype: int
        """
        chainCount = len(self.chains)
        matrix = 1.0 * N.zeros((chainCount, chainCount))

        # collect for log file
        chain_ids = [c.chain_id for c in self.chains]

        # 3-letter-code res list -> 1-letter-code String, done once per
        # chain (it does not depend on the partner chain of a comparison)
        seqs = [singleAA(c.sequence()) for c in self.chains]

        ## create identity matrix for all chains against all chains;
        ## entries below the diagonal stay 0
        for i in range(chainCount):
            for j in range(i, chainCount):
                matrix[i, j] = self._compareSequences(seqs[i], seqs[j])

        ## report activity
        self.log.add("\n  Chain ID's of compared chains: " + str(chain_ids))
        self.log.add("  Cross-Identity between chains:\n" + str(matrix))
        self.log.add("  Identity threshold used: " + str(self.threshold))

        if chainMask:
            ## override the automatic chain deletion by supplying a
            ## chain mask to this function
            if len(chainMask) == chainCount:
                self.chains = N.compress(chainMask, self.chains)
                self.log.add(
                    "NOTE: chain mask %s used for removing chains.\n" %
                    chainMask)
            else:
                self.log.add("########## ERROR ###############")
                self.log.add("# Chain mask is only %i chains long" %
                             len(chainMask))
                self.log.add("# when a mask of length %i is needed" %
                             chainCount)
                self.log.add("# No cleaning will be performed.\n")
        else:
            ## look at diagonals in "identity matrix"
            ## (each chain against each)
            duplicate = len(self.chains)
            for offset in range(1, chainCount):
                diag = N.diagonal(matrix, offset, 0, 1)
                # diagonal of 1's mark begin of duplicate
                avg = 1.0 * N.sum(diag) / len(diag)
                if avg >= self.threshold:
                    duplicate = offset
                    break
            self.chains = self.chains[:duplicate]
            self.log.add(
                "NOTE: Identity matrix will be used for removing identical chains."
            )

        ## report activity
        removedCount = chainCount - len(self.chains)
        self.log.add(str(removedCount) + " chains have been removed.\n")

        # how many chains have been removed?
        return removedCount
Exemple #9
0
 def clusterEntropy(self):
     """
     Entropy-like score for every row of self.msm.

     For row i the result is
         -1/npoints * sum_j( msm[i,j] * log(msm[i,j]) )
     i.e. the diagonal of msm * log(msm)^T scaled by -1/self.npoints.

     Returns an array with one value per row of self.msm.
     """
     logMsm = N.log(self.msm)
     # entry [i,i] of this product is sum_j msm[i,j] * log(msm[i,j])
     products = N.dot(self.msm, N.transpose(logMsm))
     scale = -1/float(self.npoints)
     return scale*N.diagonal(products)
Exemple #10
0
 def clusterEntropy(self):
     """
     Return, for each row i of self.msm, the value
     -(1 / npoints) * sum_j msm[i,j] * log(msm[i,j]).

     The row sums are read from the diagonal of the matrix product
     msm * transpose(log(msm)); the result has one entry per row of
     self.msm.
     """
     factor = -1 / float(self.npoints)
     log_transposed = N.transpose(N.log(self.msm))
     row_sums = N.diagonal(N.dot(self.msm, log_transposed))
     return factor * row_sums
Exemple #11
0
 def __init__(self, points, k, normalization=NORM_NORM_T0_1, force=False):
     """
     Construct k polynomials of degree 0 to k-1 which are orthogonal on a
     set of distinct points; the points are mapped with self._map to the
     interval [-1,1] beforehand.

     INPUT:  points: array of distinct points where polynomials are orthogonal
             k: number of polynomials of degree 0 to k-1
             normalization: forwarded to self.setNormalization as last step
             force: kept as self._force and not used here; the original
                    documentation says True creates the basis even if
                    orthogonality is not satisfied due to numerical error
     STORES: x: array of points mapped to [-1,1]
             n: number of points = len(points)
             a, b: coefficients of the recurrence used to build the
                   polynomials, shape (k,1) each (2k-4 different from 0,
                   i.e. 6 for k=5)
             _T0: matrix of polynomial values at x, shape (k,len(x))
             sc1, sc2, sc3: scaling factors, shape (k,1) each
             _T1, _T2, _T3: _T0 divided by sc1, sc2, sc3 respectively
     """
     self.k = k  # number of basis polynomials of order 0 to k-1
     self._force = force
     self.points = Numeric.asarray(points, Numeric.Float)
     self.pointsMin = min(points)
     self.pointsMax = max(points)
     # scaling x to [-1,1] results in smaller a and b, T is not affected; overflow is NOT a problem!
     self.xMin = -1
     self.xMax = 1
     self.x = self._map(self.points, self.pointsMin, self.pointsMax,
                        self.xMin, self.xMax)
     self.n = len(points)  # the number of approximation points

     # calculate basis polynomials
     inner = Numeric.innerproduct
     polys = Numeric.zeros((k, self.n), Numeric.Float)
     alpha = Numeric.zeros((k, 1), Numeric.Float)
     beta = Numeric.zeros((k, 1), Numeric.Float)
     polys[0, :] = Numeric.ones(self.n, Numeric.Float)
     if k > 1:
         # first-degree polynomial: x minus its mean
         polys[1, :] = self.x - sum(self.x) / self.n
     # degree nxt is derived from the two preceding degrees
     for nxt in range(2, k):
         cur = nxt - 1
         p_cur = polys[cur, :]
         p_old = polys[cur - 1, :]
         alpha[nxt] = inner(self.x, p_cur * p_cur) / inner(p_cur, p_cur)
         beta[cur] = inner(p_cur, p_cur) / inner(p_old, p_old)
         polys[nxt, :] = (self.x - alpha[nxt]) * p_cur - beta[cur] * p_old
     self.a = alpha
     self.b = beta

     # prepare for approximation
     self._T0 = polys
     _TT0 = Numeric.matrixmultiply(self._T0, Numeric.transpose(self._T0))
     # sum of squared values of every polynomial, as a column vector
     squared = Numeric.reshape(Numeric.diagonal(_TT0), (self.k, 1))

     # orthonormal
     self.sc1 = Numeric.sqrt(squared)  # sqrt sum squared self._T0
     self._T1 = self._T0 / self.sc1

     # orthonormal and T[0] == 1
     self.sc2 = Numeric.sqrt(squared / self.n)  # sqrt 1/n * sum squared self._T0
     self._T2 = self._T0 / self.sc2

     # T[:,-1] == 1
     self.sc3 = Numeric.take(self._T0, (-1, ), 1)  # self._T0[:,-1]
     self._T3 = self._T0 / self.sc3

     # set the variables according to the chosen normalization
     self.setNormalization(normalization)