def __pairwiseDistances(self, u, v):
    """
    Pairwise Euclidean distances between two arrays of atom coordinates.

    Both arguments must be 2-D arrays with one coordinate row per point
    and the same number of columns.

    @param u: coordinates
    @type  u: array
    @param v: coordinates
    @type  v: array

    @return: array of shape len(u) x len(v); element [i, j] is the
             distance between u[i] and v[j]
    @rtype: array

    @raise ComplexError: if u or v is not an array

    @author: Wolfgang Rieping.
    """
    ## check input
    if not isinstance(u, arraytype) or not isinstance(v, arraytype):
        raise ComplexError('unsupported argument type ' + \
                           str( type(u) ) + ' or ' + str( type(v) ) )

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 * (u_i . v_j)
    ## The squared norms are taken row by row; building the full
    ## len(u) x len(u) product only to read its diagonal is wasteful.
    sq_u = N.sum(u * u, 1)
    sq_v = N.sum(v * v, 1)
    sq_dist = N.add.outer(sq_u, sq_v) - 2 * N.dot(u, N.transpose(v))

    ## rounding can leave tiny negative values for (nearly) identical
    ## points; clamp them so that sqrt never yields NaN
    return N.sqrt(N.maximum(sq_dist, 0))
def squared_distance_matrix(x, y):
    """
    Squared Euclidean distances between every row of x and every row of y.

    @param x: 2-D array (or nested sequence), one point per row
    @type  x: array
    @param y: 2-D array (or nested sequence), one point per row, same
              number of columns as x
    @type  y: array

    @return: array of shape len(x) x len(y); element [i, j] is the
             squared distance between x[i] and y[j]
    @rtype: array
    """
    x = N.asarray(x)
    y = N.asarray(y)

    # |x_i - y_j|^2 = |x_i|^2 + |y_j|^2 - 2 * (x_i . y_j)
    # Row-wise sums of squares avoid building the len(x) x len(x) and
    # len(y) x len(y) products whose diagonals are all that is needed.
    sq_x = N.sum(x * x, 1)
    sq_y = N.sum(y * y, 1)
    sq_dist = N.add.outer(sq_x, sq_y) - 2 * N.dot(x, N.transpose(y))

    # a squared distance cannot be negative; remove rounding noise
    return N.maximum(sq_dist, 0)
def squared_distance_matrix(x, y):
    """
    Matrix of squared Euclidean distances between the rows of x and y.

    @param x: first set of points, 2-D, one point per row
    @type  x: array
    @param y: second set of points, 2-D, one point per row (same number
              of columns as x)
    @type  y: array

    @return: len(x) x len(y) array whose element [i, j] is the squared
             distance between x[i] and y[j]
    @rtype: array
    """
    x = N.asarray(x)
    y = N.asarray(y)

    # squared row norms, computed directly instead of via the diagonal
    # of the full x.x^T / y.y^T products (which need O(n^2) memory)
    norms_x = N.sum(x * x, 1)
    norms_y = N.sum(y * y, 1)

    # expansion of |x_i - y_j|^2
    cross = N.dot(x, N.transpose(y))
    result = N.add.outer(norms_x, norms_y) - 2 * cross

    # clamp rounding noise: squared distances are never negative
    return N.maximum(result, 0)
def __init__(self, points, k, normalization=NORM_NORM_T0_1, force=False):
    """
    Build k polynomials of degree 0 to k-1 that are orthogonal on a set
    of distinct points; the points are first mapped to the interval [-1,1].

    INPUT:
        points: array of distinct points on which the polynomials are
                orthogonal
        k: number of polynomials (degrees 0 to k-1)
        normalization: handed to setNormalization(); the original
                documentation lists the values {0|1|2}
        force: stored as self._force; True creates the basis even if
                orthogonality is not satisfied due to numerical error

    USES:
        x: array of points mapped to [-1,1]
        T_: matrix of polynomial values calculated at x, shape (k,len(x))
        TT_ = T_ * Numeric.transpose(T_)
        TTinv_ = inverse(TT_)
        sc_: scaling factors
        a, b: recurrence coefficients for calculating T
              (2k-4 of them differ from 0, i.e. 6 for k=5)
        n: number of points = len(points)
    """
    self.k = k      # number of basis polynomials of order 0 to k-1
    self._force = force
    self.points = Numeric.asarray(points, Numeric.Float)
    self.pointsMin = min(points)
    self.pointsMax = max(points)

    # scaling x to [-1,1] results in smaller a and b, T is not affected;
    # overflow is NOT a problem!
    self.xMin = -1
    self.xMax = 1
    self.x = self._map(self.points, self.pointsMin, self.pointsMax,
                       self.xMin, self.xMax)

    # calculate basis polynomials
    self.n = len(points)    # the number of approximation points
    basis = Numeric.zeros((k, self.n), Numeric.Float)
    alpha = Numeric.zeros((k, 1), Numeric.Float)
    beta = Numeric.zeros((k, 1), Numeric.Float)

    # degree 0: constant 1; degree 1: x minus its mean
    basis[0,:] = Numeric.ones(self.n, Numeric.Float)
    if k > 1:
        basis[1,:] = self.x - sum(self.x)/self.n

    # three-term recurrence for the polynomials of degree 2 .. k-1
    for nxt in range(2, k):
        cur = nxt - 1
        row_cur = basis[cur,:]
        row_prev = basis[cur-1,:]
        norm_cur = Numeric.innerproduct(row_cur, row_cur)
        alpha[nxt] = Numeric.innerproduct(self.x, row_cur * row_cur) / norm_cur
        beta[cur] = norm_cur / Numeric.innerproduct(row_prev, row_prev)
        basis[nxt,:] = (self.x - alpha[nxt]) * row_cur - beta[cur] * row_prev
    self.a = alpha
    self.b = beta

    # prepare for approximation
    self._T0 = basis
    gram = Numeric.matrixmultiply(self._T0, Numeric.transpose(self._T0))
    # per-polynomial sum of squared values, as a (k,1) column
    sq_norms = Numeric.reshape(Numeric.diagonal(gram), (self.k, 1))

    # scaling factors = sqrt sum squared self._T0
    self.sc1 = Numeric.sqrt(sq_norms)
    self._T1 = self._T0 / self.sc1

    # scaling factors = sqrt 1/n * sum squared self._T0 (keeps T[0] == 1)
    self.sc2 = Numeric.sqrt(sq_norms / self.n)
    self._T2 = self._T0 / self.sc2

    # scaling factors = self._T0[:,-1], so that T[:,-1] == 1
    self.sc3 = Numeric.take(self._T0, (-1,), 1)
    self._T3 = self._T0 / self.sc3

    # set the variables according to the chosen normalization
    self.setNormalization(normalization)
def pairwiseDistances(u, v):
    """
    Pairwise Euclidean distances between the rows of two arrays.

    @param u: first array, 2-D, one point per row
    @type  u: array
    @param v: second array, 2-D, one point per row (same number of
              columns as u)
    @type  v: array

    @return: array( len(u) x len(v) ) of double; element [i, j] is the
             distance between u[i] and v[j]
    @rtype: array
    """
    u = N.asarray(u)
    v = N.asarray(v)

    ## |u_i - v_j|^2 = |u_i|^2 + |v_j|^2 - 2 * (u_i . v_j)
    ## (single vectorized expression; the former map()-based row loop
    ## fails when map() returns an iterator instead of a list)
    sq_u = N.sum(u * u, 1)
    sq_v = N.sum(v * v, 1)
    sq_dist = N.add.outer(sq_u, sq_v) - 2 * N.dot(u, N.transpose(v))

    ## rounding can leave tiny negative values for (nearly) identical
    ## points; clamp them so that sqrt never yields NaN
    return N.sqrt(N.maximum(sq_dist, 0))
def pairwiseDistances(u, v):
    """
    Euclidean distance between every row of u and every row of v.

    @param u: first array, 2-D, one point per row
    @type  u: array
    @param v: second array, 2-D, one point per row (same number of
              columns as u)
    @type  v: array

    @return: array( len(u) x len(v) ) of double; element [i, j] is the
             distance between u[i] and v[j]
    @rtype: array
    """
    u = N.asarray(u)
    v = N.asarray(v)

    ## squared row norms; no need for the full u.u^T / v.v^T products
    norms_u = N.sum(u * u, 1)
    norms_v = N.sum(v * v, 1)

    ## expansion of |u_i - v_j|^2, evaluated for all pairs at once
    ## (replaces the map()-based loops, which break when map() returns
    ## an iterator rather than a list)
    cross = N.dot(u, N.transpose(v))
    squared = N.add.outer(norms_u, norms_v) - 2 * cross

    ## guard against slightly negative values caused by rounding,
    ## which would turn into NaN under sqrt
    return N.sqrt(N.maximum(squared, 0))
def _removeDuplicateChains(self, chainMask=None):
    """
    Get rid of identical chains by comparing all chains with each other
    via self._compareSequences (Blast2seq according to the original
    documentation).

    Without a chain mask, the chains are cut off at the first offset
    whose matrix diagonal has an average identity >= self.threshold.
    With a chain mask of the right length, the mask alone decides which
    chains are kept; a mask of the wrong length is reported in the log
    and no chain is removed.

    @param chainMask: chain mask for overriding the
                      chain identity checking (default: None)
    @type  chainMask: [int]

    @return: number of chains removed
    @rtype: int
    """
    chainCount = len(self.chains)
    matrix = 1.0 * N.zeros((chainCount, chainCount))

    ## collect chain ids for the log file
    chain_ids = [chain.chain_id for chain in self.chains]

    ## convert each 3-letter-code res list into a 1-letter-code string
    ## once, instead of once per compared pair
    sequences = [singleAA(chain.sequence()) for chain in self.chains]

    ## create identity matrix for all chains against all chains
    ## (only the upper triangle incl. the main diagonal is filled)
    for i in range(chainCount):
        for j in range(i, chainCount):
            matrix[i, j] = self._compareSequences(sequences[i], sequences[j])

    ## report activity
    self.log.add("\n Chain ID's of compared chains: " + str(chain_ids))
    self.log.add(" Cross-Identity between chains:\n" + str(matrix))
    self.log.add(" Identity threshold used: " + str(self.threshold))

    if chainMask:
        ## override the automatic chain deletion by supplying a
        ## chain mask to this function
        if len(chainMask) == chainCount:
            self.chains = N.compress(chainMask, self.chains)
            ## 1-tuple so that a tuple mask is formatted, not unpacked
            self.log.add("NOTE: chain mask %s used for removing chains.\n"
                         % (chainMask,))
        else:
            self.log.add("########## ERROR ###############")
            self.log.add("# Chain mask is only %i chains long" % len(chainMask))
            self.log.add("# when a mask of length %i is needed" % chainCount)
            self.log.add("# No cleaning will be performed.\n")
    else:
        ## look at diagonals in "identity matrix"
        ## (each chain against each)
        duplicate = chainCount
        for offset in range(1, chainCount):
            diag = N.diagonal(matrix, offset, 0, 1)
            ## diagonal of 1's mark begin of duplicate
            avg = 1.0 * N.sum(diag) / len(diag)
            if avg >= self.threshold:
                duplicate = offset
                break
        self.chains = self.chains[:duplicate]
        self.log.add("NOTE: Identity matrix will be used for removing identical chains.")

    ## report activity: how many chains have been removed?
    removed = chainCount - len(self.chains)
    self.log.add(str(removed) + " chains have been removed.\n")

    return removed
def _removeDuplicateChains(self, chainMask=None):
    """
    Get rid of identical chains by comparing all chains with each other
    via self._compareSequences (Blast2seq according to the original
    documentation).

    Without a chain mask, the chains are cut off at the first offset
    whose matrix diagonal has an average identity >= self.threshold.
    With a chain mask of the right length, the mask alone decides which
    chains are kept; a mask of the wrong length is reported in the log
    and no chain is removed.

    @param chainMask: chain mask for overriding the
                      chain identity checking (default: None)
    @type  chainMask: [int]

    @return: number of chains removed
    @rtype: int
    """
    chainCount = len(self.chains)
    matrix = 1.0 * N.zeros((chainCount, chainCount))

    ## chain ids are only collected for the log file
    chain_ids = [chain.chain_id for chain in self.chains]

    ## 3-letter-code res lists -> 1-letter-code strings; done once per
    ## chain rather than once per compared pair
    sequences = [singleAA(chain.sequence()) for chain in self.chains]

    ## create identity matrix for all chains against all chains
    ## (only the upper triangle incl. the main diagonal is filled)
    for i in range(chainCount):
        seq_i = sequences[i]
        for j in range(i, chainCount):
            matrix[i, j] = self._compareSequences(seq_i, sequences[j])

    ## report activity
    self.log.add("\n Chain ID's of compared chains: " + str(chain_ids))
    self.log.add(" Cross-Identity between chains:\n" + str(matrix))
    self.log.add(" Identity threshold used: " + str(self.threshold))

    if chainMask:
        ## override the automatic chain deletion by supplying a
        ## chain mask to this function
        if len(chainMask) == chainCount:
            self.chains = N.compress(chainMask, self.chains)
            ## 1-tuple so that a tuple mask is formatted, not unpacked
            self.log.add("NOTE: chain mask %s used for removing chains.\n"
                         % (chainMask,))
        else:
            self.log.add("########## ERROR ###############")
            self.log.add("# Chain mask is only %i chains long" % len(chainMask))
            self.log.add("# when a mask of length %i is needed" % chainCount)
            self.log.add("# No cleaning will be performed.\n")
    else:
        ## look at diagonals in "identity matrix"
        ## (each chain against each)
        duplicate = chainCount
        for offset in range(1, chainCount):
            diag = N.diagonal(matrix, offset, 0, 1)
            ## diagonal of 1's mark begin of duplicate
            avg = 1.0 * N.sum(diag) / len(diag)
            if avg >= self.threshold:
                duplicate = offset
                break
        self.chains = self.chains[:duplicate]
        self.log.add("NOTE: Identity matrix will be used for removing identical chains.")

    ## report activity: how many chains have been removed?
    removed = chainCount - len(self.chains)
    self.log.add(str(removed) + " chains have been removed.\n")

    return removed
def clusterEntropy(self):
    """
    Entropy-like score for every row of self.msm.

    For each row r the result is
        -1/npoints * sum_j( msm[r, j] * log(msm[r, j]) )
    where entries that are exactly 0 contribute 0 (the usual
    0 * log(0) = 0 convention) instead of turning the row into NaN.

    Reads self.msm (2-D array) and self.npoints.

    @return: one value per row of self.msm
    @rtype: array
    """
    msm = N.asarray(self.msm)

    # log(1) = 0, so substituting 1 for exact zeros makes their term
    # 0 * 0 = 0; all other entries (incl. invalid negative ones) are
    # passed to log unchanged
    log_msm = N.log(N.where(msm == 0, 1.0, msm))

    # row-wise sum == diagonal of msm . log(msm)^T, without computing
    # the off-diagonal elements
    centropy = N.sum(msm * log_msm, 1)
    return -1 / float(self.npoints) * centropy
def clusterEntropy(self):
    """
    Row-wise entropy of self.msm, scaled by 1 / self.npoints.

    Each returned element is
        -1/npoints * sum_j( msm[r, j] * log(msm[r, j]) )
    for one row r of self.msm. Entries that are exactly 0 are treated
    as contributing 0 (0 * log(0) = 0) so that a single zero does not
    make the whole row NaN.

    Reads self.msm (2-D array) and self.npoints.

    @return: one value per row of self.msm
    @rtype: array
    """
    msm = N.asarray(self.msm)

    # replace exact zeros by 1 inside the logarithm only: their term
    # becomes 0 * log(1) = 0, every other entry is left untouched
    safe = N.where(msm == 0, 1.0, msm)

    # sum over each row instead of taking the diagonal of the full
    # msm . log(msm)^T product
    centropy = N.sum(msm * N.log(safe), 1)
    return -1 / float(self.npoints) * centropy
def __init__(self, points, k, normalization=NORM_NORM_T0_1, force=False):
    """
    Build k polynomials of degree 0 to k-1 that are orthogonal on a set
    of distinct points; the points are first mapped to the interval [-1,1].

    INPUT:
        points: array of distinct points on which the polynomials are
                orthogonal
        k: number of polynomials (degrees 0 to k-1)
        normalization: handed to setNormalization(); the original
                documentation lists the values {0|1|2}
        force: stored as self._force; True creates the basis even if
                orthogonality is not satisfied due to numerical error

    USES:
        x: array of points mapped to [-1,1]
        T_: matrix of polynomial values calculated at x, shape (k,len(x))
        TT_ = T_ * Numeric.transpose(T_)
        TTinv_ = inverse(TT_)
        sc_: scaling factors
        a, b: recurrence coefficients for calculating T
              (2k-4 of them differ from 0, i.e. 6 for k=5)
        n: number of points = len(points)
    """
    self.k = k      # number of basis polynomials of order 0 to k-1
    self._force = force
    self.points = Numeric.asarray(points, Numeric.Float)
    self.pointsMin = min(points)
    self.pointsMax = max(points)

    # scaling x to [-1,1] results in smaller a and b, T is not affected;
    # overflow is NOT a problem!
    self.xMin = -1
    self.xMax = 1
    self.x = self._map(self.points, self.pointsMin, self.pointsMax,
                       self.xMin, self.xMax)

    # calculate basis polynomials
    self.n = len(points)    # the number of approximation points
    values = Numeric.zeros((k, self.n), Numeric.Float)
    coef_a = Numeric.zeros((k, 1), Numeric.Float)
    coef_b = Numeric.zeros((k, 1), Numeric.Float)

    # degree 0 is the constant 1, degree 1 is x shifted by its mean
    values[0, :] = Numeric.ones(self.n, Numeric.Float)
    if k > 1:
        values[1, :] = self.x - sum(self.x) / self.n

    # higher degrees follow from the two preceding polynomials
    for degree in range(2, k):
        last = values[degree - 1, :]
        before_last = values[degree - 2, :]
        last_sq = Numeric.innerproduct(last, last)
        coef_a[degree] = Numeric.innerproduct(self.x, last * last) / last_sq
        coef_b[degree - 1] = last_sq / Numeric.innerproduct(before_last, before_last)
        values[degree, :] = ((self.x - coef_a[degree]) * last
                             - coef_b[degree - 1] * before_last)
    self.a = coef_a
    self.b = coef_b

    # prepare for approximation
    self._T0 = values
    products = Numeric.matrixmultiply(self._T0, Numeric.transpose(self._T0))
    # sum of squared values of each polynomial, shaped as a (k,1) column
    sum_sq = Numeric.reshape(Numeric.diagonal(products), (self.k, 1))

    # scaling factors = sqrt sum squared self._T0
    self.sc1 = Numeric.sqrt(sum_sq)
    self._T1 = self._T0 / self.sc1

    # scaling factors = sqrt 1/n * sum squared self._T0 (keeps T[0] == 1)
    self.sc2 = Numeric.sqrt(sum_sq / self.n)
    self._T2 = self._T0 / self.sc2

    # scaling factors = self._T0[:,-1], so that T[:,-1] == 1
    self.sc3 = Numeric.take(self._T0, (-1, ), 1)
    self._T3 = self._T0 / self.sc3

    # set the variables according to the chosen normalization
    self.setNormalization(normalization)