def __findTransformation(self, x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    Back transformation, for a single atom i::

        y_new[i] = N0.dot(r, y[i]) + t

    and for all atoms in one step::

        y_new = N0.dot(y, N0.transpose(r)) + t

    @param x: coordinates
    @type  x: array
    @param y: coordinates
    @type  y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.linalg import svd

    ## averages used to center both configurations
    center_x = N0.sum(x) / len(x)
    center_y = N0.sum(y) / len(y)

    ## svd of the correlation matrix of the centered configurations
    ## (the singular values themselves are not needed)
    correlation = N0.dot(N0.transpose(x - center_x), y - center_y)
    left, _, right = svd(correlation)

    ## build rotation matrix and translation vector
    ## NOTE(review): the determinant of the product is not checked here, so
    ## the result could presumably be an improper rotation -- confirm.
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def findTransformation(x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: rotation matrix (3x3) and translation vector (1x3)
    :rtype: array, array
    """
    ## averages used to center both configurations
    center_x = N0.average(x)
    center_y = N0.average(y)

    ## svd of the correlation matrix of the centered configurations
    ## (the singular values themselves are not needed)
    correlation = N0.dot(N0.transpose(x - center_x), y - center_y)
    left, _, right = svd(correlation)

    ## build rotation matrix and translation vector
    rotation = N0.dot(left, right)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def __pairwiseDistances(self, u, v):
    """
    Pairwise distances between two numpy arrays of atom coordinates.

    @param u: coordinates
    @type  u: array
    @param v: coordinates
    @type  v: array

    @return: Numpy array len(u) x len(v)
    @rtype: array

    @raise ComplexError: if the type of u or v is not arraytype

    @author: Wolfgang Rieping.
    """
    ## check input: both arguments must be of the array type
    if not (type(u) == arraytype and type(v) == arraytype):
        raise ComplexError('unsupported argument type ' +
                           str(type(u)) + ' or ' + str(type(v)))

    ## scalar product of every coordinate with itself
    self_u = N0.diagonal(N0.dot(u, N0.transpose(u)))
    self_v = N0.diagonal(N0.dot(v, N0.transpose(v)))

    ## minus twice the mixed scalar products, arranged len(v) x len(u)
    mixed = -N0.dot(v, N0.transpose(u)) \
            - N0.transpose(N0.dot(u, N0.transpose(v)))

    ## add u[i].u[i] to everything that belongs to u[i] ...
    with_u = N0.asarray(
        [column + a for column, a in zip(N0.transpose(mixed), self_u)])

    ## ... and v[j].v[j] to everything that belongs to v[j]
    with_both = N0.asarray(
        [row + a for row, a in zip(N0.transpose(with_u), self_v)])

    return N0.transpose(N0.sqrt(with_both))
def __pairwiseDistances(self, u, v):
    """
    Pairwise distances between two numpy arrays of atom coordinates.

    @param u: coordinates
    @type  u: array
    @param v: coordinates
    @type  v: array

    @return: Numpy array len(u) x len(v)
    @rtype: array

    @raise ComplexError: if the type of u or v is not arraytype

    @author: Wolfgang Rieping.
    """
    ## check input: reject anything that is not of the array type
    both_arrays = type(u) == arraytype and type(v) == arraytype
    if not both_arrays:
        message = 'unsupported argument type ' + \
                  str(type(u)) + ' or ' + str(type(v))
        raise ComplexError(message)

    ## scalar product of every coordinate with itself
    norm_u = N0.diagonal(N0.dot(u, N0.transpose(u)))
    norm_v = N0.diagonal(N0.dot(v, N0.transpose(v)))

    ## minus twice the mixed scalar products, arranged len(v) x len(u)
    cross = -N0.dot(v, N0.transpose(u)) \
            - N0.transpose(N0.dot(u, N0.transpose(v)))

    ## first pass adds the u terms, second pass adds the v terms
    partial = [column + a for column, a in zip(N0.transpose(cross), norm_u)]
    partial = N0.transpose(N0.asarray(partial))

    squared = [row + a for row, a in zip(partial, norm_v)]

    return N0.transpose(N0.sqrt(N0.asarray(squared)))
def squared_distance_matrix(x, y):
    """
    Squared distances between every row of x and every row of y.

    :param x: first set of row vectors
    :param y: second set of row vectors

    :return: matrix whose entry [i, j] is
             x[i].x[i] + y[j].y[j] - 2 * x[i].y[j]
    """
    ## scalar product of each row with itself (diagonal of x.x^T / y.y^T)
    self_x = N0.diagonal(N0.dot(x, N0.transpose(x)))
    self_y = N0.diagonal(N0.dot(y, N0.transpose(y)))

    ## mixed scalar products x[i].y[j]
    mixed = N0.dot(x, N0.transpose(y))

    return N0.add.outer(self_x, self_y) - 2 * mixed
def squared_distance_matrix(x, y):
    """
    Squared distances between every row of x and every row of y.

    :param x: first set of row vectors
    :param y: second set of row vectors

    :return: matrix whose entry [i, j] is
             x[i].x[i] + y[j].y[j] - 2 * x[i].y[j]
    """
    x_t = N0.transpose(x)
    y_t = N0.transpose(y)

    ## sum of the two self scalar products for every pair (i, j)
    norms = N0.add.outer(N0.diagonal(N0.dot(x, x_t)),
                         N0.diagonal(N0.dot(y, y_t)))

    ## subtract twice the mixed scalar product
    return norms - 2 * N0.dot(x, y_t)
def pairwiseDistances(u, v):
    """
    Pairwise distances between two arrays.

    :param u: first array
    :type  u: array
    :param v: second array
    :type  v: array

    :return: array( len(u) x len(v) ) of double
    :rtype: array
    """
    ## scalar product of every row with itself
    self_u = N0.diagonal(N0.dot(u, N0.transpose(u)))
    self_v = N0.diagonal(N0.dot(v, N0.transpose(v)))

    ## minus twice the mixed scalar products, arranged len(v) x len(u)
    mixed = -N0.dot(v, N0.transpose(u)) \
            - N0.transpose(N0.dot(u, N0.transpose(v)))

    ## add u[i].u[i] to everything that belongs to u[i] ...
    with_u = N0.asarray(
        [column + a for column, a in zip(N0.transpose(mixed), self_u)])

    ## ... and v[j].v[j] to everything that belongs to v[j]
    with_both = N0.asarray(
        [row + a for row, a in zip(N0.transpose(with_u), self_v)])

    return N0.transpose(N0.sqrt(with_both))
def pairwiseDistances(u, v):
    """
    Pairwise distances between two arrays.

    :param u: first array
    :type  u: array
    :param v: second array
    :type  v: array

    :return: array( len(u) x len(v) ) of double
    :rtype: array
    """
    ## scalar product of every row with itself
    norm_u = N0.diagonal(N0.dot(u, N0.transpose(u)))
    norm_v = N0.diagonal(N0.dot(v, N0.transpose(v)))

    ## minus twice the mixed scalar products, arranged len(v) x len(u)
    cross = -N0.dot(v, N0.transpose(u)) \
            - N0.transpose(N0.dot(u, N0.transpose(v)))

    ## first pass adds the u terms, second pass adds the v terms
    partial = [column + a for column, a in zip(N0.transpose(cross), norm_u)]
    partial = N0.transpose(N0.asarray(partial))

    squared = [row + a for row, a in zip(partial, norm_v)]

    return N0.transpose(N0.sqrt(N0.asarray(squared)))
def error(self, msm, d2):
    """
    Weighted error: trace of (msm ** self.w) . transpose(d2).

    @param msm: membership matrix
    @type  msm: array('f')
    @param d2: distance from data to the centers
    @type  d2: array('f')

    @return: weighted error
    @rtype: float
    """
    ## memberships raised element-wise to the exponent self.w
    weighted = N0.power(msm, self.w)

    return N0.trace(N0.dot(weighted, N0.transpose(d2)))
def transform(self, *rt):
    """
    Apply given transformation to all frames (in place).

    :param rt: rotation translation matrix
    :type  rt: array( 4 x 4 ) OR array(3 x 3), array(3 x 1)
    """
    if len(rt) == 2:
        rotation, translation = rt
    else:
        ## single argument: rotation is taken from rows/columns 0..2,
        ## translation from rows 0..2 of column 3
        matrix = rt[0]
        rotation = matrix[0:3, 0:3]
        translation = matrix[0:3, 3]

    ## the rotation is multiplied from the right below, hence the transpose
    rotation = N0.transpose(rotation).astype(N0.Float32)
    translation = translation.astype(N0.Float32)

    for index, frame in enumerate(self.frames):
        self.frames[index] = N0.array(N0.dot(frame, rotation)) + translation
def rowDistances(x, y):
    """
    Calculate the distances between the items of two arrays (of same shape)
    after least-squares superpositioning.

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    :rtype: array
    """
    ## transformation for the best match of y onto x
    rotation, translation = findTransformation(x, y)

    ## y after applying that transformation
    fitted = N0.dot(y, N0.transpose(rotation)) + translation

    ## per-row distance: root of the squared differences summed along axis 1
    deviation = x - fitted
    return N0.sqrt(N0.sum(N0.power(deviation, 2), 1))
def rowDistances(x, y):
    """
    Calculate the distances between the items of two arrays (of same shape)
    after least-squares superpositioning.

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    :rtype: array
    """
    ## find transformation for best match and apply it to y
    r, t = findTransformation(x, y)
    y_fitted = N0.dot(y, N0.transpose(r)) + t

    ## squared differences, summed along axis 1, then the root
    squared = N0.power(x - y_fitted, 2)
    return N0.sqrt(N0.sum(squared, 1))
def clusterEntropy(self):
    """
    Diagonal of msm . transpose(log(msm)), scaled by -1 / npoints.

    @return: one value per row of self.msm
    @rtype: array
    """
    log_msm = N0.log(self.msm)

    ## row i of msm multiplied with row i of log(msm)
    per_row = N0.diagonal(N0.dot(self.msm, N0.transpose(log_msm)))

    scale = -1 / float(self.npoints)
    return scale * per_row
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    history = []
    previous_rmsd = 0
    previous_stdv = 0
    rounds = 0
    done = False

    ## start with every row considered
    mask = N0.ones(len(y), N0.Int32)

    while not done:

        ## find transformation for best match of the considered rows
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform all coordinates of y
        fitted = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rows that are masked out are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - fitted, 2), 1)) * mask

        ## rmsd and stdv over the considered rows only
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## check conditions for convergence
        ## NOTE(review): stdv == 0 leaves the ratio below undefined --
        ## confirm whether callers can run into that case.
        delta_rmsd = abs(rmsd - previous_rmsd)
        delta_stdv = abs(1 - previous_stdv / stdv)

        if delta_rmsd < eps_rmsd and delta_stdv < eps_stdv:
            done = True
        else:
            previous_rmsd, previous_stdv = rmsd, stdv

        ## fraction of rows that entered this round
        fraction = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))

        history.append([fraction, round(rmsd, 3), outliers])

        rounds += 1
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), history
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    trace = []
    last_rmsd = 0
    last_stdv = 0
    count = 0
    finished = False

    ## start with every row considered
    mask = N0.ones(len(y), N0.Int32)

    while not finished:

        ## best-match transformation of the rows still considered
        x_used = N0.compress(mask, x, 0)
        y_used = N0.compress(mask, y, 0)
        r, t = findTransformation(x_used, y_used)

        ## transform all coordinates of y
        y_fitted = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rows that are masked out are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - y_fitted, 2), 1)) * mask

        ## rmsd and stdv over the considered rows only
        rmsd = N0.sqrt(N0.average(N0.compress(mask, d)**2))
        stdv = MU.SD(N0.compress(mask, d))

        ## convergence: small absolute change in rmsd and small relative
        ## change in stdv
        ## NOTE(review): stdv == 0 leaves the ratio below undefined --
        ## confirm whether callers can run into that case.
        rmsd_change = abs(rmsd - last_rmsd)
        stdv_change = abs(1 - last_stdv / stdv)

        if rmsd_change < eps_rmsd and stdv_change < eps_stdv:
            finished = True
        else:
            last_rmsd = rmsd
            last_stdv = stdv

        ## fraction of rows that entered this round
        n_considered = float(N0.sum(mask))
        perc = round(n_considered / float(len(mask)), 2)

        ## throw out non-matching rows
        threshold = rmsd + z * stdv
        mask = N0.logical_and(mask, N0.less(d, threshold))
        outliers = N0.nonzero(N0.logical_not(mask))

        trace.append([perc, round(rmsd, 3), outliers])

        count += 1
        if n_iterations and count >= n_iterations:
            break

    return (r, t), trace
def clusterEntropy(self):
    """
    Diagonal of msm . transpose(log(msm)), scaled by -1 / npoints.

    @return: one value per row of self.msm
    @rtype: array
    """
    ## row i of msm multiplied with row i of log(msm)
    products = N0.dot(self.msm, N0.transpose(N0.log(self.msm)))
    diagonal = N0.diagonal(products)

    factor = -1 / float(self.npoints)
    return factor * diagonal
def pcMovie(self, ev, steps, factor=1., ref=0, morph=1):
    """
    Morph between the two extreme values of a single principal
    component.

    :param ev: EigenVector to visualize
    :type  ev: int
    :param steps: number of intermediate frames
    :type  steps: int
    :param factor: exaggeration factor (default: 1 = no exaggeration)
    :type  factor: float
    :param ref: take other eigenvectors from this frame (default: 0)
    :type  ref: int
    :param morph: morph between min and max (1) or take real values (0)
                  (default: 1)
    :type  morph: 1|0

    :return: Trajectory with frames visualizing the morphing.
    :rtype: Trajectory
    """
    ## re-use the fit setting of an existing PCA, otherwise fit
    fit = self.pc['fit'] if self.pc is not None else 1
    pc = self.getPca(fit=fit)

    ## eigenvectors (rows); only the ev'th one is needed below
    mode = pc['u'][ev]

    ## raveled and centered frames
    mean_frame = N0.average(self.frames, 0)
    centered = N0.array([N0.ravel(f) for f in self.frames - mean_frame])

    ## projection of the reference frame onto the ev'th eigenvector
    ref_projection = N0.dot(centered[ref], mode)

    ## deviation of each frame's projection from that of the reference
    deviations = N0.dot(centered, mode) - ref_projection

    ## get some representative alphas...
    if morph:
        ## evenly spaced values starting at the scaled minimum
        lowest = factor * min(deviations)
        highest = factor * max(deviations)
        width = (highest - lowest) / steps
        alphas = [lowest + i * width for i in range(steps)]
    else:
        ## pick real values at regular positions of the sorted deviations
        ordered = N0.sort(deviations)
        stride = len(ordered) / (steps * 1.0)
        alphas = [ordered[int(round(i * stride))] for i in range(steps)]

    ## shift the reference frame along the eigenvector by each alpha
    morphed = N0.array([centered[ref] + alpha * mode for alpha in alphas])

    ## back convert to N x 3 coordinates and undo the centering
    morphed = N0.reshape(morphed, (morphed.shape[0], -1, 3))
    morphed = mean_frame + morphed

    result = self.__class__()
    result.ref = self.ref
    result.frames = morphed

    return result
def fit(self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
        **profInfos):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it > 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    :param mask: atom mask, atoms to consider default: [all]
    :type  mask: [1|0]
    :param ref: use as reference, default: None, average Structure
    :type  ref: PDBModel
    :param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    :type  n_it: int
    :param prof: save rms per frame in profile of this name, ['rms']
    :type  prof: str
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0
    :param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    :type  fit: 1|0
    :param profInfos: additional key=value pairs for rms profile info []
    :type  profInfos: key=value
    """
    ## reference coordinates: average over all frames unless ref is given
    refxyz = N0.average(self.frames, 0) if ref is None else ref.getXyz()

    if mask is None:
        mask = N0.ones(len(refxyz), N0.Int32)

    refxyz = N0.compress(mask, refxyz, 0)

    if verbose:
        T.errWrite("rmsd fitting...")

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit
    iterations = []    ## number of iterations performed on each frame

    for i, xyz in enumerate(self.frames):

        ## masked atoms of this frame, used to determine the transformation
        selected = N0.compress(mask, xyz, 0)

        if n_it != 1:
            ## iterative fit; the last trace entry holds the final values
            (r, t), rmsdList = rmsFit.match(refxyz, selected, n_it)
            iterations.append(len(rmsdList))
            non_outliers.append(rmsdList[-1][0])

            xyz_transformed = N0.dot(xyz, N0.transpose(r)) + t

            rms.append(rmsdList[-1][1])

        else:
            ## single fit; rms is calculated here from the masked atoms
            r, t = rmsFit.findTransformation(refxyz, selected)

            xyz_transformed = N0.dot(xyz, N0.transpose(r)) + t

            difference = N0.compress(mask, xyz_transformed, 0) - refxyz
            d = N0.sqrt(N0.sum(N0.power(difference, 2), 1))

            rms.append(N0.sqrt(N0.average(d**2)))

        if fit:
            self.frames[i] = xyz_transformed.astype(N0.Float32)

        ## progress mark for every 100th frame
        if verbose and i % 100 == 0:
            T.errWrite('#')

    self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

    if non_outliers:
        self.setProfile(
            prof + '_considered', non_outliers,
            n_iterations=n_it,
            comment='fraction of atoms considered for iterative fit')

    if verbose:
        T.errWrite('done\n')
def calc_cluster_center(self, msm):
    """
    Average of self.data weighted with msm ** self.w.

    @param msm: membership matrix
    @type  msm: array

    @return: dot(msm ** w, data), each row divided by the sum of its weights
    @rtype: array
    """
    ## memberships raised element-wise to the exponent self.w
    weights = N0.power(msm, self.w)

    weighted_sum = N0.dot(weights, self.data)
    totals = N0.sum(weights, 1)

    ## transposing lets the division run along the rows of weighted_sum
    return N0.transpose(N0.transpose(weighted_sum) / totals)