def __findTransformation(self, x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.
    Back transformation:
    for atom i new coordinates will be::
        y_new[i] = N0.dot(r, y[i]) + t

    for all atoms in one step::
        y_new = N0.dot(y, N0.transpose(r)) + t

    The returned matrix is always a proper rotation (determinant +1).
    If the unconstrained least-squares solution would be a reflection
    (e.g. y is a mirror image of x), the axis belonging to the smallest
    singular value is flipped, which gives the best proper rotation
    (Kabsch algorithm).

    @param x: coordinates
    @type  x: array
    @param y: coordinates
    @type  y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.linalg import svd, det

    ## center configurations; the centroid is the mean over all rows
    ## (float division so that integer input is not truncated)
    x_av = N0.sum(x, 0) / float(len(x))
    y_av = N0.sum(y, 0) / float(len(y))

    x = x - x_av
    y = y - y_av

    ## svd of correlation matrix
    v, l, u = svd(N0.dot(N0.transpose(x), y))

    ## build rotation matrix
    r = N0.dot(v, u)

    ## a negative determinant means v*u mirrors the coordinates;
    ## singular values are sorted in descending order, so flipping the
    ## last column of v changes the fit as little as possible
    if det(r) < 0:
        v[:, -1] = -v[:, -1]
        r = N0.dot(v, u)

    ## translation that maps the rotated centroid of y onto that of x
    t = x_av - N0.dot(r, y_av)

    return r, t
def findTransformation(x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    y is superimposed onto x by::
        y_new = N0.dot(y, N0.transpose(r)) + t

    The returned matrix is always a proper rotation (determinant +1).
    If the unconstrained least-squares solution would be a reflection
    (e.g. y is a mirror image of x), the axis belonging to the smallest
    singular value is flipped, which gives the best proper rotation
    (Kabsch algorithm).

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: rotation matrix (3x3) and translation vector (1x3)
    @rtype:  array, array
    """
    from numpy.linalg import det

    ## center configurations (centroid = average over all rows)
    x_av = N0.average(x, 0)
    y_av = N0.average(y, 0)

    x = x - x_av
    y = y - y_av

    ## svd of correlation matrix
    v, l, u = svd(N0.dot(N0.transpose(x), y))

    ## build rotation matrix
    r = N0.dot(v, u)

    ## a negative determinant means v*u mirrors the coordinates;
    ## singular values are sorted in descending order, so flipping the
    ## last column of v changes the fit as little as possible
    if det(r) < 0:
        v[:, -1] = -v[:, -1]
        r = N0.dot(v, u)

    ## translation that maps the rotated centroid of y onto that of x
    t = x_av - N0.dot(r, y_av)

    return r, t
def __pairwiseDistances(self, u, v):
    """
    Pairwise distance between 2 3-D numpy arrays of atom coordinates.

    Uses the expansion |a - b|^2 = |a|^2 + |b|^2 - 2 a.b so that the
    whole matrix is computed with array operations only.

    @param u: coordinates
    @type  u: array
    @param v: coordinates
    @type  v: array

    @return: Numpy array len(u) x len(v)
    @rtype: array

    @raise ComplexError: if u or v is not an array

    @author: Wolfgang Rieping.
    """
    ## check input
    if not isinstance(u, arraytype) or not isinstance(v, arraytype):
        raise ComplexError('unsupported argument type ' + \
                           str( type(u) ) + ' or ' + str( type(v) ) )

    ## squared length of every coordinate vector
    sq_u = N0.sum(u * u, 1)
    sq_v = N0.sum(v * v, 1)

    ## squared distances, entry [i][j] belongs to u[i] and v[j]
    sq_dist = N0.add.outer(sq_u, sq_v) - 2 * N0.dot(u, N0.transpose(v))

    ## rounding can leave tiny negative values for (nearly) identical
    ## points; clamp them so that sqrt does not return NaN
    sq_dist[N0.less(sq_dist, 0)] = 0

    return N0.sqrt(sq_dist)
def squared_distance_matrix(x, y):
    """
    Squared euclidean distances between all rows of x and all rows of y.

    Uses |a - b|^2 = |a|^2 + |b|^2 - 2 a.b. The squared row lengths are
    computed directly instead of via the diagonal of x.x^T / y.y^T, which
    would build two full square matrices only to discard most of them.

    @param x: first set of row vectors
    @type  x: array
    @param y: second set of row vectors (same number of columns as x)
    @type  y: array

    @return: array len(x) x len(y), entry [i][j] is |x[i] - y[j]|^2
    @rtype: array
    """
    x = N0.asarray(x)
    y = N0.asarray(y)

    ## squared length of every row
    sq_x = N0.sum(x * x, 1)
    sq_y = N0.sum(y * y, 1)

    ## all pairwise scalar products
    cross = N0.dot(x, N0.transpose(y))

    return N0.add.outer(sq_x, sq_y) - 2 * cross
def error(self, msm, d2):
    """
    Weighted error: the sum over all entries of msm**w * d2, computed
    as the trace of (msm**w) . d2^T.

    @param msm: membership matrix
    @type  msm: array('f')
    @param d2: distance from data to the centers
    @type  d2: array('f')

    @return: weighted error
    @rtype: float
    """
    ## memberships raised to the fuzziness exponent self.w
    weighted = N0.power(msm, self.w)

    ## diagonal entry i is the scalar product of row i of both matrices
    return N0.trace(N0.dot(weighted, N0.transpose(d2)))
def transform( self, *rt ):
    """
    Apply given transformation to all frames (in place).

    Accepts either two arguments (rotation matrix, translation vector)
    or a single 4 x 4 matrix whose upper-left 3 x 3 block is the rotation
    and whose last column holds the translation.

    @param rt: rotation translation matrix
    @type  rt: array( 4 x 4 ) OR array(3 x 3), array(3 x 1)
    """
    if len(rt) == 2:
        rot, trans = rt
    else:
        matrix = rt[0]
        rot = matrix[0:3, 0:3]
        trans = matrix[0:3, 3]

    ## coordinates are stored as rows, so they are multiplied from the
    ## left onto the transposed rotation matrix
    rot = N0.transpose( rot ).astype(N0.Float32)
    trans = trans.astype(N0.Float32)

    for i, frame in enumerate( self.frames ):
        self.frames[ i ] = N0.array( N0.dot( frame, rot ) ) + trans
def rowDistances(x, y):
    """
    Calculate the distances between the items of two arrays (of
    same shape) after least-squares superpositioning.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    @rtype: array
    """
    ## rotation and translation that fit y onto x
    rotation, translation = findTransformation(x, y)

    ## y after superposition
    y_fitted = N0.dot(y, N0.transpose(rotation)) + translation

    ## euclidean length of each row of the difference
    deviation = x - y_fitted
    return N0.sqrt(N0.sum(N0.power(deviation, 2), 1))
def hbonds(model):
    """
    Collect a list with all potential hydrogen bonds in model.

    The donor atom names from molU.hbonds are extended by their hydrogen
    synonyms on a local copy only; the shared molU tables are not modified.

    @param model: PDBModel for which hydrogen bonds are collected
    @type  model: PDBModel

    @return: a list of potential hydrogen bonds containing a lists
             with donor index, acceptor index, distance and angle.
    @rtype: [ int, int, float, float ]
    """
    hbond_lst = []
    donors = molU.hbonds['donors']
    accept = molU.hbonds['acceptors']

    ## indices of potential donors
    d_ind = []
    for res, aList in donors.items():
        ## extend a copy of the name list by all (transitive) synonyms;
        ## appending to aList itself would grow the module-level table on
        ## every call and could loop forever on mutually synonymous names
        names = list(aList)
        todo = list(aList)
        while todo:
            a = todo.pop(0)
            if a in molU.hydrogenSynonyms:
                synonym = molU.hydrogenSynonyms[a]
                if synonym not in names:
                    names.append(synonym)
                    todo.append(synonym)

        d_ind += model.filterIndex(residue_name=res, name=names)

    ## indices of potential acceptors
    a_ind = []
    for res, aList in accept.items():
        a_ind += model.filterIndex(residue_name=res, name=aList)

    ## calculate pairwise distances and angles
    for d in d_ind:
        d_xyz = model.xyz[d]
        d_nr = model.atoms['residue_number'][d]
        d_cid = model.atoms['chain_id'][d]
        d_segi = model.atoms['segment_id'][d]

        for a in a_ind:
            a_xyz = model.xyz[a]
            a_nr = model.atoms['residue_number'][a]
            a_cid = model.atoms['chain_id'][a]
            a_segi = model.atoms['segment_id'][a]

            dist = N0.sqrt(sum((d_xyz - a_xyz)**2))

            ## don't calculate angles within the same residue and
            ## for distances that definitely are not h-bonds
            if dist < 3.0 and not\
               ( d_nr == a_nr and d_cid == a_cid and d_segi == a_segi ):

                ## calculate angle for potential hbond
                d_xyz_cov = xyzOfNearestCovalentNeighbour(d, model)
                a_xyz_cov = xyzOfNearestCovalentNeighbour(a, model)
                d_vec = d_xyz_cov - d_xyz
                a_vec = a_xyz - a_xyz_cov

                d_len = N0.sqrt(sum((d_vec)**2))
                a_len = N0.sqrt(sum((a_vec)**2))

                da_dot = N0.dot(d_vec, a_vec)

                ## angle in degrees between the two covalent bond vectors
                angle = 180 - N0.arccos(da_dot / (d_len * a_len)) * 180 / N0.pi

                if hbondCheck(angle, dist):
                    hbond_lst.append([d, a, dist, angle])

    return hbond_lst
def hbonds( model ):
    """
    Collect a list with all potential hydrogen bonds in model.

    The donor atom names from molU.hbonds are extended by their hydrogen
    synonyms on a local copy only; the shared molU tables are not modified.

    @param model: PDBModel for which hydrogen bonds are collected
    @type  model: PDBModel

    @return: a list of potential hydrogen bonds containing a lists
             with donor index, acceptor index, distance and angle.
    @rtype: [ int, int, float, float ]
    """
    hbond_lst = []
    donors = molU.hbonds['donors']
    accept = molU.hbonds['acceptors']

    ## indices of potential donors
    d_ind = []
    for res, aList in donors.items():
        ## extend a copy of the name list by all (transitive) synonyms;
        ## appending to aList itself would grow the module-level table on
        ## every call and could loop forever on mutually synonymous names
        names = list( aList )
        todo = list( aList )
        while todo:
            a = todo.pop( 0 )
            if a in molU.hydrogenSynonyms:
                synonym = molU.hydrogenSynonyms[a]
                if synonym not in names:
                    names.append( synonym )
                    todo.append( synonym )

        d_ind += model.filterIndex( residue_name=res, name=names )

    ## indices of potential acceptors
    a_ind = []
    for res, aList in accept.items():
        a_ind += model.filterIndex( residue_name=res, name=aList )

    ## calculate pairwise distances and angles
    for d in d_ind:
        d_xyz = model.xyz[d]
        d_nr = model.atoms['residue_number'][d]
        d_cid = model.atoms['chain_id'][d]
        d_segi = model.atoms['segment_id'][d]

        for a in a_ind:
            a_xyz = model.xyz[a]
            a_nr = model.atoms['residue_number'][a]
            a_cid = model.atoms['chain_id'][a]
            a_segi = model.atoms['segment_id'][a]

            dist = N0.sqrt( sum( (d_xyz - a_xyz)**2 ) )

            ## don't calculate angles within the same residue and
            ## for distances that definitely are not h-bonds
            if dist < 3.0 and not\
               ( d_nr == a_nr and d_cid == a_cid and d_segi == a_segi ):

                ## calculate angle for potential hbond
                d_xyz_cov = xyzOfNearestCovalentNeighbour( d, model )
                a_xyz_cov = xyzOfNearestCovalentNeighbour( a, model )
                d_vec = d_xyz_cov - d_xyz
                a_vec = a_xyz - a_xyz_cov

                d_len = N0.sqrt( sum( (d_vec)**2 ) )
                a_len = N0.sqrt( sum( (a_vec)**2 ) )

                da_dot = N0.dot( d_vec, a_vec )

                ## angle in degrees between the two covalent bond vectors
                angle = 180 - N0.arccos( da_dot / (d_len * a_len) )*180/N0.pi

                if hbondCheck( angle, dist ):
                    hbond_lst.append( [ d, a, dist, angle ] )

    return hbond_lst
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    Rows whose distance after superposition is not below
    C{ rmsd + z * stdv } are excluded from the next fit. The loop also
    stops as soon as no row is left, because a further pass could not
    produce a meaningful fit.

    @param x: first set of coordinates (reference)
    @type  x: array
    @param y: second set of coordinates (fitted onto x)
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (masked-out rows are set to 0)
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## calculate rmsd and stdv
        rmsd = N0.sqrt(N0.average(N0.compress(mask, d)**2))
        stdv = MU.SD(N0.compress(mask, d))

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)

        ## relative change of stdv; guard the division for a spread of
        ## exactly zero (e.g. identical input or a single remaining row)
        if stdv == 0:
            if stdv_old == 0:
                d_stdv = 0.0
            else:
                d_stdv = float('inf')
        else:
            d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old = rmsd
            stdv_old = stdv

        ## store result
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

        ## nothing left to fit: stop instead of iterating on empty input
        if not N0.sum(mask):
            break

    return (r, t), iter_trace
def calc_cluster_center(self, msm):
    """
    Centers as the weighted average of self.data, using the memberships
    raised to the power self.w as weights.

    @param msm: membership matrix (one row per center)
    @type  msm: array

    @return: one row per row of msm, one column per column of self.data
    @rtype: array
    """
    weights = N0.power(msm, self.w)

    ## weighted sum of the data for every row of msm
    weighted_sum = N0.dot(weights, self.data)

    ## total weight per row of msm
    norm = N0.sum(weights, 1)

    ## transpose so that the division runs along the rows of msm
    return N0.transpose(N0.transpose(weighted_sum) / norm)
def clusterEntropy(self):
    """
    Entropy-like measure for every row of the membership matrix::
        -1/npoints * sum_j( msm[i][j] * log(msm[i][j]) )

    @return: one value per row of self.msm
    @rtype: array
    """
    log_msm = N0.log(self.msm)

    ## diagonal entry i is the scalar product of row i of msm with
    ## row i of log(msm)
    per_row = N0.diagonal(N0.dot(self.msm, N0.transpose(log_msm)))

    scale = -1 / float(self.npoints)
    return scale * per_row
def fit( self, mask=None, ref=None, n_it=1, prof='rms', verbose=1,
         fit=1, **profInfos ):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it > 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    @param mask: atom mask, atoms to consider default: [all]
    @type  mask: [1|0]
    @param ref: use as reference, default: None, average Structure
    @type  ref: PDBModel
    @param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    @type  n_it: int
    @param prof: save rms per frame in profile of this name, ['rms']
    @type  prof: str
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0
    @param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    @type  fit: 1|0
    @param profInfos: additional key=value pairs for rms profile info []
    @type  profInfos: key=value
    """
    ## reference coordinates: given model or average over all frames
    if ref is None:
        refxyz = N0.average( self.frames, 0 )
    else:
        refxyz = ref.getXyz()

    if mask is None:
        mask = N0.ones( len( refxyz ), N0.Int32 )

    refxyz = N0.compress( mask, refxyz, 0 )

    if verbose:
        T.errWrite( "rmsd fitting..." )

    rms = []          ## rms value of each frame
    non_outliers = [] ## fraction of atoms considered for rms and fit
    iterations = []   ## number of iterations performed on each frame

    for i in range( len( self.frames ) ):

        xyz = self.frames[i]
        xyz_masked = N0.compress( mask, xyz, 0 )

        if n_it == 1:
            ## classic single fit; rms is calculated here
            r, t = rmsFit.findTransformation( refxyz, xyz_masked )

            xyz_transformed = N0.dot( xyz, N0.transpose(r) ) + t

            diff = N0.compress( mask, xyz_transformed, 0 ) - refxyz
            d = N0.sqrt( N0.sum( N0.power( diff, 2 ), 1 ) )

            rms.append( N0.sqrt( N0.average( d**2 ) ) )

        else:
            ## iterative fit; rms and considered fraction are taken
            ## from the last entry of the iteration trace
            (r, t), rmsdList = rmsFit.match( refxyz, xyz_masked, n_it )

            iterations.append( len( rmsdList ) )
            non_outliers.append( rmsdList[-1][0] )

            xyz_transformed = N0.dot( xyz, N0.transpose(r) ) + t

            rms.append( rmsdList[-1][1] )

        if fit:
            self.frames[i] = xyz_transformed.astype(N0.Float32)

        ## progress mark every 100 frames
        if verbose and i%100 == 0:
            T.errWrite( '#' )

    self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

    if non_outliers:
        self.setProfile( prof+'_considered', non_outliers,
                         n_iterations=n_it,
                         comment='fraction of atoms considered for iterative fit' )

    if verbose:
        T.errWrite( 'done\n' )
def pcMovie( self, ev, steps, factor=1., ref=0, morph=1 ):
    """
    Morph between the two extreme values of a single principal
    component.

    With morph=1 the projection is sampled in equal increments starting
    at the (scaled) minimum; the (scaled) maximum itself is not included.
    With morph=0, values actually observed in the trajectory are picked
    from the sorted projections; if steps exceeds the number of frames,
    values are repeated.

    @param ev: EigenVector to visualize
    @type  ev: int
    @param steps: number of intermediate frames
    @type  steps: int
    @param factor: exaggeration factor (default: 1 = No exaggeration)
    @type  factor: float
    @param ref: take other eigenvectors from this frame (default: 0)
    @type  ref: int
    @param morph: morph between min and max (1) or take real values (0)
                  (default: 1)
    @type  morph: 1|0

    @return: Trajectory with frames visualizing the morphing.
    @rtype: Trajectory
    """
    ## re-use the fit setting of an existing PCA, if there is one
    fit = 1
    if self.pc is not None:
        fit = self.pc['fit']
    pc = self.getPca( fit=fit )

    ## eigenvectors (rows)
    U = pc['u']

    ## raveled and centered frames
    x_avg = N0.average(self.frames, 0)
    X = N0.array( [N0.ravel(x) for x in self.frames - x_avg] )

    ## ev'th eigenvector of reference frame
    alpha_0 = N0.dot( X[ref], U[ev] )

    ## list of deviations of ev'th eigenvector of each frame from ref
    alpha_range = N0.dot(X, U[ev]) - alpha_0

    ## get some representative alphas...
    if morph:
        a_min = factor * min(alpha_range)
        a_max = factor * max(alpha_range)
        delta = (a_max - a_min) / steps
        alpha_range = [ a_min + i*(delta) for i in range(0, steps) ]
    else:
        alpha_range = N0.sort( alpha_range )
        delta = len(alpha_range) / (steps * 1.0)
        ## rounding can yield len(alpha_range) for large step counts;
        ## clamp to the last valid index
        last = len(alpha_range) - 1
        alpha_range = [ alpha_range[ min( int(round( i*delta )), last ) ]
                        for i in range(0, steps) ]

    ## scale ev'th eigenvector of ref with different alphas
    Y = N0.array( [ X[ref] + alpha * U[ev] for alpha in alpha_range] )

    ## back convert to N x 3 coordinates
    Y = N0.reshape(Y, (Y.shape[0], -1, 3))
    Y = x_avg + Y

    result = self.__class__()
    result.ref = self.ref

    result.frames = Y
    return result