def calcClusterNumber(self, min_clst=5, max_clst=30, rmsLimit=1.0,
                      weight=1.13, converged=1e-11, aMask=None, force=0):
    """
    Bisect the interval [min_clst, max_clst] to find the approximate
    number of clusters for which the mean of the per-cluster average
    rmsd values no longer exceeds rmsLimit.

    @param min_clst: lower limit for clusters (default: 5)
    @type  min_clst: int
    @param max_clst: upper limit for clusters (default: 30 )
    @type  max_clst: int
    @param rmsLimit: rmsd criteria that the average of all clusters
                     must meet in Angstrom (default: 1.0)
    @type  rmsLimit: float
    @param weight: fuzziness weight, handed on to cluster()
                   (default: 1.13)
    @type  weight: float
    @param converged: convergence criterion handed on to cluster()
                      (default: 1e-11)
    @type  converged: float
    @param aMask: atom mask handed on to cluster() and avgRmsd()
    @type  aMask: [1|0]
    @param force: re-calculate even if parameters haven't changed
                  (default: 0)
    @type  force: 1|0

    @return: upper end of the search interval once it is exactly 1 wide
    @rtype: int

    @raise ClusterError: if the search interval collapses or leaves
                         the range [min_clst, max_clst]
    """
    lower, upper = min_clst, max_clst

    while True:
        ## probe the (truncated) middle of the current interval
        clst = int(N0.average([lower, upper]))
        self.cluster(clst, weight, converged, aMask, force=force)

        mean_rms = N0.average(
            [self.avgRmsd(k, aMask)[0] for k in range(clst)])

        ## too loose -> more clusters needed; otherwise try fewer
        if mean_rms > rmsLimit:
            lower = clst
        else:
            upper = clst

        gap = upper - lower
        done = (gap == 1)

        if self.verbose:
            if done:
                msg = 'Converged at %i clusters, current average cluster rmsd %.2f\n'
            else:
                msg = 'Current cluster setting %i, current average cluster rmsd %.2f\n'
            T.flushPrint(msg % (clst, mean_rms))

        if done:
            return upper

        if gap <= 0 or lower < min_clst or upper > max_clst:
            raise ClusterError("Error determining number of clusters")
def findTransformation(x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.

    N0.average is called without an axis argument; the centering below
    assumes it yields one mean per coordinate column (confirm against
    the N0 module).

    :param x: first set of coordinates
    :type  x: array('f')
    :param y: second set of coordinates
    :type  y: array('f')

    :return: rotation matrix (3x3) and translation vector (1x3)
    :rtype:  array, array
    """
    ## centroids of both configurations
    centroid_x = N0.average(x)
    centroid_y = N0.average(y)

    ## correlation matrix of the centered configurations
    correlation = N0.dot(N0.transpose(x - centroid_x), y - centroid_y)

    ## only the two outer factors of the decomposition are used
    left, _singular, right = svd(correlation)

    rotation = N0.dot(left, right)
    translation = centroid_x - N0.dot(rotation, centroid_y)

    return rotation, translation
def linfit( x, y ):
    """
    Linear least-square fit through the points given by x and y.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    :param x: x-data
    :type  x: [ float ]
    :param y: y-data
    :type  y: [ float ]

    :return: m, n, r^2 (slope, intersection, corr. coefficient)
    :rtype: float, float, float

    :raise Exception: if x and y have different number of elements
    """
    x = N0.array( x, N0.Float64 )
    y = N0.array( y, N0.Float64 )

    n = len( x )
    if n != len( y ):
        raise Exception('linfit: x and y must have same length')

    mean_x = N0.average( x )
    mean_y = N0.average( y )

    ## sums of squares / cross products around the means
    ss_xy = N0.sum( x * y ) - n * mean_x * mean_y
    ss_xx = N0.sum( x * x ) - n * mean_x * mean_x
    ss_yy = N0.sum( y * y ) - n * mean_y * mean_y

    slope = ss_xy / ss_xx
    intercept = mean_y - slope * mean_x
    r_squared = ss_xy**2 / ( ss_xx * ss_yy )

    return slope, intercept, r_squared
def linfit(x, y):
    """
    Linear least-square fit through the points given by x and y.
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    :param x: x-data
    :type  x: [ float ]
    :param y: y-data
    :type  y: [ float ]

    :return: m, n, r^2 (slope, intersection, corr. coefficient)
    :rtype: float, float, float

    :raise Exception: if x and y have different number of elements
    """
    x = N0.array(x, N0.Float64)
    y = N0.array(y, N0.Float64)

    n = len(x)
    if n != len(y):
        raise Exception('linfit: x and y must have same length')

    mean_x = N0.average(x)
    mean_y = N0.average(y)

    ## sums of squares / cross products around the means
    ss_xy = N0.sum(x * y) - n * mean_x * mean_y
    ss_xx = N0.sum(x * x) - n * mean_x * mean_x
    ss_yy = N0.sum(y * y) - n * mean_y * mean_y

    slope = ss_xy / ss_xx
    intercept = mean_y - slope * mean_x
    r_squared = ss_xy**2 / (ss_xx * ss_yy)

    return slope, intercept, r_squared
def avgRmsd( self, cluster, aMask=None, threshold=0. ):
    """
    Calculate the average pairwise rmsd (in Angstrom) for members
    of a cluster.

    @param cluster: cluster number
    @type  cluster: int
    @param aMask: atom mask applied before calculation
    @type  aMask: [1|0]
    @param threshold: float 0-1, minimal cluster membership,
                      see L{memberTraj()}
    @type  threshold: float

    @return: average rmsd and the standard deviation; (0.0, 0.0) if
             no pairwise rmsd value is available or the rmsd matrix
             could not be calculated
    @rtype: float, float
    """
    try:
        rms = self.memberTraj(cluster,threshold).pairwiseRmsd( aMask )
        rms = aboveDiagonal( rms )
    except Exception:
        ## best effort: a cluster whose rmsd matrix cannot be built is
        ## reported as 0.0, 0.0 -- but KeyboardInterrupt / SystemExit
        ## are no longer swallowed
        rms = []

    n_values = len( N0.ravel( rms ) )

    if n_values == 0:
        return 0.0, 0.0

    if n_values == 1:
        ## a single value has no spread
        return N0.average( rms ), 0.0

    return N0.average( rms ), SD( rms )
def runningAverage( x, interval=2, preserve_boundaries=0 ):
    """
    Running average (smoothing) over a given data window.

    :param x: data
    :type  x: list of int/float
    :param interval: window size C{ (-(interval-1)/2 to +(interval-1)/2) }
                     (default: 2); 0 returns x unchanged
    :type  interval: int
    :param preserve_boundaries: shrink window at edges to keep original
                                start and end value (default: 0)
    :type  preserve_boundaries: 0|1

    :return: list of floats
    :rtype: [ float ]
    """
    if interval == 0:
        return x

    half = int( (interval - 1) / 2 )
    n = len( x )
    smoothed = []

    for i in range( n ):
        lo = i - half
        hi = i + half + 1

        if preserve_boundaries:
            ## shrink the window symmetrically where it overhangs
            if lo < 0:
                hi = hi + lo
                lo = 0
            if hi > n:
                lo = lo + hi - n
                hi = n
        else:
            ## simply cut the window off at the edges
            lo = max( 0, lo )
            hi = min( n, hi )

        smoothed.append( N0.average( x[lo:hi] ) )

    return N0.array( smoothed )
def runningAverage(x, interval=2, preserve_boundaries=0):
    """
    Running average (smoothing) over a given data window.

    :param x: data
    :type  x: list of int/float
    :param interval: window size C{ (-(interval-1)/2 to +(interval-1)/2) }
                     (default: 2); 0 returns x unchanged
    :type  interval: int
    :param preserve_boundaries: shrink window at edges to keep original
                                start and end value (default: 0)
    :type  preserve_boundaries: 0|1

    :return: list of floats
    :rtype: [ float ]
    """
    if interval == 0:
        return x

    half = int((interval - 1) / 2)
    n = len(x)
    smoothed = []

    for i in range(n):
        lo = i - half
        hi = i + half + 1

        if preserve_boundaries:
            ## shrink the window symmetrically where it overhangs
            if lo < 0:
                hi = hi + lo
                lo = 0
            if hi > n:
                lo = lo + hi - n
                hi = n
        else:
            ## simply cut the window off at the edges
            lo = max(0, lo)
            hi = min(n, hi)

        smoothed.append(N0.average(x[lo:hi]))

    return N0.array(smoothed)
def calcClusterNumber( self, min_clst=5, max_clst=30, rmsLimit=1.0,
                       weight=1.13, converged=1e-11, aMask=None, force=0 ):
    """
    Bisect the interval [min_clst, max_clst] to find the approximate
    number of clusters for which the mean of the per-cluster average
    rmsd values no longer exceeds rmsLimit.

    @param min_clst: lower limit for clusters (default: 5)
    @type  min_clst: int
    @param max_clst: upper limit for clusters (default: 30 )
    @type  max_clst: int
    @param rmsLimit: rmsd criteria that the average of all clusters
                     must meet in Angstrom (default: 1.0)
    @type  rmsLimit: float
    @param weight: fuzziness weight, handed on to cluster()
                   (default: 1.13)
    @type  weight: float
    @param converged: convergence criterion handed on to cluster()
                      (default: 1e-11)
    @type  converged: float
    @param aMask: atom mask handed on to cluster() and avgRmsd()
    @type  aMask: [1|0]
    @param force: re-calculate even if parameters haven't changed
                  (default: 0)
    @type  force: 1|0

    @return: upper end of the search interval once it is exactly 1 wide
    @rtype: int

    @raise ClusterError: if the search interval collapses or leaves
                         the range [min_clst, max_clst]
    """
    lower, upper = min_clst, max_clst

    while True:
        ## probe the (truncated) middle of the current interval
        clst = int( N0.average( [lower, upper] ) )
        self.cluster( clst, weight, converged, aMask, force=force )

        mean_rms = N0.average(
            [ self.avgRmsd(k, aMask)[0] for k in range(clst) ] )

        ## too loose -> more clusters needed; otherwise try fewer
        if mean_rms > rmsLimit:
            lower = clst
        else:
            upper = clst

        gap = upper - lower
        done = (gap == 1)

        if self.verbose:
            if done:
                msg = 'Converged at %i clusters, current average cluster rmsd %.2f\n'
            else:
                msg = 'Current cluster setting %i, current average cluster rmsd %.2f\n'
            T.flushPrint( msg % ( clst, mean_rms ) )

        if done:
            return upper

        if gap <= 0 or lower < min_clst or upper > max_clst:
            raise ClusterError("Error determining number of clusters")
def pairwiseRmsd( self, aMask=None, noFit=0 ):
    """
    Calculate rmsd between each 2 coordinate frames.

    :param aMask: atom mask
    :type  aMask: [1|0]
    :param noFit: if true, take the rmsd of the frames as they are;
                  otherwise use the rmsd reported by rmsFit.match
                  (default: 0)
    :type  noFit: 1|0

    :return: frames x frames array of float
    :rtype: array
    """
    coords = self.frames

    if aMask is not None:
        coords = N0.compress( aMask, coords, 1 )

    n_frames = len( coords )
    result = N0.zeros( (n_frames, n_frames), N0.Float32 )

    ## only the upper triangle is calculated; values are mirrored
    for i in range( n_frames ):
        for j in range( i + 1, n_frames ):

            if noFit:
                dist = N0.sqrt( N0.sum( N0.power( coords[i] - coords[j], 2 ), 1 ) )
                value = N0.sqrt( N0.average( dist**2 ) )
            else:
                _rt, trace = rmsFit.match( coords[i], coords[j], 1 )
                value = trace[0][1]

            result[i, j] = value
            result[j, i] = value

    return result
def projectOnSphere( xyz, radius=None, center=None ):
    """
    Project the coordinates xyz on a sphere with a given radius around
    a given center.

    :param xyz: cartesian coordinates
    :type  xyz: array N x 3 of float
    :param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    :type  radius: float
    :param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    :type  center: array 0 x 3 of float

    :return: array of cartesian coordinates (x, y, z)
    :rtype: array
    """
    if center is None:
        center = N0.average( xyz )

    ## coordinates relative to the sphere center
    shifted = xyz - center

    if radius is None:
        radius = max( N0.sqrt( N0.sum( N0.power( shifted, 2 ), 1 ) ) )

    ## overwrite the first polar column with the target radius
    polar = cartesianToPolar( shifted )
    polar[ :, 0 ] = radius

    return polarToCartesian( polar ) + center
def projectOnSphere(xyz, radius=None, center=None):
    """
    Project the coordinates xyz on a sphere with a given radius around
    a given center.

    :param xyz: cartesian coordinates
    :type  xyz: array N x 3 of float
    :param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    :type  radius: float
    :param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    :type  center: array 0 x 3 of float

    :return: array of cartesian coordinates (x, y, z)
    :rtype: array
    """
    if center is None:
        center = N0.average(xyz)

    ## coordinates relative to the sphere center
    shifted = xyz - center

    if radius is None:
        radius = max(N0.sqrt(N0.sum(N0.power(shifted, 2), 1)))

    ## overwrite the first polar column with the target radius
    polar = cartesianToPolar(shifted)
    polar[:, 0] = radius

    return polarToCartesian(polar) + center
def avgRmsd2Ref( self, cluster, ref, avg=1 ):
    """
    Calculate the rmsd (or average rmsd) of all frames belonging to a
    cluster to a reference structure (in Angstrom).

    @param cluster: cluster number
    @type  cluster: int
    @param ref: reference structure; either a coordinate array or an
                object exposing its coordinates as attribute C{xyz}
    @type  ref: model OR array
    @param avg: return the average rmsd (1) OR a list with all rmsds (0)
                (default: 1)
    @type  avg: 1|0

    @return: average rmsd, or the list of rmsd values if avg is not 1
    @rtype: float OR [float]
    """
    eTraj = self.memberTraj(cluster,threshold=0)

    ## types.InstanceType only exists in Python 2 (old-style classes);
    ## recognise model objects by their xyz attribute instead
    ref_xyz = getattr( ref, 'xyz', None )
    if ref_xyz is not None:
        ref = ref_xyz

    rms = []
    for frame in eTraj.frames:
        rt, rmsdLst = rmsFit.match( ref, frame )
        rms.append( rmsdLst[0][1] )

    if avg == 1:
        return N0.average(rms)

    return rms
def centerModel( self, model ):
    """
    Shift a model so that the average of its coordinates becomes the
    origin. The model is modified in place (via setXyz).

    :param model: model
    :type  model: PDBModel
    """
    centroid = N0.average( model.getXyz() )
    centered = model.xyz - centroid
    model.setXyz( centered )
def slidingAverage( y, window=2 ):
    """
    Sliding average over a data window. For every position i between
    margin and len(y)-margin the average of y[i-margin : i+margin] is
    taken, where margin is floor( window / 2 ). The result is therefore
    shorter than y by 2 * margin values.

    :param y: data
    :type  y: sequence of int/float
    :param window: window size; 0 returns y unchanged (default: 2)
    :type  window: int

    :return: y itself if window is 0, otherwise list of window averages
    :rtype: [ float ]

    :raise AssertionError: if window is not smaller than len(y)
    """
    if window == 0:
        return y

    assert window < len(y), 'window size too large for array'

    ## Was: int(round((window-1)/2.)). Python 3 round() rounds halves to
    ## the nearest even number, which turned window=2 into margin 0 (empty
    ## slices) and window=6 into margin 2. floor(window/2) is what
    ## round-half-up gives for every positive window.
    margin = int( window // 2 )

    return [ N0.average( y[i-margin : i+margin] )
             for i in range( margin, len(y) - margin ) ]
def getFluct_global( self, mask=None ):
    """
    Get the fluctuation of each atom around its average position in the
    trajectory: the distance of every atom to the averaged coordinates
    is calculated per frame and then averaged. The frames should be
    superimposed (fit() ) to a reference.

    N0.average is called without an axis argument; this assumes it
    averages over the first (frame) axis -- confirm against N0.

    :param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                 atoms to be considered.
    :type  mask: [1|0]

    :return: Numpy array ( N_unmasked x 1 ) of float.
    :rtype: array
    """
    frames = self.frames

    if mask is not None:
        frames = N0.compress( mask, frames, 1 )

    ## mean position of each atom in all frames
    mean_xyz = N0.average( frames )

    ## distance of each atom to its mean position, frame by frame
    squared = N0.power( frames - mean_xyz, 2 )
    displacement = N0.sqrt( N0.sum( squared, 2 ) )

    return N0.average( displacement )
def avgModel( self ):
    """
    Return a PDBModel with coordinates that are the average of
    all frames.

    :return: PDBModel with average structure of trajectory (no fitting!)
             this trajectory's ref is the source of result model
    :rtype: PDBModel
    """
    ## empty copy (noxyz) of the reference ...
    average_model = PDBModel( self.getRef(), noxyz=1 )

    ## ... filled with the averaged frame coordinates
    mean_xyz = N0.average( self.frames )
    average_model.setXyz( mean_xyz )

    return average_model
def slidingAverage(y, window=2):
    """
    Sliding average over a data window. For every position i between
    margin and len(y)-margin the average of y[i-margin : i+margin] is
    taken, where margin is floor( window / 2 ). The result is therefore
    shorter than y by 2 * margin values.

    :param y: data
    :type  y: sequence of int/float
    :param window: window size; 0 returns y unchanged (default: 2)
    :type  window: int

    :return: y itself if window is 0, otherwise list of window averages
    :rtype: [ float ]

    :raise AssertionError: if window is not smaller than len(y)
    """
    if window == 0:
        return y

    assert window < len(y), 'window size too large for array'

    ## Was: int(round((window - 1) / 2.)). Python 3 round() rounds halves
    ## to the nearest even number, which turned window=2 into margin 0
    ## (empty slices) and window=6 into margin 2. floor(window/2) is what
    ## round-half-up gives for every positive window.
    margin = int(window // 2)

    return [
        N0.average(y[i - margin:i + margin])
        for i in range(margin, len(y) - margin)
    ]
def outliers(self, z=1.0, mask=None, prof='rmsCA_last', last=10, step=1,
             verbose=1):
    """
    Identify outlier trajectories. First we calculate the CA-RMS of every
    |step|th frame to the last frame. Outliers are member trajectories for
    which the **slope** of this rms profile is z standard deviations below
    the mean of all members.

    :param z: z-value threshold
    :type  z: float
    :param mask: atom mask used (default: ref.maskCA())
    :type  mask: [int]
    :param prof: name of pre-calculated profile to use
                 (default: 'rmsCA_last')
    :type  prof: str
    :param last: skip |last| last frames from linear regression;
                 0 uses all frames (default: 10)
    :type  last: int
    :param step: frame offset
    :type  step: int
    :param verbose: handed on to fitMembers() if the profile has to be
                    calculated (default: 1)
    :type  verbose: 1|0

    :return: member mask of outlier trajectories
    :rtype: [0|1]
    """
    if mask is None:
        mask = self.ref.maskCA()

    traj = self.compressAtoms(mask)
    if step != 1:
        traj = traj.thin(step)

    if prof not in traj.profiles:
        traj.fitMembers(refIndex=-1, prof=prof, verbose=verbose)

    p_all = traj.profiles[prof]
    n = traj.n_members
    l = len(traj)

    ## profile of each member (every n-th value) without the last frames
    pm = []
    for member in range(n):
        p = p_all[member:l:n]
        ## p[:-0] would be empty, so last=0 has to be handled explicitly
        pm.append(p if last == 0 else p[:-last])

    x = list(range(l // n - last))
    slopes = [M.linfit(x, p)[0] for p in pm]

    mean, sd = N0.average(slopes), M.SD(slopes)

    return [r - mean < -z * sd for r in slopes]
def calcProfiles(self, m):
    """
    Make sure the profile self.profileName exists in the model and
    return it with one value per residue. Atom profiles are averaged
    over the atoms of each residue.

    @param m: PDBModel to calculate data for
    @type  m: PDBModel

    @return: profile values (residue averages for atom profiles)
    @rtype: [ float ] OR profile as returned by m.profile()
    """
    name = self.profileName

    if self.verbose:
        print("Initiating PDBDope...")
    d = PDBDope(m)

    ## add missing atom profiles
    if name not in m.atoms.keys():

        if name in ['MS', 'AS', 'curvature', 'relAS', 'relMS']:
            if self.verbose:
                print("Adding SurfaceRacer profile...", end=' ')
            d.addSurfaceRacer()

        if name in ['density']:
            if self.verbose:
                print("Adding surface density...", end=' ')
            d.addDensity()

    ## add missing residue profiles
    if name not in m.residues.keys():

        if name in ['cons_abs', 'cons_max', 'cons_ent']:
            if self.verbose:
                print("Adding conservation data...", end=' ')
            d.addConservation()

    if self.verbose:
        print('Done.')

    ## anything that is not an atom profile is returned as it is
    if name not in m.atoms.keys():
        return m.profile(name)

    ## convert atom profiles to average residue profile
    aProfile = m.profile(name)
    bounds = m.resIndex().tolist() + [m.lenAtoms()]

    return [
        N0.average(N0.take(aProfile, list(range(start, stop))))
        for start, stop in zip(bounds[:-1], bounds[1:])
    ]
def variance(x, avg=None):
    """
    Variance, S{sigma}^2, normalised by len(x) - 1.

    :param x: data
    :type  x: array('f') or float
    :param avg: use this average, otherwise calculated from x
    :type  avg: float OR None

    :return: variance; 0.0 if x has exactly one element
    :rtype: float
    """
    if avg is None:
        avg = N0.average(x)

    n = len(x)
    if n == 1:
        return 0.0

    deviations = N0.array(x) - avg
    return N0.sum(N0.power(deviations, 2)) / (n - 1.)
def outliers( self, z=1.0, mask=None, prof='rmsCA_last',
              last=10, step=1, verbose=1 ):
    """
    Identify outlier trajectories. First we calculate the CA-RMS of every
    |step|th frame to the last frame. Outliers are member trajectories for
    which the **slope** of this rms profile is z standard deviations below
    the mean of all members.

    :param z: z-value threshold
    :type  z: float
    :param mask: atom mask used (default: ref.maskCA())
    :type  mask: [int]
    :param prof: name of pre-calculated profile to use
                 (default: 'rmsCA_last')
    :type  prof: str
    :param last: skip |last| last frames from linear regression;
                 0 uses all frames (default: 10)
    :type  last: int
    :param step: frame offset
    :type  step: int
    :param verbose: handed on to fitMembers() if the profile has to be
                    calculated (default: 1)
    :type  verbose: 1|0

    :return: member mask of outlier trajectories
    :rtype: [0|1]
    """
    if mask is None:
        mask = self.ref.maskCA()

    traj = self.compressAtoms( mask )
    if step != 1:
        traj = traj.thin( step )

    if prof not in traj.profiles:
        traj.fitMembers( refIndex=-1, prof=prof, verbose=verbose )

    p_all = traj.profiles[ prof ]
    n = traj.n_members
    l = len( traj )

    ## profile of each member (every n-th value) without the last frames
    pm = []
    for member in range( n ):
        p = p_all[ member : l : n ]
        ## p[:-0] would be empty, so last=0 has to be handled explicitly
        pm.append( p if last == 0 else p[:-last] )

    x = list( range( l//n - last ) )
    slopes = [ M.linfit( x, p )[0] for p in pm ]

    mean, sd = N0.average( slopes ), M.SD( slopes )

    return [ r - mean < - z * sd for r in slopes ]
def variance(x, avg = None):
    """
    Variance, S{sigma}^2, normalised by len(x) - 1.

    :param x: data
    :type  x: array('f') or float
    :param avg: use this average, otherwise calculated from x
    :type  avg: float OR None

    :return: variance; 0.0 if x has exactly one element
    :rtype: float
    """
    if avg is None:
        avg = N0.average(x)

    n = len(x)
    if n == 1:
        return 0.0

    deviations = N0.array(x) - avg
    return N0.sum(N0.power(deviations, 2)) / (n - 1.)
def calcProfiles( self, m ):
    """
    Make sure the profile self.profileName exists in the model and
    return it with one value per residue. Atom profiles are averaged
    over the atoms of each residue.

    @param m: PDBModel to calculate data for
    @type  m: PDBModel

    @return: profile values (residue averages for atom profiles)
    @rtype: [ float ] OR profile as returned by m.profile()
    """
    name = self.profileName

    if self.verbose:
        print("Initiating PDBDope...")
    d = PDBDope( m )

    ## add missing atom profiles
    if name not in m.atoms.keys():

        if name in ['MS', 'AS', 'curvature', 'relAS', 'relMS']:
            if self.verbose:
                print("Adding SurfaceRacer profile...", end=' ')
            d.addSurfaceRacer()

        if name in ['density']:
            if self.verbose:
                print("Adding surface density...", end=' ')
            d.addDensity()

    ## add missing residue profiles
    if name not in m.residues.keys():

        if name in ['cons_abs', 'cons_max', 'cons_ent']:
            if self.verbose:
                print("Adding conservation data...", end=' ')
            d.addConservation()

    if self.verbose:
        print('Done.')

    ## anything that is not an atom profile is returned as it is
    if name not in m.atoms.keys():
        return m.profile( name )

    ## convert atom profiles to average residue profile
    aProfile = m.profile( name )
    bounds = m.resIndex().tolist() + [ m.lenAtoms() ]

    return [ N0.average( N0.take( aProfile, list(range(start, stop)) ) )
             for start, stop in zip( bounds[:-1], bounds[1:] ) ]
def reduceAtomProfiles( self, from_model, to_model ):
    """
    Reduce all atom profiles according to the calculated map
    (self.groups) by calculating the average over the grouped atoms.
    Profiles that cannot be averaged are skipped; their info record is
    copied to the target model nevertheless.

    @param from_model: model providing the atom profiles
    @type  from_model: PDBModel
    @param to_model: model receiving the reduced profiles
    @type  to_model: PDBModel
    """
    for profname in from_model.atoms:

        p0 = from_model.atoms.get(profname)
        info = from_model.profileInfo( profname )

        try:
            pr = [ N0.average( N0.take( p0, group ) )
                   for group in self.groups ]
            to_model.atoms.set( profname, pr )
        except Exception:
            ## best effort: profiles that cannot be averaged are left
            ## out; KeyboardInterrupt / SystemExit are not swallowed
            pass

        to_model.atoms.setInfo( profname, **info )
def pca( self, atomMask=None, frameMask=None, fit=1 ):
    """
    Calculate principal components of trajectory frames.
    With fit=1 the trajectory itself is superimposed first, see fit().

    :param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                     (default: all)
    :type  atomMask: [1|0]
    :param frameMask: 1 x N_frames, [001111..] frames to consider
                      (default all )
    :type  frameMask: [1|0]
    :param fit: call self.fit( atomMask ) before the analysis
                (default: 1)
    :type  fit: 1|0

    :return: the three values U, V * L and L^2 derived from the singular
             value decomposition V, L, U of the centered and flattened
             frames
    :rtype: array, array, array
    """
    if frameMask is None:
        frameMask = N0.ones( len( self.frames ), N0.Int32 )

    if atomMask is None:
        atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

    if fit:
        self.fit( atomMask )

    ## center the selected frames on the average of all frames
    refxyz = N0.average( self.frames, 0 )

    data = N0.compress( frameMask, self.frames, 0 )
    data = data - refxyz
    data = N0.compress( atomMask, data, 1 )

    ## reduce to 2D array; a list is needed because map() returns a lazy
    ## iterator in Python 3, which N0.array cannot expand into rows
    data = N0.array( [ N0.ravel( frame ) for frame in data ] )

    V, L, U = LA.svd( data )

    return U, V * L, N0.power(L, 2)
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param x: first set of coordinates
    :type  x: array
    :param y: second set of coordinates
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    trace = []
    prev_rmsd = 0
    prev_stdv = 0
    rounds = 0
    converged = 0

    ## rows still taken into account; all of them to begin with
    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match of the remaining rows
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances, zeroed for rows that are masked out
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv of the rows still considered
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - prev_rmsd)
        d_stdv = abs(1 - prev_stdv / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            prev_rmsd = rmsd
            prev_stdv = stdv

        ## fraction of rows used in this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))

        trace.append([perc, round(rmsd, 3), outliers])

        rounds += 1
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), trace
def fit( self, mask=None, ref=None, n_it=1,
         prof='rms', verbose=1, fit=1, **profInfos ):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    :param mask: atom mask, atoms to consider default: [all]
    :type  mask: [1|0]
    :param ref: use as reference, default: None, average Structure
    :type  ref: PDBModel
    :param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    :type  n_it: int
    :param prof: save rms per frame in profile of this name, ['rms']
    :type  prof: str
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0
    :param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    :type  fit: 1|0
    :param profInfos: additional key=value pairs for rms profile info []
    :type  profInfos: key=value
    """
    if ref is None:
        refxyz = N0.average( self.frames, 0 )
    else:
        refxyz = ref.getXyz()

    if mask is None:
        mask = N0.ones( len( refxyz ), N0.Int32 )

    refxyz = N0.compress( mask, refxyz, 0 )

    if verbose:
        T.errWrite( "rmsd fitting..." )

    rms = []          ## rms value of each frame
    non_outliers = [] ## fraction of atoms considered for rms and fit
    iterations = []   ## number of iterations performed on each frame

    for i in range( len( self.frames ) ):

        xyz = self.frames[i]
        selected = N0.compress( mask, xyz, 0 )

        if n_it != 1:
            ## iterative fit, outliers are kicked out by rmsFit.match
            (r, t), rmsdList = rmsFit.match( refxyz, selected, n_it )
            iterations.append( len( rmsdList ) )
            non_outliers.append( rmsdList[-1][0] )

            xyz_transformed = N0.dot( xyz, N0.transpose(r) ) + t

            rms.append( rmsdList[-1][1] )

        else:
            ## classic single fit, rms is calculated here
            r, t = rmsFit.findTransformation( refxyz, selected )

            xyz_transformed = N0.dot( xyz, N0.transpose(r) ) + t

            diff = N0.compress( mask, xyz_transformed, 0 ) - refxyz
            d = N0.sqrt( N0.sum( N0.power( diff, 2 ), 1 ) )

            rms.append( N0.sqrt( N0.average( d**2 ) ) )

        if fit:
            self.frames[i] = xyz_transformed.astype( N0.Float32 )

        if verbose and i % 100 == 0:
            T.errWrite( '#' )

    self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

    if non_outliers:
        self.setProfile( prof+'_considered', non_outliers,
                         n_iterations=n_it,
                         comment='fraction of atoms considered for iterative fit' )

    if verbose:
        T.errWrite( 'done\n' )
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param x: first set of coordinates
    :type  x: array
    :param y: second set of coordinates
    :type  y: array
    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    :rtype: (array, array), [float, float, int]
    """
    trace = []
    prev_rmsd = 0
    prev_stdv = 0
    rounds = 0
    converged = 0

    ## rows still taken into account; all of them to begin with
    mask = N0.ones(len(y), N0.Int32 )

    while not converged:

        ## find transformation for best match of the remaining rows
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances, zeroed for rows that are masked out
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv of the rows still considered
        considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(considered**2))
        stdv = MU.SD(considered)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - prev_rmsd)
        d_stdv = abs(1 - prev_stdv / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            prev_rmsd = rmsd
            prev_stdv = stdv

        ## fraction of rows used in this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero( N0.logical_not( mask ) )

        trace.append([perc, round(rmsd, 3), outliers])

        rounds += 1
        if n_iterations and rounds >= n_iterations:
            break

    return (r, t), trace
def pcMovie( self, ev, steps, factor=1., ref=0, morph=1 ):
    """
    Morph between the two extreme values of a single principal
    component.

    :param ev: EigenVector to visualize
    :type  ev: int
    :param steps: number of intermediate frames
    :type  steps: int
    :param factor: exaggeration factor (default: 1 = No exaggeration)
    :type  factor: float
    :param ref: take other eigenvectors from this frame (default: 0)
    :type  ref: int
    :param morph: morph between min and max (1) or take real values (0)
                  (default: 1)
    :type  morph: 1|0

    :return: Trajectory with frames visualizing the morphing.
    :rtype: Trajectory
    """
    ## re-use the fit setting of an already existing PCA
    fit = 1 if self.pc is None else self.pc['fit']
    pc = self.getPca( fit=fit )

    ## eigenvectors (rows)
    U = pc['u']
    component = U[ev]

    ## raveled and centered frames
    x_avg = N0.average( self.frames, 0 )
    X = N0.array( [ N0.ravel(x) for x in self.frames - x_avg ] )

    ## projection of the reference frame on the ev'th eigenvector
    alpha_0 = N0.dot( X[ref], component )

    ## deviation of each frame's projection from that of the reference
    deviations = N0.dot( X, component ) - alpha_0

    ## get some representative alphas...
    if morph:
        ## ... evenly spaced between the (scaled) extremes
        a_min = factor * min( deviations )
        a_max = factor * max( deviations )
        delta = (a_max - a_min) / steps
        alphas = [ a_min + i*(delta) for i in range(0, steps) ]
    else:
        ## ... or evenly picked from the sorted real values
        ordered = N0.sort( deviations )
        delta = len( ordered ) / (steps * 1.0)
        alphas = [ ordered[ int(round( i*delta )) ]
                   for i in range(0, steps) ]

    ## shift the reference frame along the eigenvector by each alpha
    Y = N0.array( [ X[ref] + alpha * component for alpha in alphas ] )

    ## back convert to N x 3 coordinates
    Y = N0.reshape( Y, (Y.shape[0], -1, 3) )
    Y = x_avg + Y

    result = self.__class__()
    result.ref = self.ref
    result.frames = Y

    return result
def getFluct_local( self, mask=None, border_res=1,
                    left_atoms=['C'], right_atoms=['N'], verbose=1 ):
    """
    Get mean displacement of each atom from it's average position after
    fitting of each residue to the reference backbone coordinates of
    itself and selected atoms of neighboring residues to the right and
    left.

    :param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated
    :type  mask: array
    :param border_res: number of neighboring residues to use for fitting
    :type  border_res: int
    :param left_atoms: atoms (names) to use from these neighbor residues
    :type  left_atoms: [str]
    :param right_atoms: atoms (names) to use from these neighbor residues
    :type  right_atoms: [str]
    :param verbose: print progress info to STDERR (default: 1)
    :type  verbose: 1|0

    :return: displacement values, one per atom of the selected residues
    :rtype: [ float ]

    :raise PDBError: if the atom window of a residue is empty
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    if verbose:
        T.errWrite( "rmsd fitting per residue..." )

    residues = N0.nonzero( self.ref.atom2resMask( mask ) )

    ## backbone atoms used for fit
    fit_atoms_right = N0.nonzero( self.ref.mask( right_atoms ) )
    fit_atoms_left = N0.nonzero( self.ref.mask( left_atoms ) )

    ## chain index of each residue
    rchainMap = N0.take( self.ref.chainMap(), self.ref.resIndex() )

    result = []

    for res in residues:

        i_res, i_border = self.__resWindow( res, border_res, rchainMap,
                                            fit_atoms_left,
                                            fit_atoms_right )

        try:
            if not len( i_res ):
                raise PDBError('empty residue')

            t_res = self.takeAtoms( i_res + i_border )

            ## the residue's own atoms come first in the sub-trajectory
            i_center = range( len( i_res ) )

            mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

            ## fit with border atoms ..
            t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )

            ## .. but calculate only with center residue atoms
            frames = N0.take( t_res.frames, i_center, 1 )
            mean_xyz = N0.average( frames )

            squared = N0.power( frames - mean_xyz, 2 )
            rmsd = N0.average( N0.sqrt( N0.sum( squared, 2 ) ) )

            result.extend( rmsd )

            if verbose:
                T.errWrite('#')

        except ZeroDivisionError:
            ## report zero fluctuation for this residue and flag it
            result.extend( N0.zeros( len(i_res), N0.Float32 ) )
            T.errWrite('?' + str( res ))

    if verbose:
        T.errWriteln( "done" )

    return result