def logConfidence( x, R, clip=0 ):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value, 0 -> don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip:
        if 0 in R:
            R = N0.clip( R, clip, max( R ) )
        if x == 0:
            x = clip

    ## remove 0 instead of clipping (keeps only the non-zero items of R)
    R = N0.compress( R, R )

    if x == 0:
        return 0, 0

    ## mean and standard deviation (n - 1 in the denominator) of the
    ## log-transformed random sample
    log_sample = N0.log( R )
    alpha = N0.average( log_sample )
    squared_dev = N0.power( log_sample - alpha, 2 )
    beta = N0.sqrt( N0.sum( squared_dev ) / (len( R ) - 1.) )

    return logArea( x, alpha, beta ), logMedian( alpha )
def pairwiseRmsd( self, aMask=None, noFit=0 ):
    """
    Calculate rmsd between each 2 coordinate frames.

    @param aMask: atom mask (default: None, no atoms are filtered out)
    @type  aMask: [1|0]
    @param noFit: 1 -> compare the frames as they are,
                  0 -> take the rmsd reported by rmsFit.match for each
                  pair (default: 0)
    @type  noFit: 1|0

    @return: frames x frames array of float
    @rtype: array
    """
    frames = self.frames
    if aMask is not None:
        frames = N0.compress( aMask, frames, 1 )

    n_frames = len( frames )
    result = N0.zeros( (n_frames, n_frames), N0.Float32 )

    ## only the upper triangle is calculated; every value is mirrored and
    ## the diagonal keeps its initial 0
    for i in range( n_frames ):
        for j in range( i + 1, n_frames ):

            if noFit:
                dist = N0.sqrt( N0.sum( N0.power( frames[i] - frames[j], 2 ), 1 ) )
                pair_rmsd = N0.sqrt( N0.average( dist**2 ) )
            else:
                ## second field of the first entry in the iteration trace
                rt, trace = rmsFit.match( frames[i], frames[j], 1 )
                pair_rmsd = trace[0][1]

            result[i,j] = result[j,i] = pair_rmsd

    return result
def __distances( self, point, xyz=None ):
    """
    Calculate the distance of every coordinate row to a given point.

    @param point: point of origin
    @type  point: array of float
    @param xyz: coordinates, one row per atom; None -> take the
                coordinates of self.model (default: None)
    @type  xyz: array of float

    @return: distance of each row of xyz to point
    @rtype: array of float
    """
    if xyz is None:
        xyz = self.model.getXyz()

    ## squared offsets are summed along axis 1, i.e. one value per row
    offsets = xyz - point
    squared = N0.power( offsets, 2 )

    return N0.sqrt( N0.sum( squared, 1 ) )
def __distances(self, point, xyz=None):
    """
    Get the distances of all coordinate rows to a point of origin.

    @param point: point of origin
    @type  point: array of float
    @param xyz: coordinates (one row per atom), if None -- take the
                coordinates of self.model (default: None)
    @type  xyz: array of float

    @return: one distance per row of xyz
    @rtype: array of float
    """
    coordinates = self.model.getXyz() if xyz is None else xyz

    ## sum of squares along axis 1 collapses each row into one value
    sum_of_squares = N0.sum(N0.power(coordinates - point, 2), 1)

    return N0.sqrt(sum_of_squares)
def hmmEmm2Prob(self, nullEmm, emmScore):
    """
    Convert HMM profile emission scores into emission probabilities.

    @param nullEmm: null scores
    @type  nullEmm: array or [float]
    @param emmScore: emission scores
    @type  emmScore: array or [[float]]

    @return: emission probabilities (for each amino acid in each
             position) and null probabilities -- in this order
    @rtype: array( len_seq x 20 ), array( 1 x 20 )
    """
    ## Null probabilities: prob = 2 ^ (nullEmm / 1000) * 1/len(alphabet)
    nullProb = N0.power(2, N0.array(nullEmm) / 1000.0) * (1. / 20)

    ## Emission probabilities: prob = nullProb * 2 ^ (emmScore / 1000)
    ## see http://www.ebc.ee/WWW/hmmer2-html/node26.html
    ## emmScore is converted the same way as nullEmm so that plain
    ## (nested) lists are accepted as well
    emmProb = nullProb * N0.power(2, N0.array(emmScore) / 1000.0)

    return emmProb, nullProb
def hmmEmm2Prob( self, nullEmm, emmScore ):
    """
    Convert HMM profile emission scores into emission probabilities.

    @param nullEmm: null scores
    @type  nullEmm: array or [float]
    @param emmScore: emission scores
    @type  emmScore: array or [[float]]

    @return: emission probabilities (for each amino acid in each
             position) and null probabilities -- in this order
    @rtype: array( len_seq x 20 ), array( 1 x 20 )
    """
    ## Null probabilities: prob = 2 ^ (nullEmm / 1000) * 1/len(alphabet)
    nullProb = N0.power( 2, N0.array( nullEmm ) / 1000.0 ) * (1. / 20)

    ## Emission probabilities: prob = nullProb * 2 ^ (emmScore / 1000)
    ## see http://www.ebc.ee/WWW/hmmer2-html/node26.html
    ## emmScore is converted the same way as nullEmm so that plain
    ## (nested) lists are accepted as well
    emmProb = nullProb * N0.power( 2, N0.array( emmScore ) / 1000.0 )

    return emmProb, nullProb
def error(self, msm, d2):
    """
    Calculate the weighted error of a membership matrix.

    @param msm: membership matrix
    @type  msm: array('f')
    @param d2: distance from data to the centers
    @type  d2: array('f')

    @return: weighted error
    @rtype: float
    """
    ## memberships raised to the power self.w
    weights = N0.power(msm, self.w)

    ## the trace of weights x d2^T adds up weights[i,j] * d2[i,j]
    return N0.trace(N0.dot(weights, N0.transpose(d2)))
def rmsd_res(self, coord1, coord2):
    """
    Calculate the deviation on residue level (c-alpha) between a model
    and its reference.

    @param coord1: first set of coordinates
    @type  coord1: array
    @param coord2: second set of coordinates
    @type  coord2: array

    @return: distance between coord1[i] and coord2[i] for each i
    @rtype: [float]
    """
    rmsd_res = []

    for i in range(len(coord1)):
        a, b = coord1[i], coord2[i]

        ## squared offset along each of the three axes
        dx2 = N0.power(a[0] - b[0], 2)
        dy2 = N0.power(a[1] - b[1], 2)
        dz2 = N0.power(a[2] - b[2], 2)

        rmsd_res.append(N0.sqrt(dx2 + dy2 + dz2))

    return rmsd_res
def rmsd_res(self, coord1, coord2):
    """
    Calculate the deviation on residue level (c-alpha) between a model
    and its reference.

    @param coord1: first set of coordinates
    @type  coord1: array
    @param coord2: second set of coordinates
    @type  coord2: array

    @return: distance between coord1[i] and coord2[i] for each i
    @rtype: [float]
    """
    def distance(a, b):
        ## Euclidean distance from the first three components of a and b
        return N0.sqrt(N0.power(a[0] - b[0], 2)
                       + N0.power(a[1] - b[1], 2)
                       + N0.power(a[2] - b[2], 2))

    ## indexed by the length of coord1, exactly one value per row of it
    return [distance(coord1[i], coord2[i]) for i in range(len(coord1))]
def getFluct_global( self, mask=None ):
    """
    Get RMS of each atom from its average position in trajectory.
    The frames should be superimposed (fit() ) to a reference.

    @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                 atoms to be considered.
    @type  mask: [1|0]

    @return: Numpy array ( N_unmasked x 1 ) of float.
    @rtype: array
    """
    frames = self.frames
    if mask is not None:
        frames = N0.compress( mask, frames, 1 )

    ## NOTE(review): both N0.average calls rely on N0's default axis --
    ## presumably axis 0 (over frames) as in oldnumeric; confirm

    ## mean position of each atom in all frames
    mean_xyz = N0.average( frames )

    ## distance of every atom to its mean position (sum over axis 2)
    displacement = N0.sqrt( N0.sum( N0.power( frames - mean_xyz, 2 ), 2 ) )

    return N0.average( displacement )
def rowDistances(x, y):
    """
    Calculate the distances between the items of two arrays (of same shape)
    after least-squares superpositioning.

    @param x: first set of coordinates
    @type  x: array('f')
    @param y: second set of coordinates
    @type  y: array('f')

    @return: array( len(x), 'f' ), distance between x[i] and y[i] for all i
    @rtype: array
    """
    ## transformation for best match of y onto x
    rotation, translation = findTransformation(x, y)

    ## apply it to y
    y_fitted = N0.dot(y, N0.transpose(rotation)) + translation

    ## distance per row
    deviation = x - y_fitted
    return N0.sqrt(N0.sum(N0.power(deviation, 2), 1))
def pca( self, atomMask=None, frameMask=None, fit=1 ):
    """
    Calculate principal components of trajectory frames.

    @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                     (default: all)
    @type  atomMask: [1|0]
    @param frameMask: 1 x N_frames, [001111..] frames to consider
                      (default all )
    @type  frameMask: [1|0]
    @param fit: 1 -> call self.fit( atomMask ) before the analysis
                (default: 1)
    @type  fit: 1|0

    @return: U, V * L, L^2 with V, L, U = LA.svd( data ); V * L is
             described as the projection of each frame in PC space and
             L^2 as the eigenvalue of each PC
    @rtype: array, array, array
    """
    if frameMask is None:
        frameMask = N0.ones( len( self.frames ), N0.Int32 )

    if atomMask is None:
        atomMask = N0.ones( self.getRef().lenAtoms(), N0.Int32 )

    if fit:
        self.fit( atomMask )

    ## the average is taken over all frames, before frame selection
    refxyz = N0.average( self.frames, 0 )

    data = N0.compress( frameMask, self.frames, 0 )
    data = data - refxyz
    data = N0.compress( atomMask, data, 1 )

    ## reduce to 2D array (one flattened row per frame); the list is
    ## built explicitly because map() is lazy on Python 3 and would end
    ## up as a single object inside the array
    data = N0.array( [ N0.ravel( frame ) for frame in data ] )

    V, L, U = LA.svd( data )

    return U, V * L, N0.power( L, 2 )
def logConfidence(x, R, clip=1e-32):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.
    The exact solution to this problem is in L{Biskit.Statistics.lognormal}.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value; 0 -> don't clip
                 (default: 1e-32)
    @type  clip: float

    @return: confidence that x is not random, mean of random distrib.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N0.clip(R, clip, max(R))

    ## mean and stdv (n - 1 in the denominator) of the log-transformed
    ## random sample
    log_sample = N0.log(R)
    mean = N0.average(log_sample)
    stdv = N0.sqrt(N0.sum(N0.power(log_sample - mean, 2)) / (len(R) - 1.))

    ## dense lognormal distribution representing the random sample,
    ## sampled in 100000ths of 50 x the largest random value
    stop = max(R) * 50.0
    step = stop / 100000
    start = step / 10.0

    X = []
    for v in N0.arange(start, stop, step):
        X.append((v, p_lognormal(v, mean, stdv)))

    ## analyse distribution
    d = Density(X)
    confidence = d.findConfidenceInterval(x * 1.0)[0]

    return confidence, d.average()
def calc_membership_matrix(self, d2):
    """
    Calculate memberships from distances: each distance is raised to the
    power 1 / (1 - self.w) and the result is divided by N0.sum of
    these values.

    @param d2: distance from data to the centers
    @type  d2: array('f')

    @return: membership matrix
    @rtype: array('f')
    """
    ## remove 0s (if a cluster center is exactly on one item)
    lower_bound = N0.power(1e200, 1 - self.w)
    d2 = N0.clip(d2, lower_bound, 1e300)

    exponent = 1. / (1. - self.w)
    q = N0.power(d2, exponent)

    ## NOTE(review): N0.sum is called without axis -- presumably it sums
    ## along axis 0 as in oldnumeric; confirm
    return q / N0.sum(q)
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    @param x: first set of coordinates
    @type  x: array
    @param y: second set of coordinates
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0
    converged = 0

    ## 1 for every row that is still considered for the fit
    mask = N0.ones(len(y), N0.Int32)

    while not converged:

        ## find transformation for best match of the considered rows
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform all coordinates of y
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rows outside the mask are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv of the considered rows only
        d_considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_considered**2))
        stdv = MU.SD(d_considered)

        ## check conditions for convergence
        ## NOTE: a stdv of 0 is not guarded against in this division
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            rmsd_old, stdv_old = rmsd, stdv

        ## fraction of rows used in this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))

        ## store result
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        if n_iterations and n >= n_iterations:
            break

    return (r, t), iter_trace
def getFluct_local( self, mask=None, border_res=1,
                    left_atoms=['C'], right_atoms=['N'], verbose=1 ):
    """
    Get mean displacement of each atom from its average position after
    fitting of each residue to the reference backbone coordinates of itself
    and selected atoms of neighboring residues to the right and left.

    @param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated
    @type  mask: array
    @param border_res: number of neighboring residues to use for fitting
    @type  border_res: int
    @param left_atoms: atoms (names) to use from these neighbor residues
    @type  left_atoms: [str]
    @param right_atoms: atoms (names) to use from these neighbor residues
    @type  right_atoms: [str]
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0

    @return: list of float, extended residue by residue
    @rtype: [float]

    @raise PDBError: if the atom window of a residue is empty
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    if verbose:
        T.errWrite( "rmsd fitting per residue..." )

    residues = N0.nonzero( self.ref.atom2resMask( mask ) )

    ## backbone atoms used for fit
    fit_atoms_right = N0.nonzero( self.ref.mask( right_atoms ) )
    fit_atoms_left = N0.nonzero( self.ref.mask( left_atoms ) )

    ## chain index of each residue
    rchainMap = N0.take( self.ref.chainMap(), self.ref.resIndex() )

    result = []

    for res in residues:

        i_res, i_border = self.__resWindow( res, border_res, rchainMap,
                                            fit_atoms_left, fit_atoms_right )

        try:
            if not len( i_res ):
                ## call form of raise: valid on Python 2 and Python 3
                raise PDBError( 'empty residue' )

            t_res = self.takeAtoms( i_res + i_border )

            ## the center residue atoms come first in the sub-trajectory
            i_center = range( len( i_res ) )

            mask_BB = t_res.ref.maskBB() * t_res.ref.maskHeavy()

            ## fit with border atoms ..
            t_res.fit( ref=t_res.ref, mask=mask_BB, verbose=0 )

            ## .. but calculate only with center residue atoms
            frames = N0.take( t_res.frames, i_center, 1 )

            ## NOTE(review): both N0.average calls rely on N0's default
            ## axis -- presumably axis 0 (over frames); confirm
            avg = N0.average( frames )

            rmsd = N0.average(
                N0.sqrt( N0.sum( N0.power( frames - avg, 2 ), 2 ) ) )

            result.extend( rmsd )

            if verbose:
                T.errWrite( '#' )

        except ZeroDivisionError:
            ## fall back to 0 for every atom of this residue; the residue
            ## is reported regardless of the verbose setting
            result.extend( N0.zeros( len( i_res ), N0.Float32 ) )
            T.errWrite( '?' + str( res ) )

    if verbose:
        T.errWriteln( "done" )

    return result
def fit( self, mask=None, ref=None, n_it=1,
         prof='rms', verbose=1, fit=1, **profInfos ):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    @param mask: atom mask, atoms to consider default: [all]
    @type  mask: [1|0]
    @param ref: use as reference, default: None, average Structure
    @type  ref: PDBModel
    @param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    @type  n_it: int
    @param prof: save rms per frame in profile of this name, ['rms']
    @type  prof: str
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0
    @param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    @type  fit: 1|0
    @param profInfos: additional key=value pairs for rms profile info []
    @type  profInfos: key=value
    """
    refxyz = N0.average( self.frames, 0 ) if ref is None else ref.getXyz()

    if mask is None:
        mask = N0.ones( len( refxyz ), N0.Int32 )

    refxyz = N0.compress( mask, refxyz, 0 )

    if verbose:
        T.errWrite( "rmsd fitting..." )

    rms = []          ## rms value of each frame
    non_outliers = [] ## fraction of atoms considered for rms and fit
    iterations = []   ## number of iterations performed on each frame
                      ## (collected but not used any further in here)

    for i in range( len( self.frames ) ):

        xyz = self.frames[i]
        xyz_masked = N0.compress( mask, xyz, 0 )

        if n_it != 1:
            ## iterative fit; values are taken from the last iteration
            (r, t), rmsdList = rmsFit.match( refxyz, xyz_masked, n_it )

            iterations.append( len( rmsdList ) )
            non_outliers.append( rmsdList[-1][0] )

            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            rms.append( rmsdList[-1][1] )

        else:
            ## single fit; rms is calculated from the masked atoms
            r, t = rmsFit.findTransformation( refxyz, xyz_masked )

            xyz_transformed = N0.dot( xyz, N0.transpose( r ) ) + t

            deviation = N0.compress( mask, xyz_transformed, 0 ) - refxyz
            d = N0.sqrt( N0.sum( N0.power( deviation, 2 ), 1 ) )

            rms.append( N0.sqrt( N0.average( d**2 ) ) )

        if fit:
            self.frames[i] = xyz_transformed.astype( N0.Float32 )

        if verbose and i % 100 == 0:
            T.errWrite( '#' )

    self.setProfile( prof, rms, n_iterations=n_it, **profInfos )

    if non_outliers:
        self.setProfile( prof+'_considered', non_outliers,
                         n_iterations=n_it,
                         comment='fraction of atoms considered for iterative fit' )

    if verbose:
        T.errWrite( 'done\n' )
def calc_cluster_center(self, msm):
    """
    Calculate cluster centers as average of self.data, weighted by the
    memberships raised to the power self.w.

    @param msm: membership matrix
    @type  msm: array('f')

    @return: one center per row of msm
    @rtype: array
    """
    weights = N0.power(msm, self.w)

    weighted_sum = N0.dot(weights, self.data)
    norm = N0.sum(weights, 1)

    ## divide in transposed form so that each center is divided by the
    ## weight sum of its own row
    return N0.transpose(N0.transpose(weighted_sum) / norm)
def nonFuzzyIndex(self):
    """
    Calculate (n_cluster * S - npoints) / (npoints * (n_cluster - 1)),
    where S is the sum over all memberships in self.msm raised to the
    power self.w.

    @return: index value
    @rtype: float
    """
    weights = N0.power(self.msm, self.w)
    total = N0.sum(N0.sum(weights))

    numerator = self.n_cluster * total - self.npoints
    denominator = self.npoints * (self.n_cluster - 1)

    return numerator / denominator
def clusterPartitionCoefficient(self):
    """
    Sum the memberships in self.msm, raised to the power self.w, along
    axis 1 and divide the sums by self.npoints.

    @return: one value per row of self.msm
    @rtype: array
    """
    weights = N0.power(self.msm, self.w)
    row_sums = N0.sum(weights, 1)

    return row_sums / self.npoints