def compress( self, rec_mask, lig_mask ):
    """
    Reduce the complex to the atoms selected by a rec and a lig mask.

    @param rec_mask: atom mask applied to the rec model
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask applied to the lig model
    @type  lig_mask: [1|0]

    @return: compressed complex
    @rtype: Complex
    """
    ## take() works on positions, so translate both masks into indices
    rec_idx = N0.nonzero( rec_mask )
    lig_idx = N0.nonzero( lig_mask )

    return self.take( rec_idx, lig_idx )
def compress(self, rec_mask, lig_mask):
    """
    Reduce the complex to the atoms selected by a rec and a lig mask.

    @param rec_mask: atom mask applied to the rec model
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask applied to the lig model
    @type  lig_mask: [1|0]

    @return: compressed complex
    @rtype: Complex
    """
    ## take() works on positions, so translate both masks into indices
    selected_rec = N0.nonzero(rec_mask)
    selected_lig = N0.nonzero(lig_mask)

    return self.take(selected_rec, selected_lig)
def __resWindow( self, res, n_neighbores, rchainMap=None,
                 left_allowed=None, right_allowed=None ):
    """
    Get indices of all atoms of a residue and some atoms of its
    neighboring residues (if they belong to the same chain).

    :param res: residue index
    :type  res: int
    :param n_neighbores: number of residues to include right and left
    :type  n_neighbores: int
    :param rchainMap: array 1 x N_residues of int, chain id of each res
                      (default: None, derived from chainMap / resIndex)
    :type  rchainMap: array
    :param left_allowed: atom indices that may be taken from the residues
                         to the left (default: None, backbone atoms of ref)
    :type  left_allowed: array
    :param right_allowed: atom indices that may be taken from the residues
                          to the right (default: None, backbone atoms of ref)
    :type  right_allowed: array

    :return: atoms of res, atoms of neighbores
    :rtype: [ int ], [ int ]
    """
    ## some defaults.. time-consuming..
    ## NOTE(review): every other lookup here goes through self.ref;
    ## confirm that self.chainMap / self.resIndex are the intended source
    if rchainMap is None:
        rchainMap = N0.take( self.chainMap(), self.resIndex() )

    if left_allowed is None:
        left_allowed = N0.nonzero( self.ref.maskBB() )
    if right_allowed is None:
        right_allowed = N0.nonzero( self.ref.maskBB() )

    ## atom indices of center residue
    result = self.ref.res2atomIndices( [ res ] ).tolist()

    ## get indices of neighbore residues that still belong to same chain
    l = self.ref.lenResidues()
    chain = rchainMap[res]

    ## residue 0 is a valid neighbore: the lower bound must be inclusive
    ## (the former "i > 0" silently dropped the first residue)
    outer_left = [ i for i in range( res-n_neighbores, res )
                   if i >= 0 and rchainMap[i]==chain ]
    outer_right = [ i for i in range( res+1, res+n_neighbores+1 )
                    if i < l and rchainMap[i]==chain ]

    ## convert to atom indices, filter them against allowed neighbore atoms
    if outer_left:
        outer_left = self.ref.res2atomIndices( outer_left )
        outer_left = MU.intersection( left_allowed, outer_left )

    if outer_right:
        outer_right = self.ref.res2atomIndices( outer_right )
        outer_right = MU.intersection( right_allowed, outer_right )

    return result, outer_left + outer_right
def __cleanAtoms( self, m ):
    """
    Strip non-protein atoms and / or hydrogens from a model, depending on
    the protein and heavy switches of this instance.

    :param m: model to clean
    :type  m: PDBModel

    :return: the model that was passed in, after filtering
    :rtype: PDBModel
    """
    if self.protein:
        protein_idx = N0.nonzero( m.maskProtein() )
        m.keep( protein_idx )

    if self.heavy:
        ## evaluated after the protein filter, so indices refer to the
        ## already reduced model
        heavy_idx = N0.nonzero( m.maskHeavy() )
        m.keep( heavy_idx )

    return m
def __cleanAtoms(self, m):
    """
    Strip non-protein atoms and / or hydrogens from a model, depending on
    the protein and heavy switches of this instance.

    :param m: model to clean
    :type  m: PDBModel

    :return: the model that was passed in, after filtering
    :rtype: PDBModel
    """
    if self.protein:
        keep_protein = N0.nonzero(m.maskProtein())
        m.keep(keep_protein)

    if self.heavy:
        ## evaluated after the protein filter, so indices refer to the
        ## already reduced model
        keep_heavy = N0.nonzero(m.maskHeavy())
        m.keep(keep_heavy)

    return m
def removeAtoms( self, what ):
    """
    Remove atoms from all frames of trajectory and from reference
    structure.

    :param what: Specify what atoms to remove::
         - function( atom_dict ) -> 1 || 0 or (1..remove)
         - list of int [4, 5, 6, 200, 201..], indices of atoms to remove
         - list of int [11111100001101011100..N_atoms], mask (1..remove)
         - int, remove atom with this index
    :type  what: any

    :return: N0.array(1 x N_atoms_old) of 0||1, mask used to compress the
             atoms and xyz arrays. This mask can be used to apply the
             same change to another array of same dimension as the
             old(!) xyz and atoms.
    :rtype: array
    """
    ## atomMask marks the atoms to remove; invert it to get the keepers
    remove_mask = self.atomMask( what )
    keep_mask = N0.logical_not( remove_mask )

    keep_idx = N0.nonzero( keep_mask )
    self.keepAtoms( keep_idx )

    return keep_mask
def __unmaskedMatrix(self, contacts, rec_mask, lig_mask):
    """
    Expand a contact matrix of selected rec and lig atoms back to the
    matrix covering all atoms; unselected positions are 0.

    @param contacts: contact matrix, array sum_rec_mask x sum_lig_mask
    @type  contacts: array
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]

    @return: atom contact matrix, array N_atoms_rec x N_atoms_lig
    @rtype: array
    """
    n_rec = len(self.rec_model)
    n_lig = len(self.lig_model)

    ## flat target covering every rec x lig atom pair
    flat = N0.zeros(n_rec * n_lig)

    ## 1 wherever both the rec and the lig atom were selected
    pair_mask = N0.ravel(N0.outerproduct(rec_mask, lig_mask))

    ## (Optimization: nonzero is time consuming step)
    positions = N0.nonzero(pair_mask)
    N0.put(flat, positions, N0.ravel(contacts))

    return N0.resize(flat, (n_rec, n_lig))
def __unmaskedMatrix( self, contacts, rec_mask, lig_mask ):
    """
    Expand a contact matrix of selected rec and lig atoms back to the
    matrix covering all atoms; unselected positions are 0.

    @param contacts: contact matrix, array sum_rec_mask x sum_lig_mask
    @type  contacts: array
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]

    @return: atom contact matrix, array N_atoms_rec x N_atoms_lig
    @rtype: array
    """
    rec_len = len( self.rec_model )
    lig_len = len( self.lig_model )

    ## flat target covering every rec x lig atom pair
    full = N0.zeros( rec_len * lig_len )

    ## 1 wherever both the rec and the lig atom were selected
    selected = N0.ravel( N0.outerproduct( rec_mask, lig_mask ) )

    ## (Optimization: nonzero is time consuming step)
    target_pos = N0.nonzero( selected )
    N0.put( full, target_pos, N0.ravel( contacts ) )

    return N0.resize( full, (rec_len, lig_len) )
def addDensity( self, radius=6, minasa=None, profName='density' ):
    """
    Count the number of heavy atoms within the given radius.
    Values are only collected for atoms with |minasa| accessible surface
    area; all other atoms get the default value -1.

    @param radius: in Angstrom
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100%
    @type  minasa: float
    @param profName: name of the atom profile that receives the counts
                     (default: 'density')
    @type  profName: str
    """
    mHeavy = self.m.maskHeavy()
    xyz = N0.compress( mHeavy, self.m.getXyz(), 0 )

    if minasa:
        ## profile() returns the fallback 0 only if 'relAS' is missing.
        ## An existing profile must not be compared with "== 0" inside an
        ## if: the elementwise result has no single truth value.
        relAS = self.m.profile( 'relAS', 0 )
        if N0.shape( relAS ) == () and relAS == 0:
            self.addASA()

        mSurf = self.m.profile2mask( 'relAS', minasa )
    else:
        mSurf = N0.ones( self.m.lenAtoms() )

    ## loop over all surface atoms
    surf_pos = N0.nonzero( mSurf )
    cutoff = radius**2    ## compare squared distances, no sqrt needed
    contacts = []

    for i in surf_pos:
        dist = N0.sum( ( xyz - self.m.xyz[i] )**2, 1 )
        ## -1 discounts the atom itself.
        ## NOTE(review): a surface atom that is not heavy is not part of
        ## xyz, so its count is presumably one too low -- confirm
        contacts.append( N0.sum( N0.less( dist, cutoff ) ) - 1 )

    self.m.atoms.set( profName, contacts, mSurf, default=-1,
                      comment='atom density radius %3.1fA' % radius,
                      version= T.dateString() + ' ' + self.version() )
def __checkProfileIntegrity(self, profile, upperLimit=1.0, lowerLimit=-1.0):
    """
    In some cases SurfaceRacer generates incorrect curvature
    values for some atoms. This function sets values outside
    a given range to 0 and prints a warning for each of them.
    The profile is modified in place.

    @param profile: profile values to inspect
    @type  profile: [float]
    @param upperLimit: upper limit for a valid value (default: 1.0)
    @type  upperLimit: float
    @param lowerLimit: lower limit for a valid value (default: -1.0)
    @type  lowerLimit: float

    @return: profile with inspected values
    @rtype: [float]
    """
    ## flag everything above the upper OR below the lower limit
    out_of_range = N0.greater(profile, upperLimit)
    out_of_range += N0.less(profile, lowerLimit)

    for pos in N0.nonzero(out_of_range):
        print('WARNING! Profile value %.2f set to O\n' % profile[pos])
        profile[pos] = 0

    return profile
def __checkProfileIntegrity( self, profile, upperLimit=1.0, lowerLimit=-1.0 ):
    """
    In some cases SurfaceRacer generates incorrect curvature
    values for some atoms. This function sets values outside
    a given range to 0 and prints a warning for each of them.
    The profile is modified in place.

    @param profile: profile values to inspect
    @type  profile: [float]
    @param upperLimit: upper limit for a valid value (default: 1.0)
    @type  upperLimit: float
    @param lowerLimit: lower limit for a valid value (default: -1.0)
    @type  lowerLimit: float

    @return: profile with inspected values
    @rtype: [float]
    """
    ## flag everything above the upper OR below the lower limit
    invalid = N0.greater( profile, upperLimit )
    invalid += N0.less( profile, lowerLimit )

    for bad in N0.nonzero( invalid ):
        print('WARNING! Profile value %.2f set to O\n' % profile[bad])
        profile[bad] = 0

    return profile
def memberFrames(self, threshold=0.):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices
    @rtype: [[int]]
    """
    ## best cluster for each frame
    msm = self.memberships()
    maxMemb = N0.argmax(msm, 0)

    r = [N0.nonzero(N0.equal(maxMemb, i)).tolist()
         for i in range(self.n_clusters)]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        r2 = [N0.nonzero(N0.greater(l, threshold)).tolist() for l in msm]

        ## add only additional frames; r already holds plain lists, so
        ## no conversion (and no catch-all except) is needed here
        for i, frames in enumerate(r):
            known = set(frames)
            r[i] = frames + [fr for fr in r2[i] if fr not in known]

    ## sort frames within each cluster by their membership
    return [self.membershipSort(frames, i) for i, frames in enumerate(r)]
def compressMembers( self, mask ):
    """
    Select member trajectories with a mask.

    :param mask: positions in trajectory list to keep (1) or remove (0)
    :type  mask: [1|0]

    :return: compressed EnsembleTraj
    :rtype: EnsembleTraj
    """
    ## takeMembers expects positions rather than a mask
    member_idx = N0.nonzero( mask )

    return self.takeMembers( member_idx )
def compressMembers( self, mask ):
    """
    Select member trajectories with a mask.

    :param mask: positions in trajectory list to keep (1) or remove (0)
    :type  mask: [1|0]

    :return: compressed EnsembleTraj
    :rtype: EnsembleTraj
    """
    ## takeMembers expects positions rather than a mask
    kept_members = N0.nonzero( mask )

    return self.takeMembers( kept_members )
def __compressPca( self, fMask ):
    """
    Reduce the PCA results to the frames selected by a mask.

    :param fMask: frame mask
    :type  fMask: [1|0]

    :return: list of pca values
    :rtype: [float]
    """
    ## delegate to the index-based variant
    frame_idx = N0.nonzero( fMask )

    return self.__takePca( frame_idx )
def compressFrames( self, mask ):
    """
    Reduce the trajectory to the frames selected by a mask.

    :param mask: frame mask, 1 x N_frames
    :type  mask: [1|0]

    :return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
    :rtype: Trajectory
    """
    ## delegate to the index-based variant
    frame_idx = N0.nonzero( mask )

    return self.takeFrames( frame_idx )
def filterFunct( self, f ):
    """
    Get indices of items for which f( item ) == 1.

    @param f: f must take a single item as argument and return 1 or 0
    @type  f: function

    @return: array of int
    @rtype: array
    """
    ## evaluate f once per item, in iteration order
    flags = []
    for item in self:
        flags.append( f( item ) )

    return N0.nonzero( flags )
def memberFrames( self, threshold=0. ):
    """
    Get indices of all frames belonging to each cluster. Each frame
    is guaranteed to belong, at least, to the cluster for which it has
    its maximum membership. If threshold > 0, it can additionally pop
    up in other clusters.

    @param threshold: minimal cluster membership or 0 to consider
                      only max membership (default: 0)
    @type  threshold: float

    @return: n_cluster, lst of lst of int, frame indices
    @rtype: [[int]]
    """
    ## best cluster for each frame
    msm = self.memberships()
    maxMemb = N0.argmax( msm, 0 )

    r = [ N0.nonzero( N0.equal( maxMemb, i ) ).tolist()
          for i in range( self.n_clusters ) ]

    ## same thing but now taking all above threshold
    ## -> same frame can end up in several clusters
    if threshold > 0.:
        r2 = [ N0.nonzero( N0.greater( l, threshold ) ).tolist()
               for l in msm ]

        ## add only additional frames; r already holds plain lists, so
        ## no conversion (and no catch-all except) is needed here
        for i, frames in enumerate( r ):
            known = set( frames )
            r[i] = frames + [ fr for fr in r2[i] if fr not in known ]

    ## sort frames within each cluster by their membership
    return [ self.membershipSort( frames, i )
             for i, frames in enumerate( r ) ]
def __inverseIndices( self, model, i_atoms ):
    """
    Get the complement of a set of atom indices.

    :param model: model
    :type  model: PDBModel
    :param i_atoms: atom index
    :type  i_atoms: [int]

    :return: remaining atom indices of model that are NOT in i_atoms
    :rtype: [int]
    """
    ## mark the given atoms, then collect every unmarked position
    marked = N0.zeros( len( model ), N0.Int )
    N0.put( marked, i_atoms, 1 )

    unmarked = N0.logical_not( marked )

    return N0.nonzero( unmarked )
def __inverseIndices( self, model, i_atoms ):
    """
    Get the complement of a set of atom indices.

    :param model: model
    :type  model: PDBModel
    :param i_atoms: atom index
    :type  i_atoms: [int]

    :return: remaining atom indices of model that are NOT in i_atoms
    :rtype: [int]
    """
    ## mark the given atoms, then collect every unmarked position
    selected = N0.zeros( len( model ), N0.Int )
    N0.put( selected, i_atoms, 1 )

    remaining = N0.logical_not( selected )

    return N0.nonzero( remaining )
def compress( self, mask, deepcopy=0 ):
    """
    Extract the items selected by a mask.

    @param mask: mask of positions; len( mask ) == len( self )
    @type  mask: [ 1|0 ]
    @param deepcopy: deepcopy items (default: 0)
    @type  deepcopy: 1|0

    @return: new instance (or sub-class) with specified items
    @rtype: instance
    """
    ## delegate to the index-based variant
    positions = N0.nonzero( mask )

    return self.take( positions, deepcopy=deepcopy )
def filterEqual( self, key, lst ):
    """
    Get indices of items for which item[ key ] in lst.

    @param key: item attribute
    @type  key: any
    @param lst: [ any ], list of allowed values
    @type  lst: list

    @return: array of int
    @rtype: array
    """
    ## one flag per position: is the value among the allowed ones?
    hits = []
    for pos in range( len( self ) ):
        hits.append( self.getValue( pos, key ) in lst )

    return N0.nonzero( hits )
def compressAtoms( self, aMask, returnClass=None ):
    """
    Get copy of this trajectory with only atoms marked 1 in aMask.

    :param aMask: atom mask [10011100101111...],
                  lst 1 x N_atoms of 1(keep) or 0
    :type  aMask: [1|0]
    :param returnClass: default: None, same class as this object
    :type  returnClass: class

    :return: copy of Trajectory with fewer atoms
    :rtype: Trajectory
    """
    ## delegate to the index-based variant
    atom_idx = N0.nonzero( aMask )

    return self.takeAtoms( atom_idx, returnClass )
def random2DArray(matrix, ranNr=1, mask=None):
    """
    Create randomized 2D array containing ones and zeros.

    :param matrix: matrix to randomize
    :type  matrix: 2D array
    :param ranNr: number of matricies to add up (default: 1)
    :type  ranNr: integer
    :param mask: mask OR None (default: None); must have one entry per
                 matrix element. Only positions marked 1 are randomized,
                 all others are 0 in the result.
    :type  mask: list(1|0)

    :return: 2D array or |ranNr| added contact matricies
    :rtype: 2D array

    :raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a, b = N0.shape(matrix)
    flat = N0.ravel(matrix)

    ## get array from matrix that is to be randomized.
    ## "is None" instead of "not mask": the truth value of an array mask
    ## is ambiguous and must not be evaluated
    if mask is None:
        array = flat
    else:
        if len(mask) != len(flat):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'
                % (len(flat), len(mask)))

        array = N0.compress(mask, flat)

    ## number of ones and length of array
    nOnes = int(N0.sum(array))
    lenArray = len(array)
    ranArray = N0.zeros(lenArray)

    ## create random array
    for n in range(ranNr):
        ranArray += randomMask(nOnes, lenArray)

    if mask is None:
        return N0.reshape(ranArray, (a, b))

    ## blow up to size of original matix
    r = N0.zeros(a * b)
    N0.put(r, N0.nonzero(mask), ranArray)

    return N0.reshape(r, (a, b))
def random2DArray( matrix, ranNr=1, mask=None ):
    """
    Create randomized 2D array containing ones and zeros.

    :param matrix: matrix to randomize
    :type  matrix: 2D array
    :param ranNr: number of matricies to add up (default: 1)
    :type  ranNr: integer
    :param mask: mask OR None (default: None); must have one entry per
                 matrix element. Only positions marked 1 are randomized,
                 all others are 0 in the result.
    :type  mask: list(1|0)

    :return: 2D array or |ranNr| added contact matricies
    :rtype: 2D array

    :raise MathUtilError: if mask does not fit matrix
    """
    ## get shape of matrix
    a, b = N0.shape( matrix )
    flat = N0.ravel( matrix )

    ## get array from matrix that is to be randomized.
    ## "is None" instead of "not mask": the truth value of an array mask
    ## is ambiguous and must not be evaluated
    if mask is None:
        array = flat
    else:
        if len( mask ) != len( flat ):
            raise MathUtilError(
                'MatUtils.random2DArray - mask of incorrect length' +
                '\tMatrix length: %i Mask length: %i'
                % ( len( flat ), len( mask ) ) )

        array = N0.compress( mask, flat )

    ## number of ones and length of array
    nOnes = int( N0.sum( array ) )
    lenArray = len( array )
    ranArray = N0.zeros( lenArray )

    ## create random array
    for n in range( ranNr ):
        ranArray += randomMask( nOnes, lenArray )

    if mask is None:
        return N0.reshape( ranArray, (a, b) )

    ## blow up to size of original matix
    r = N0.zeros( a * b )
    N0.put( r, N0.nonzero( mask ), ranArray )

    return N0.reshape( r, (a, b) )
def loadTraj(f, trajIndex, start=0, end=None, step=1, prot=False):
    """
    Load traj from file, add frame names, extract portion if requested.

    The frame range is only applied if start, end or step differ from
    their defaults; a missing end means "up to the last frame". With
    prot set, only the atoms flagged by the reference's protein mask
    are kept.
    """
    path = T.absfile(f)
    traj = T.load(path)
    addFrameNames(traj, trajIndex)

    last = end or len(traj)

    wants_portion = start or end or (step != 1)
    if wants_portion:
        frame_idx = list(range(start, last, step))
        traj = traj.takeFrames(frame_idx)

    if prot:
        protein_idx = N0.nonzero(traj.ref.maskProtein())
        traj.keepAtoms(protein_idx)

    return traj
def filterFunct( self, f ):
    """
    Get indices of Complexes where f( c ) == 1.

    Use::
       filterFunct( f )

    @param f: filterFunct
    @type  f: function

    @return: array of int
    @rtype: [int]
    """
    ## evaluate f once per complex, in list order
    flags = []
    for cmplx in self:
        flags.append( f( cmplx ) )

    return N0.nonzero( flags )
def filterFunct(self, f):
    """
    Get indices of Complexes where f( c ) == 1.

    Use::
       filterFunct( f )

    @param f: filterFunct
    @type  f: function

    @return: array of int
    @rtype: [int]
    """
    ## evaluate f once per complex, in list order
    verdicts = []
    for cmplx in self:
        verdicts.append(f(cmplx))

    return N0.nonzero(verdicts)
def __center_model( self, model ):
    """
    Translate a copy of a PDBModel so that its center is in 0,0,0.
    Atoms flagged by maskH2O are dropped from the copy first.

    @param model: model to center
    @type  model: PDBModel

    @return: PDBModel (clone of model)
    @rtype: PDBModel
    """
    centered = model.clone()

    ## keep everything that is not flagged as water
    not_water = N0.logical_not( centered.maskH2O() )
    centered.keep( N0.nonzero( not_water ) )

    ## shift all coordinates by the center of mass
    offset = centered.centerOfMass()
    centered.setXyz( centered.getXyz() - offset )

    return centered
def filterEqual(self, infoKey, lst):
    """
    Get indices of Complexes where c.info[ infoKey ] in lst.

    Use::
       filterEqual( infoKey, lst )

    @param infoKey: key for info dict
    @type  infoKey: str
    @param lst: list of values to look for
    @type  lst: [any]

    @return: array of int
    @rtype: [int]
    """
    ## one flag per complex; a missing key yields None from info.get
    hits = []
    for cmplx in self:
        hits.append(cmplx.info.get(infoKey) in lst)

    return N0.nonzero(hits)
def filterEqual( self, infoKey, lst ):
    """
    Get indices of Complexes where c.info[ infoKey ] in lst.

    Use::
       filterEqual( infoKey, lst )

    @param infoKey: key for info dict
    @type  infoKey: str
    @param lst: list of values to look for
    @type  lst: [any]

    @return: array of int
    @rtype: [int]
    """
    ## one flag per complex; a missing key yields None from info.get
    matches = []
    for cmplx in self:
        value = cmplx.info.get( infoKey )
        matches.append( value in lst )

    return N0.nonzero( matches )
def packBinaryMatrix( cm ):
    """
    Compress sparse array of 0 and ones to list of one-positions
    (space saving function, upack with :class:`unpackBinaryMatrix`).
    None and already packed matrices (dict) are returned unchanged.

    :param cm: X by Y array of int
    :type  cm: 2D array

    :return: {'shape':(X,Y), 'nonzero':[int] }
    :rtype: dict
    """
    ## nothing to do for missing or already packed input
    if cm is None or type( cm ) is dict:
        return cm

    ## positions refer to the flattened matrix
    one_positions = N0.nonzero( N0.ravel( cm ) )

    return { 'shape': N0.shape( cm ),
             'nonzero': one_positions.tolist() }
def packBinaryMatrix(cm):
    """
    Compress sparse array of 0 and ones to list of one-positions
    (space saving function, upack with :class:`unpackBinaryMatrix`).
    None and already packed matrices (dict) are returned unchanged.

    :param cm: X by Y array of int
    :type  cm: 2D array

    :return: {'shape':(X,Y), 'nonzero':[int] }
    :rtype: dict
    """
    ## nothing to do for missing or already packed input
    if cm is None or type(cm) is dict:
        return cm

    packed = {'shape': N0.shape(cm)}

    ## positions refer to the flattened matrix
    packed['nonzero'] = N0.nonzero(N0.ravel(cm)).tolist()

    return packed
def __setChainID(self, m, ids):
    """
    Set chain_id of every atom for Hex pdb files, one id per chain.
    Nothing happens if ids is empty or None.

    @param m: model
    @type  m: PDBModel
    @param ids: chain id, len(ids) == m.lenChains
    @type  ids: [str]

    @return: nothing, m is changed directly
    @rtype: None
    """
    if not ids:
        return

    ids = t.toList(ids)
    chain_of_atom = m.chainMap()

    for chain in range(m.lenChains()):
        ## all atoms that belong to the current chain
        atom_idx = N0.nonzero(chain_of_atom == chain)

        for atom in atom_idx:
            m.atoms['chain_id'][atom] = ids[chain]
def __setChainID( self, m, ids ):
    """
    Set chain_id of every atom for Hex pdb files, one id per chain.
    Nothing happens if ids is empty or None.

    @param m: model
    @type  m: PDBModel
    @param ids: chain id, len(ids) == m.lenChains
    @type  ids: [str]

    @return: nothing, m is changed directly
    @rtype: None
    """
    if not ids:
        return

    ids = t.toList( ids )
    atom2chain = m.chainMap()

    for chain in range( m.lenChains() ):
        ## all atoms that belong to the current chain
        members = N0.nonzero( atom2chain == chain )

        for pos in members:
            m.atoms['chain_id'][pos] = ids[chain]
def filterRange( self, key, vLow, vHigh ):
    """
    Get indices of items where vLow <= item[ key ] <= vHigh.

    @param key: item attribute
    @type  key: any
    @param vLow: lower bound
    @type  vLow: any
    @param vHigh: upper bound
    @type  vHigh: any

    @return: array of int
    @rtype: array
    """
    values = self.valuesOf( key )

    above_low = N0.greater_equal( values, vLow )
    below_high = N0.less_equal( values, vHigh )

    ## the product is 1 only where both conditions hold
    in_range = above_low * below_high

    return N0.nonzero( in_range )
def slim(self):
    """
    Remove coordinates and atoms of ligand and receptor from memory,
    if they can be restored from file, compress contact matrix.

    A 2D contacts['result'] is replaced by the positions of its
    non-zero elements in the flattened matrix; the original shape is
    kept in contacts['shape'].

    @note: CALLED BEFORE PICKLING
    """
    self.lig_transformed = None
    self.pw_dist = None

    if 'matrix' in self.info:
        del self.info['matrix']

    if self.contacts is None:
        return

    ## compress contact matrix array, unless that already happened
    matrix = self.contacts['result']
    if len(N0.shape(matrix)) == 2:
        self.contacts['shape'] = N0.shape(matrix)

        positions = N0.nonzero(N0.ravel(matrix))
        self.contacts['result'] = positions.astype(N0.Int32)
def slim(self):
    """
    Remove coordinates and atoms of ligand and receptor from memory,
    if they can be restored from file, compress contact matrix.

    A 2D contacts['result'] is replaced by the positions of its
    non-zero elements in the flattened matrix; the original shape is
    kept in contacts['shape'].

    @note: CALLED BEFORE PICKLING
    """
    self.lig_transformed = None
    self.pw_dist = None

    if 'matrix' in self.info:
        del self.info['matrix']

    if self.contacts is None:
        return

    ## compress contact matrix array, unless that already happened
    cm = self.contacts['result']
    if len( N0.shape( cm ) ) == 2:
        self.contacts['shape'] = N0.shape( cm )

        one_pos = N0.nonzero( N0.ravel( cm ) )
        self.contacts['result'] = one_pos.astype( N0.Int32 )
def filterRange(self, infoKey, vLow, vHigh):
    """
    Get indices of Complexes where vLow <= c.info[ infoKey ] <= vHigh.

    Use::
       filterRange( str_infoKey, vLow, vHigh )

    @param infoKey: key for info dict
    @type  infoKey: str
    @param vLow: lower value limit
    @type  vLow: float
    @param vHigh: upper value limit
    @type  vHigh: float

    @return: array of int
    @rtype: [int]
    """
    values = self.valuesOf(infoKey)

    above_low = N0.greater_equal(values, vLow)
    below_high = N0.less_equal(values, vHigh)

    ## the product is 1 only where both conditions hold
    in_range = above_low * below_high

    return N0.nonzero(in_range)
def filterRange( self, infoKey, vLow, vHigh ):
    """
    Get indices of Complexes where vLow <= c.info[ infoKey ] <= vHigh.

    Use::
       filterRange( str_infoKey, vLow, vHigh )

    @param infoKey: key for info dict
    @type  infoKey: str
    @param vLow: lower value limit
    @type  vLow: float
    @param vHigh: upper value limit
    @type  vHigh: float

    @return: array of int
    @rtype: [int]
    """
    info_values = self.valuesOf( infoKey )

    not_too_low = N0.greater_equal( info_values, vLow )
    not_too_high = N0.less_equal( info_values, vHigh )

    ## the product is 1 only where both conditions hold
    accepted = not_too_low * not_too_high

    return N0.nonzero( accepted )
def addDensity(self, radius=6, minasa=None, profName='density'):
    """
    Count the number of heavy atoms within the given radius.
    Values are only collected for atoms with |minasa| accessible surface
    area; all other atoms get the default value -1.

    @param radius: in Angstrom
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100%
    @type  minasa: float
    @param profName: name of the atom profile that receives the counts
                     (default: 'density')
    @type  profName: str
    """
    mHeavy = self.m.maskHeavy()
    xyz = N0.compress(mHeavy, self.m.getXyz(), 0)

    if minasa:
        ## profile() returns the fallback 0 only if 'relAS' is missing.
        ## An existing profile must not be compared with "== 0" inside an
        ## if: the elementwise result has no single truth value.
        relAS = self.m.profile('relAS', 0)
        if N0.shape(relAS) == () and relAS == 0:
            self.addASA()

        mSurf = self.m.profile2mask('relAS', minasa)
    else:
        mSurf = N0.ones(self.m.lenAtoms())

    ## loop over all surface atoms
    surf_pos = N0.nonzero(mSurf)
    cutoff = radius**2    ## compare squared distances, no sqrt needed
    contacts = []

    for i in surf_pos:
        dist = N0.sum((xyz - self.m.xyz[i])**2, 1)
        ## -1 discounts the atom itself.
        ## NOTE(review): a surface atom that is not heavy is not part of
        ## xyz, so its count is presumably one too low -- confirm
        contacts.append(N0.sum(N0.less(dist, cutoff)) - 1)

    self.m.atoms.set(profName, contacts, mSurf, default=-1,
                     comment='atom density radius %3.1fA' % radius,
                     version=T.dateString() + ' ' + self.version())
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
             where outliers holds the row indices excluded after the
             respective iteration
    :rtype: (array, array), [float, float, array of int]
    """
    iter_trace = []

    last_rmsd = 0
    last_stdv = 0
    rounds = 0

    ## 1 .. row takes part in the fit, 0 .. row was thrown out
    mask = N0.ones(len(y), N0.Int32 )

    while True:

        ## find transformation for best match
        x_used = N0.compress(mask, x, 0)
        y_used = N0.compress(mask, y, 0)
        r, t = findTransformation(x_used, y_used)

        ## transform coordinates
        y_fit = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (0 for rows that are masked out)
        d = N0.sqrt(N0.sum(N0.power(x - y_fit, 2), 1)) * mask
        d_used = N0.compress(mask, d)

        ## calculate rmsd and stdv
        rmsd = N0.sqrt(N0.average(d_used**2))
        stdv = MU.SD(d_used)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - last_rmsd)
        d_stdv = abs(1 - last_stdv / stdv)

        converged = d_rmsd < eps_rmsd and d_stdv < eps_stdv
        if not converged:
            last_rmsd = rmsd
            last_stdv = stdv

        ## store result (fraction of rows used in this round)
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero( N0.logical_not( mask ) )
        iter_trace.append([perc, round(rmsd, 3), outliers])

        rounds += 1

        if n_iterations and rounds >= n_iterations:
            break
        if converged:
            break

    return (r, t), iter_trace
def loadTraj(self, fname, shift=0):
    """
    Load single trajectory and reduce it according to the frame and
    atom options of this instance (start / stop, step, thin, atoms,
    solvent, heavy).

    @param fname: file name of the trajectory to load
    @type  fname: str
    @param shift: not used by this method
    @type  shift: int

    @return: processed trajectory
    @rtype: Trajectory
    """
    if self.verbose:
        self.log.write('Loading %s...' % fname)

    traj = self.load_locked(fname)

    if self.verbose:
        self.log.add('Processing trajectory...')

    ## convert single member frame index into all member frame index
    if self.sstart or self.sstop:
        if isinstance(traj, EnsembleTraj):
            self.start = (self.sstart or 0) * traj.n_members
            self.stop = (self.sstop or 0) * traj.n_members
        else:
            self.start, self.stop = self.sstart, self.sstop
            if self.verbose:
                self.log.add('Warning: I am using -ss -se instead of -s -e')

    ## remove unwanted frames
    if self.start or self.stop:
        first = self.start
        last = self.stop or len(traj)
        traj = traj[first:last]

    ## stepping (offset)
    if self.step > 1:
        traj = traj.thin(self.step)

    ## thin with random stepping, use same frames from all trajectories
    if self.thin:
        n_target = int(round(len(traj) * self.thin))

        ## the random frame selection is drawn once and then re-used
        if not self.thin_i:
            self.thin_i = MU.randomRange(0, len( traj ), n_target )

        traj = traj.takeFrames(self.thin_i)

        if self.verbose:
            self.log.add("Thinned to %i frames." % len(traj))

    ## keep only allowed atoms (default: all)
    if self.atoms:
        traj.ref.addChainId()

        def is_allowed(a, ok=self.atoms):
            return a['name'] in ok

        allowed = traj.ref.mask(is_allowed)
        unwanted = N0.nonzero(N0.logical_not(allowed))
        traj.removeAtoms(unwanted)

    ## get rid of non-standard atoms, water, ions, etc.
    if not self.solvent:
        n_before = traj.lenAtoms()
        no_solvent = N0.logical_not(traj.ref.maskSolvent())
        traj = traj.compressAtoms(no_solvent)

        if self.verbose:
            self.log.add('%i solvent/ion atoms deleted.' % (n_before - traj.lenAtoms()))

    ## delete hydrogens, if requested
    if self.heavy:
        n_before = traj.lenAtoms()
        traj = traj.compressAtoms(traj.ref.maskHeavy())

        if self.verbose:
            self.log.add('%i hydrogens deleted.' % (n_before - traj.lenAtoms()))

    return traj
def loadTraj( self, fname, shift=0 ):
    """
    Load single trajectory and reduce it according to the frame and
    atom options of this instance (start / stop, step, thin, atoms,
    solvent, heavy).

    @param fname: file name of the trajectory to load
    @type  fname: str
    @param shift: not used by this method
    @type  shift: int

    @return: processed trajectory
    @rtype: Trajectory
    """
    if self.verbose:
        self.log.write( 'Loading %s...' % fname)

    traj = self.load_locked( fname )

    if self.verbose:
        self.log.add( 'Processing trajectory...')

    ## convert single member frame index into all member frame index
    if self.sstart or self.sstop:
        if isinstance( traj, EnsembleTraj ):
            self.start = (self.sstart or 0) * traj.n_members
            self.stop = (self.sstop or 0) * traj.n_members
        else:
            self.start, self.stop = self.sstart, self.sstop
            if self.verbose:
                self.log.add('Warning: I am using -ss -se instead of -s -e')

    ## remove unwanted frames
    if self.start or self.stop:
        lower = self.start
        upper = self.stop or len( traj )
        traj = traj[ lower : upper ]

    ## stepping (offset)
    if self.step > 1:
        traj = traj.thin( self.step )

    ## thin with random stepping, use same frames from all trajectories
    if self.thin:
        wanted = int( round( len( traj ) * self.thin ) )

        ## the random frame selection is drawn once and then re-used
        if not self.thin_i:
            self.thin_i = MU.randomRange(0, len( traj ), wanted )

        traj = traj.takeFrames( self.thin_i )

        if self.verbose:
            self.log.add( "Thinned to %i frames." % len( traj ) )

    ## keep only allowed atoms (default: all)
    if self.atoms:
        traj.ref.addChainId()

        def name_allowed( a, ok=self.atoms ):
            return a['name'] in ok

        keep_mask = traj.ref.mask( name_allowed )
        drop_idx = N0.nonzero( N0.logical_not( keep_mask ) )
        traj.removeAtoms( drop_idx )

    ## get rid of non-standard atoms, water, ions, etc.
    if not self.solvent:
        old_len = traj.lenAtoms()
        solute_mask = N0.logical_not( traj.ref.maskSolvent() )
        traj = traj.compressAtoms( solute_mask )

        if self.verbose:
            self.log.add('%i solvent/ion atoms deleted.'% (old_len - traj.lenAtoms()))

    ## delete hydrogens, if requested
    if self.heavy:
        old_len = traj.lenAtoms()
        traj = traj.compressAtoms( traj.ref.maskHeavy() )

        if self.verbose:
            self.log.add('%i hydrogens deleted.' % (old_len - traj.lenAtoms()) )

    return traj
def getFluct_local( self, mask=None, border_res=1,
                    left_atoms=['C'], right_atoms=['N'], verbose=1 ):
    """
    Get mean displacement of each atom from it's average position after
    fitting of each residue to the reference backbone coordinates of
    itself and selected atoms of neighboring residues to the right and
    left.

    :param mask: N_atoms x 1 array of 0||1, atoms for which fluctuation
                 should be calculated (default: None, all atoms)
    :type  mask: array
    :param border_res: number of neighboring residues to use for fitting
    :type  border_res: int
    :param left_atoms: atoms (names) to use from these neighbore residues
    :type  left_atoms: [str]
    :param right_atoms: atoms (names) to use from these neighbore residues
    :type  right_atoms: [str]
    :param verbose: write progress to stderr (default: 1)
    :type  verbose: 1|0

    :return: list of float, one value per atom of each selected residue
    :rtype: [float]
    """
    if mask is None:
        mask = N0.ones( len( self.frames[0] ), N0.Int32 )

    if verbose:
        T.errWrite( "rmsd fitting per residue..." )

    res_idx = N0.nonzero( self.ref.atom2resMask( mask ) )

    ## backbone atoms used for fit
    allowed_right = N0.nonzero( self.ref.mask( right_atoms ) )
    allowed_left = N0.nonzero( self.ref.mask( left_atoms ) )

    ## chain index of each residue
    res_chains = N0.take( self.ref.chainMap(), self.ref.resIndex() )

    fluct = []

    for res in res_idx:

        center_atoms, border_atoms = self.__resWindow(
            res, border_res, res_chains, allowed_left, allowed_right )

        try:
            if not len( center_atoms ):
                raise PDBError('empty residue')

            window = self.takeAtoms( center_atoms + border_atoms )

            ## the center residue comes first in the window trajectory
            center_pos = range( len( center_atoms ) )

            fit_mask = window.ref.maskBB() * window.ref.maskHeavy()

            ## fit with border atoms ..
            window.fit( ref=window.ref, mask=fit_mask, verbose=0 )

            ## .. but calculate only with center residue atoms
            xyz = N0.take( window.frames, center_pos, 1 )
            mean_xyz = N0.average( xyz )

            displacement = N0.sqrt( N0.sum( N0.power( xyz - mean_xyz, 2 ), 2 ) )
            fluct.extend( N0.average( displacement ) )

            if verbose:
                T.errWrite('#')

        except ZeroDivisionError:
            ## fall back to 0 for every atom of this residue
            fluct.extend( N0.zeros( len( center_atoms ), N0.Float32 ) )
            T.errWrite('?' + str( res ))

    if verbose:
        T.errWriteln( "done" )

    return fluct
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    :param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    :type  n_iterations: 1|0
    :param z: number of standard deviations for outlier definition
              (default: 2)
    :type  z: float
    :param eps_rmsd: tolerance in rmsd (default: 0.5)
    :type  eps_rmsd: float
    :param eps_stdv: tolerance in standard deviations (default: 0.05)
    :type  eps_stdv: float

    :return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
             where outliers holds the row indices excluded after the
             respective iteration
    :rtype: (array, array), [float, float, array of int]
    """
    iter_trace = []

    prev_rmsd = 0
    prev_stdv = 0
    n_done = 0

    ## 1 .. row takes part in the fit, 0 .. row was thrown out
    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## find transformation for best match
        fit_x = N0.compress(mask, x, 0)
        fit_y = N0.compress(mask, y, 0)
        r, t = findTransformation(fit_x, fit_y)

        ## transform coordinates
        moved = N0.dot(y, N0.transpose(r)) + t

        ## calculate row distances (0 for rows that are masked out)
        d = N0.sqrt(N0.sum(N0.power(x - moved, 2), 1)) * mask
        d_fit = N0.compress(mask, d)

        ## calculate rmsd and stdv
        rmsd = N0.sqrt(N0.average(d_fit**2))
        stdv = MU.SD(d_fit)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - prev_rmsd)
        d_stdv = abs(1 - prev_stdv / stdv)

        converged = d_rmsd < eps_rmsd and d_stdv < eps_stdv
        if not converged:
            prev_rmsd = rmsd
            prev_stdv = stdv

        ## store result (fraction of rows used in this round)
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))
        iter_trace.append([perc, round(rmsd, 3), outliers])

        n_done += 1

        if n_iterations and n_done >= n_iterations:
            break
        if converged:
            break

    return (r, t), iter_trace