def addDensity( self, radius=6, minasa=None, profName='density' ):
    """
    Store, for each selected atom, the number of heavy atoms found within
    the given radius as an atom profile of the model.
    Values are only collected for atoms with |minasa| accessible surface
    area; atoms.set is called with default=-1 for the remaining atoms.

    @param radius: in Angstrom (default: 6)
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100%; a false value
                   (None, 0) selects all atoms
    @type  minasa: float
    @param profName: name of the atom profile to write (default: 'density')
    @type  profName: str
    """
    heavy_mask = self.m.maskHeavy()
    heavy_xyz = N0.compress( heavy_mask, self.m.getXyz(), 0 )

    ## surface profile is needed but not there yet -> calculate it
    if minasa and self.m.profile( 'relAS', 0 ) == 0:
        self.addASA()

    if not minasa:
        mSurf = N0.ones( self.m.lenAtoms() )
    else:
        mSurf = self.m.profile2mask( 'relAS', minasa )

    ## compare squared distances, avoids taking the square root
    cutoff_sq = radius**2

    contacts = []
    for i in N0.nonzero( mSurf ):
        dist_sq = N0.sum( ( heavy_xyz - self.m.xyz[i] )**2, 1 )
        ## -1 presumably discounts the atom itself -- verify for
        ## non-heavy surface atoms
        contacts.append( N0.sum( N0.less( dist_sq, cutoff_sq ) ) - 1 )

    self.m.atoms.set( profName, contacts, mSurf, default=-1,
                      comment='atom density radius %3.1fA' % radius,
                      version= T.dateString() + ' ' + self.version() )
def linfit(x, y):
    """
    Linear least-square fit of a straight line to the points (x, y).
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    :param x: x-data
    :type  x: [ float ]
    :param y: y-data
    :type  y: [ float ]

    :return: m, n, r^2 (slope, intersection, corr. coefficient)
    :rtype: float, float, float

    :raise Exception: if x and y have different number of elements
    """
    xs = N0.array(x, N0.Float64)
    ys = N0.array(y, N0.Float64)
    if len(xs) != len(ys):
        raise Exception('linfit: x and y must have same length')

    mean_x = N0.average(xs)
    mean_y = N0.average(ys)
    count = len(xs)

    ## sums of squares around the mean values
    ss_xy = N0.sum(xs * ys) - count * mean_x * mean_y
    ss_xx = N0.sum(xs * xs) - count * mean_x * mean_x
    ss_yy = N0.sum(ys * ys) - count * mean_y * mean_y

    slope = ss_xy / ss_xx
    return slope, mean_y - slope * mean_x, ss_xy**2 / (ss_xx * ss_yy)
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Intermolecular distances below cutoff after applying the two masks.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    xyz_rec = self.rec().getXyz()
    xyz_lig = self.lig().getXyz()

    ## a previously cached distance matrix is only re-used if its shape
    ## fits the two masks (coordinates themselves are not compared)
    dist = getattr(self, 'pw_dist', None)
    recalculate = dist is None
    if not recalculate:
        recalculate = N0.shape(dist) != (N0.sum(rec_mask),
                                         N0.sum(lig_mask))

    if recalculate:
        dist = self.__pairwiseDistances(N0.compress(rec_mask, xyz_rec, 0),
                                        N0.compress(lig_mask, xyz_lig, 0))

    if cache:
        self.pw_dist = dist

    ## 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
    return N0.less(dist, cutoff)
def contactResDistribution( self, cm=None ):
    """
    Count how often each residue type occurs in the protein-protein
    interface.

    @param cm: pre-calculated contact matrix (default: None -> the result
               of resContacts() is used)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry per letter returned
             by molUtils.allAA()
    @rtype: dict
    """
    if cm is None:
        cm = self.resContacts()

    ## contacts per ligand / receptor residue; non-zero entries select
    ## interface residues (assumes N0.sum adds up along the first axis)
    lig_contacts = N0.sum( cm )
    rec_contacts = N0.sum( N0.transpose( cm ) )

    ## one-letter codes of the contacting residues as a single string
    interface = ''.join( N0.compress( lig_contacts, self.lig().sequence() ) ) \
                + ''.join( N0.compress( rec_contacts, self.rec().sequence() ) )

    return { aa : interface.count( aa ) for aa in molUtils.allAA() }
def __findTransformation(self, x, y):
    """
    Find the rotation and translation that superimpose y onto x.

    Back transformation:
    for atom i new coordinates will be::
        y_new[i] = N0.dot(r, y[i]) + t

    for all atoms in one step::
        y_new = N0.dot(y, N0.transpose(r)) + t

    @param x: coordinates
    @type  x: array
    @param y: coordinates
    @type  y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.linalg import svd

    ## centers of both coordinate sets
    ## (assumes N0.sum adds up along the first axis -- TODO confirm)
    center_x = N0.sum(x) / len(x)
    center_y = N0.sum(y) / len(y)

    ## svd of the correlation matrix of the centered coordinates
    v, l, u = svd(N0.dot(N0.transpose(x - center_x), y - center_y))

    rotation = N0.dot(v, u)
    translation = center_x - N0.dot(rotation, center_y)

    return rotation, translation
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Intermolecular distances below cutoff after applying the two masks.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    xyz_rec = self.rec().getXyz()
    xyz_lig = self.lig().getXyz()

    ## a previously cached distance matrix is only re-used if its shape
    ## fits the two masks (coordinates themselves are not compared)
    dist = getattr(self, 'pw_dist', None)
    recalculate = dist is None
    if not recalculate:
        recalculate = N0.shape(dist) != (N0.sum(rec_mask),
                                         N0.sum(lig_mask))

    if recalculate:
        dist = self.__pairwiseDistances(N0.compress(rec_mask, xyz_rec, 0),
                                        N0.compress(lig_mask, xyz_lig, 0))

    if cache:
        self.pw_dist = dist

    ## 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
    return N0.less(dist, cutoff)
def contactResDistribution(self, cm=None):
    """
    Count how often each residue type occurs in the protein-protein
    interface.

    @param cm: pre-calculated contact matrix (default: None -> the result
               of resContacts() is used)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry per letter returned
             by molUtils.allAA()
    @rtype: dict
    """
    if cm is None:
        cm = self.resContacts()

    ## contacts per ligand / receptor residue; non-zero entries select
    ## interface residues (assumes N0.sum adds up along the first axis)
    lig_contacts = N0.sum(cm)
    rec_contacts = N0.sum(N0.transpose(cm))

    ## one-letter codes of the contacting residues as a single string
    interface = ''.join(N0.compress(lig_contacts, self.lig().sequence())) \
                + ''.join(N0.compress(rec_contacts, self.rec().sequence()))

    return {aa: interface.count(aa) for aa in molUtils.allAA()}
def linfit( x, y ):
    """
    Linear least-square fit of a straight line to the points (x, y).
    see U{http://mathworld.wolfram.com/LeastSquaresFitting.html}

    :param x: x-data
    :type  x: [ float ]
    :param y: y-data
    :type  y: [ float ]

    :return: m, n, r^2 (slope, intersection, corr. coefficient)
    :rtype: float, float, float

    :raise Exception: if x and y have different number of elements
    """
    xs = N0.array( x, N0.Float64 )
    ys = N0.array( y, N0.Float64 )
    if len( xs ) != len( ys ):
        raise Exception('linfit: x and y must have same length')

    mean_x = N0.average( xs )
    mean_y = N0.average( ys )
    count = len( xs )

    ## sums of squares around the mean values
    ss_xy = N0.sum( xs * ys ) - count * mean_x * mean_y
    ss_xx = N0.sum( xs * xs ) - count * mean_x * mean_x
    ss_yy = N0.sum( ys * ys ) - count * mean_y * mean_y

    slope = ss_xy / ss_xx
    return slope, mean_y - slope * mean_x, ss_xy**2 / ( ss_xx * ss_yy )
def wVar(x, w):
    """
    Variance of the data x weighted by w, taken around the weighted mean
    returned by wMean.

    :param x: X-D array with numbers
    :type  x: array
    :param w: 1-D array of same length as x with weight factors
    :type  w: array

    :return: array('f') or float
    :rtype: array('f') or float
    """
    mean = wMean(x, w)
    w_total = N0.sum(w)

    ## normalisation factor: sum(w) / ( sum(w)^2 - sum(w^2) )
    norm = w_total / ((w_total**2 - N0.sum(w**2)))

    return norm * N0.sum(w * (x - mean)**2)
def contactsDiff(self, ref, cutoff=None):
    """
    Number of B{residue-residue} contacts found in only one of this and
    the reference complex (contacts in either one minus the shared ones).

    @param ref: to compare this one with
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: number of contacts different in this and reference complex.
    @rtype: int
    """
    own = self.resContacts(cutoff)
    other = ref.resContacts(cutoff)

    ## contacts present in at least one of the two complexes
    n_either = N0.sum(N0.sum(N0.logical_or(own, other)))

    return n_either - self.contactsShared(ref, cutoff)
def contactsDiff(self, ref, cutoff=None):
    """
    Number of B{residue-residue} contacts found in only one of this and
    the reference complex (contacts in either one minus the shared ones).

    @param ref: to compare this one with
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: number of contacts different in this and reference complex.
    @rtype: int
    """
    own = self.resContacts( cutoff )
    other = ref.resContacts( cutoff )

    ## contacts present in at least one of the two complexes
    n_either = N0.sum( N0.sum( N0.logical_or( own, other ) ) )

    return n_either - self.contactsShared( ref, cutoff )
def wVar(x, w):
    """
    Variance of the data x weighted by w, taken around the weighted mean
    returned by wMean.

    :param x: X-D array with numbers
    :type  x: array
    :param w: 1-D array of same length as x with weight factors
    :type  w: array

    :return: array('f') or float
    :rtype: array('f') or float
    """
    mean = wMean(x, w)
    w_total = N0.sum(w)

    ## normalisation factor: sum(w) / ( sum(w)^2 - sum(w^2) )
    norm = w_total / ((w_total**2 - N0.sum(w**2)))

    return norm * N0.sum(w * (x - mean)**2)
def group( self, a_indices, maxPerCenter ):
    """
    Group a bunch of integers (atom indices in PDBModel) so that each
    group has at most maxPerCenter items. The items are spread as evenly
    as possible; any surplus goes one by one to the first groups.

    @param a_indices: atom indices
    @type  a_indices: [int]
    @param maxPerCenter: max entries per group
    @type  maxPerCenter: int

    @return: list of lists of int; an empty list if a_indices is empty
    @rtype: [[int],[int]..]
    """
    n_items = len( a_indices )

    ## nothing to distribute (would otherwise divide by zero groups)
    if n_items == 0:
        return []

    ## how many groups are necessary? (ceiling division)
    n_centers = n_items // maxPerCenter
    if n_items % maxPerCenter:
        n_centers += 1

    ## how many items/atoms go into each group?
    base, surplus = divmod( n_items, n_centers )
    sizes = [ base + 1 ] * surplus + [ base ] * ( n_centers - surplus )

    ## distribute atom indices into consecutive groups
    result = []
    pos = 0
    for n in sizes:
        result.append( N0.take( a_indices, N0.arange(n) + pos ) )
        pos += n

    return result
def randomMask(nOnes, length):
    """
    Create random array of given length and number of ones.

    :param nOnes: number of ones
    :type  nOnes: int
    :param length: length of array
    :type  length: int

    :return: array with ones and zeros
    :rtype: array( 1|0 )

    :raise ValueError: if nOnes is negative or larger than length (the
                       requested number of ones can never be reached)
    """
    ## without this check the top-up loop below would never terminate
    if not 0 <= nOnes <= length:
        raise ValueError('randomMask: nOnes must be between 0 and length')

    r = N0.zeros(length)

    ## first round: nOnes random positions, duplicates are possible
    pos = [int(random.random() * length) for i in range(nOnes)]
    N0.put(r, pos, 1)

    ## if two ones ended up on the same position: add more, one by one
    while nOnes != N0.sum(r):
        N0.put(r, int(random.random() * length), 1)

    return r
def reduceToModel( self, xyz=None, reduce_profiles=1 ):
    """
    Build a PDBModel with one atom per atom group from the given
    coordinates. Each reduced atom gets the summed mass of its group as
    profile 'mass'.

    @param xyz: coordinate array (N_atoms x 3) or
                None (->use reference coordinates)
    @type  xyz: array OR None
    @param reduce_profiles: also call reduceAtomProfiles for the new
                            model (default: 1)
    @type  reduce_profiles: 1|0

    @return: PDBModel with reduced atom set and profile 'mass'
    @rtype: PDBModel
    """
    mass = self.m.atoms.get('mass')

    if xyz is None:
        xyz = self.m.getXyz()

    ## total mass of every atom group
    group_mass = [ N0.sum( N0.take( mass, group ) ) for group in self.groups ]

    reduced_xyz = self.reduceXyz( xyz )

    result = PDBModel()

    ## copy the prepared atom profiles of the reduced atoms
    for key in self.atoms.keys():
        result.atoms.set( key, self.atoms.valuesOf(key) )

    result.setXyz( reduced_xyz )
    result.atoms.set( 'mass', group_mass )

    if reduce_profiles:
        self.reduceAtomProfiles( self.m, result )

        ## residue profiles are taken over from the source model
        result.residues = self.m.residues

    return result
def test_rmsFit( self ):
    """rmsFit test"""
    from . import tools as T

    self.traj = T.load( T.testRoot('lig_pcr_00/traj.dat') )

    ## fit the last frame onto the reference coordinates
    rt, rmsdLst = match( self.traj.ref.xyz, self.traj[-1].xyz )

    if self.local:
        print('RMSD: %.2f' % rmsdLst[0][1])

    ## compare the summed elements of the returned rotation matrix
    ## with the summed elements of the stored expectation
    observed = abs( N0.sum( N0.ravel( rt[0] ) ) )
    expected = abs( N0.sum( N0.ravel( self.EXPECT ) ) )

    self.assertAlmostEqual( observed, expected, 6 )
def test_molUtils( self ):
    """molUtils test"""
    from biskit import PDBModel

    ## load a structure and keep the protein atoms only
    self.m = PDBModel( t.testRoot('lig/1A19.pdb' ))
    self.model_1 = self.m.compress( self.m.maskProtein() )

    ## same model with atoms sorted in standard order
    self.model_2 = sortAtomsOfModel( self.model_1 )

    ## compare the atom order position by position
    order_diff = [ cmpAtoms( self.model_1.atoms[a], self.model_2.atoms[a] )
                   for a in self.model_1.atomRange() ]

    self.assertEqual( N0.sum(order_diff), 159 )

    ## sequence string -> three letter codes -> back to one letter codes
    self.seq = self.model_1.sequence()
    self.seq = single2longAA( self.seq )
    self.seq = singleAA( self.seq )

    self.assertEqual( ''.join(self.seq), self.model_1.sequence() )
def test_EnsembleTraj( self ):
    """EnsembleTraj.fit/fitMembers/plotMembers test """
    ## The second part of the test will fail with the slimmed
    ## down test trajectory of T.testRoot(). To run the full
    ## test please select a larger trajectory.

    self.tr = traj2ensemble( self.tr )

    ## the returned mask is not used by the remaining test
    mask = self.tr.memberMask( 1 )

    ## fit all frames to the reference ...
    self.tr.fit( ref=self.tr.ref,
                 mask=self.tr.ref.maskCA(),
                 prof='rms_CA_ref',
                 verbose=self.local )

    ## ... and each member trajectory, once with refIndex=0 and once
    ## with the default reference
    self.tr.fitMembers( mask=self.tr.ref.maskCA(),
                        prof='rms_CA_0',
                        refIndex=0,
                        verbose=self.local )

    self.tr.fitMembers( mask=self.tr.ref.maskCA(),
                        prof='rms_CA_av',
                        verbose=self.local )

    self.p = self.tr.plotMemberProfiles( 'rms_CA_av', 'rms_CA_0',
                                         'rms_CA_ref', xlabel='frame' )

    if self.local or self.VERBOSITY > 2:
        self.p.show()

    rms_sum = N0.sum( self.tr.profile('rms_CA_av') )
    self.assertAlmostEqual( 26.19851, rms_sum, 2 )
def tripples( self, lst, n ):
    """
    Group items of lst into n tripples with minimal overlap.

    All possible tripples are enumerated; starting with the first one,
    the next tripple is always the one with the lowest summed overlap
    score to all tripples picked before.
    """
    size = len( lst )

    ## get all possible tripples
    candidates = [ ( lst[i], lst[j], lst[k] )
                   for i in range( size )
                   for j in range( i+1, size )
                   for k in range( j+1, size ) ]

    ## pairwise "distance" between tripples: (number of shared items)^2
    n_cand = len( candidates )
    pw = N0.zeros( (n_cand, n_cand), N0.Float32 )

    for i in range( n_cand ):
        for j in range( i, n_cand ):
            score = len( MU.intersection( candidates[i], candidates[j] ) )**2
            pw[i,j] = score
            pw[j,i] = score

    chosen = []
    nxt = 0

    while len( chosen ) < n:
        chosen.append( nxt )

        ## overlap of selected tripples with all others
        overlap = N0.sum( N0.array( [ pw[ i ] for i in chosen ] ) )

        ## select one with lowest overlap to all tripples selected before
        nxt = N0.argmin( overlap )

    return N0.take( candidates, chosen )
def centerSurfDist(model, surf_mask, mask=None):
    """
    Longest and shortest distance between the center of mass of the
    molecule and its selected surface atoms.

    @param model: structure; has to offer maskHeavy, centerOfMass, getXyz
    @type  model: PDBModel
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]
    @param mask: atom mask combined (multiplied) with surf_mask
                 (default: None -> model.maskHeavy())
    @type  mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    center = model.centerOfMass()

    ## coordinates of the atoms selected by both masks
    selected_xyz = N0.compress(mask * surf_mask, model.getXyz(), 0)

    ## distance of every selected atom to the center
    dist = N0.sqrt(N0.sum((selected_xyz - center)**2, 1))

    return max(dist), min(dist)
def projectOnSphere( xyz, radius=None, center=None ):
    """
    Move every point of xyz onto the surface of a sphere: coordinates are
    converted to polar coordinates relative to center, the radial
    component is replaced by radius and the result is converted back.

    :param xyz: cartesian coordinates
    :type  xyz: array N x 3 of float
    :param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    :type  radius: float
    :param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    :type  center: array 0 x 3 of float

    :return: array of cartesian coordinates (x, y, z)
    :rtype: array
    """
    if center is None:
        center = N0.average( xyz )

    ## coordinates relative to the sphere center
    shifted = xyz - center

    if radius is None:
        radius = max( N0.sqrt( N0.sum( N0.power( shifted, 2 ), 1 ) ) )

    rtp = cartesianToPolar( shifted )
    rtp[ :, 0 ] = radius   ## first polar column is the radial distance

    return polarToCartesian( rtp ) + center
def arrayEqual(a, b):
    """
    Compare 2 arrays or lists of numbers for equality. Both are flattened
    before the comparison, so only the sequence of values is compared.

    :param a: first array (multi-dimensional is supported)
    :type  a: array / list
    :param b: second array (multi-dimensional is supported)
    :type  b: array / list

    :return: 1 if array/list a equals array/list b; two None values count
             as equal, None never equals an array
    :rtype: 1|0
    """
    if a is None or b is None:
        return a is b

    if len(a) != len(b):
        return 0

    if isinstance(a, list):
        a = N0.array(a)
    if isinstance(b, list):
        b = N0.array(b)

    a = N0.ravel(a)
    b = N0.ravel(b)

    ## same outer length does not guarantee the same number of values;
    ## an element-wise comparison of unequal lengths is not defined
    if len(a) != len(b):
        return 0

    return N0.sum(a == b) == len(a)
def test_rmsFit(self):
    """rmsFit test"""
    from . import tools as T

    self.traj = T.load(T.testRoot('lig_pcr_00/traj.dat'))

    ## fit the last frame onto the reference coordinates
    rt, rmsdLst = match(self.traj.ref.xyz, self.traj[-1].xyz)

    if self.local:
        print('RMSD: %.2f' % rmsdLst[0][1])

    ## compare the summed elements of the returned rotation matrix
    ## with the summed elements of the stored expectation
    observed = abs(N0.sum(N0.ravel(rt[0])))
    expected = abs(N0.sum(N0.ravel(self.EXPECT)))

    self.assertAlmostEqual(observed, expected, 6)
def arrayEqual( a, b ):
    """
    Compare 2 arrays or lists of numbers for equality. Both are flattened
    before the comparison, so only the sequence of values is compared.

    :param a: first array (multi-dimensional is supported)
    :type  a: array / list
    :param b: second array (multi-dimensional is supported)
    :type  b: array / list

    :return: 1 if array/list a equals array/list b; two None values count
             as equal, None never equals an array
    :rtype: 1|0
    """
    if a is None or b is None:
        return a is b

    if len(a) != len(b):
        return 0

    if isinstance( a, list ):
        a = N0.array( a )
    if isinstance( b, list ):
        b = N0.array( b )

    a = N0.ravel( a )
    b = N0.ravel( b )

    ## same outer length does not guarantee the same number of values;
    ## an element-wise comparison of unequal lengths is not defined
    if len(a) != len(b):
        return 0

    return N0.sum( a==b ) == len(a)
def randomMask( nOnes, length ):
    """
    Create random array of given length and number of ones.

    :param nOnes: number of ones
    :type  nOnes: int
    :param length: length of array
    :type  length: int

    :return: array with ones and zeros
    :rtype: array( 1|0 )

    :raise ValueError: if nOnes is negative or larger than length (the
                       requested number of ones can never be reached)
    """
    ## without this check the top-up loop below would never terminate
    if not 0 <= nOnes <= length:
        raise ValueError( 'randomMask: nOnes must be between 0 and length' )

    r = N0.zeros( length )

    ## first round: nOnes random positions, duplicates are possible
    pos = [ int( random.random() * length ) for i in range( nOnes ) ]
    N0.put( r, pos, 1 )

    ## if two ones ended up on the same position: add more, one by one
    while nOnes != N0.sum(r):
        N0.put( r, int( random.random() * length ), 1 )

    return r
def pairwiseRmsd( self, aMask=None, noFit=0 ):
    """
    Calculate rmsd between each 2 coordinate frames.

    :param aMask: atom mask (default: None -> all atoms)
    :type  aMask: [1|0]
    :param noFit: take the rmsd of the coordinates as they are instead of
                  the value reported by rmsFit.match (default: 0)
    :type  noFit: 1|0

    :return: frames x frames array of float
    :rtype: array
    """
    frames = self.frames

    if aMask is not None:
        frames = N0.compress( aMask, frames, 1 )

    n_frames = len( frames )
    result = N0.zeros( (n_frames, n_frames), N0.Float32 )

    ## the matrix is symmetric, only the upper triangle is calculated
    for i in range( n_frames ):
        for j in range( i+1, n_frames ):

            if noFit:
                d = N0.sqrt(N0.sum(N0.power(frames[i]-frames[j], 2), 1))
                rms = N0.sqrt( N0.average(d**2) )
            else:
                rt, rmsdLst = rmsFit.match( frames[i], frames[j], 1 )
                rms = rmsdLst[0][1]

            result[i,j] = rms
            result[j,i] = rms

    return result
def tripples(self, lst, n):
    """
    Group items of lst into n tripples with minimal overlap.

    All possible tripples are enumerated; starting with the first one,
    the next tripple is always the one with the lowest summed overlap
    score to all tripples picked before.
    """
    size = len(lst)

    ## get all possible tripples
    candidates = [(lst[i], lst[j], lst[k])
                  for i in range(size)
                  for j in range(i + 1, size)
                  for k in range(j + 1, size)]

    ## pairwise "distance" between tripples: (number of shared items)^2
    n_cand = len(candidates)
    pw = N0.zeros((n_cand, n_cand), N0.Float32)

    for i in range(n_cand):
        for j in range(i, n_cand):
            score = len(MU.intersection(candidates[i], candidates[j]))**2
            pw[i, j] = score
            pw[j, i] = score

    chosen = []
    nxt = 0

    while len(chosen) < n:
        chosen.append(nxt)

        ## overlap of selected tripples with all others
        overlap = N0.sum(N0.array([pw[i] for i in chosen]))

        ## select one with lowest overlap to all tripples selected before
        nxt = N0.argmin(overlap)

    return N0.take(candidates, chosen)
def test_ComplexTraj(self):
    """Dock.ComplexTraj test"""
    import biskit.tools as T

    ## there is no complex trajectory in the test folder so will have
    ## to create a fake trajectory with a complex
    pdb_files = [ T.testRoot()+ '/com/1BGS.pdb' ] * 5
    t = Trajectory( pdb_files, verbose=self.local )
    t = ComplexTraj( t, recChains=[0] )

    ## create a fake second chain in the ligand
    for i in range( 1093+98, 1968 ):
        t.ref.atoms['chain_id'][i] = 'B'

    t.ref.chainIndex( force=1, cache=1 )
    t.cl = [1,2]

    ## new atom order: atoms 1093..1190 are moved to the front
    order = N0.concatenate( ( list(range(1093,1191)),
                              list(range(0,1093)),
                              list(range(1191,1968)) ) )

    reordered = t.takeAtoms( order )
    contactMat = reordered.atomContacts( 1 )

    if self.local:
        print('Receptor chains: %s Ligand chains: %s'%(t.cr, t.cl))

    self.assertEqual( N0.sum(N0.ravel(contactMat)), 308 )
def test_ComplexTraj(self):
    """Dock.ComplexTraj test"""
    import biskit.tools as T

    ## there is no complex trajectory in the test folder so will have
    ## to create a fake trajectory with a complex
    pdb_files = [T.testRoot() + '/com/1BGS.pdb'] * 5
    t = Trajectory(pdb_files, verbose=self.local)
    t = ComplexTraj(t, recChains=[0])

    ## create a fake second chain in the ligand
    for i in range(1093 + 98, 1968):
        t.ref.atoms['chain_id'][i] = 'B'

    t.ref.chainIndex(force=1, cache=1)
    t.cl = [1, 2]

    ## new atom order: atoms 1093..1190 are moved to the front
    order = N0.concatenate((list(range(1093, 1191)),
                            list(range(0, 1093)),
                            list(range(1191, 1968))))

    reordered = t.takeAtoms(order)
    contactMat = reordered.atomContacts(1)

    if self.local:
        print('Receptor chains: %s Ligand chains: %s' % (t.cr, t.cl))

    self.assertEqual(N0.sum(N0.ravel(contactMat)), 308)
def projectOnSphere(xyz, radius=None, center=None):
    """
    Move every point of xyz onto the surface of a sphere: coordinates are
    converted to polar coordinates relative to center, the radial
    component is replaced by radius and the result is converted back.

    :param xyz: cartesian coordinates
    :type  xyz: array N x 3 of float
    :param radius: radius of target sphere, if not provided the maximal
                   distance to center will be used (default: None)
    :type  radius: float
    :param center: center of the sphere, if not given the average of xyz
                   will be assigned to the center (default: None)
    :type  center: array 0 x 3 of float

    :return: array of cartesian coordinates (x, y, z)
    :rtype: array
    """
    if center is None:
        center = N0.average(xyz)

    ## coordinates relative to the sphere center
    shifted = xyz - center

    if radius is None:
        radius = max(N0.sqrt(N0.sum(N0.power(shifted, 2), 1)))

    rtp = cartesianToPolar(shifted)
    rtp[:, 0] = radius   ## first polar column is the radial distance

    return polarToCartesian(rtp) + center
def prepare( self ):
    """
    Overrides Executor method. Reduces the model to its heavy atoms,
    refuses models that consist of CA atoms only and writes the model
    to the input PDB file (self.f_pdb).

    @raise Dssp_Error: if every remaining atom is selected by maskCA()
    """
    self.model = self.model.compress( self.model.maskHeavy() )

    ## maskCA is a method; it has to be called to get the mask that is
    ## summed up here (summing the bound method never matches lenAtoms)
    if self.model.lenAtoms() == N0.sum( self.model.maskCA() ):
        raise Dssp_Error('The structure you want to calculate the secondary structure for seems to be a carbon alpha trace. Terminating')

    self.model.writePdb( self.f_pdb )
def fractionNativeContacts(self, ref, cutoff=None):
    """
    Fraction of the B{residue-residue} contacts of the native complex
    that are also found in this complex.

    @param ref: native complex
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: fraction of native contacts
    @rtype: float
    """
    cont = self.resContacts(cutoff, refComplex=ref)
    ref_cont = ref.resContacts(cutoff)

    ## the product is non-zero only where both matrices have a contact
    n_recovered = N0.sum(N0.sum(ref_cont * cont)) * 1.0
    n_native = N0.sum(N0.sum(ref_cont))

    return n_recovered / n_native
def contactsShared(self, reference, cutoff=None):
    """
    Number of equal B{residue-residue} contacts in this and
    reference complex.

    @param reference: reference complex
    @type  reference: Complex
    @param cutoff: cutoff for atom-atom contact to be counted
    @type  cutoff: float

    @return: the number of residue-residue contacts that are common to
             both this and reference (logical AND of the two residue
             contact matrices, summed up)
    @rtype: int
    """
    own = self.resContacts( cutoff=cutoff )
    other = reference.resContacts( cutoff=cutoff )

    in_both = N0.logical_and( own, other )

    return abs( N0.sum( N0.sum( in_both ) ) )
def contactsShared(self, reference, cutoff=None):
    """
    Number of equal B{residue-residue} contacts in this and
    reference complex.

    @param reference: reference complex
    @type  reference: Complex
    @param cutoff: cutoff for atom-atom contact to be counted
    @type  cutoff: float

    @return: the number of residue-residue contacts that are common to
             both this and reference (logical AND of the two residue
             contact matrices, summed up)
    @rtype: int
    """
    own = self.resContacts(cutoff=cutoff)
    other = reference.resContacts(cutoff=cutoff)

    in_both = N0.logical_and(own, other)

    return abs(N0.sum(N0.sum(in_both)))
def fractionNativeContacts(self, ref, cutoff=None ):
    """
    Fraction of the B{residue-residue} contacts of the native complex
    that are also found in this complex.

    @param ref: native complex
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: fraction of native contacts
    @rtype: float
    """
    cont = self.resContacts( cutoff, refComplex=ref )
    ref_cont = ref.resContacts( cutoff )

    ## the product is non-zero only where both matrices have a contact
    n_recovered = N0.sum( N0.sum( ref_cont * cont ) ) * 1.0
    n_native = N0.sum( N0.sum( ref_cont ) )

    return n_recovered / n_native
def area(curve, start=0.0, stop=1.0):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tupples.
    The x-axis is the first column of this array (curve[:,0]).

    (originally taken from biskit.Statistics.ROCalyzer)

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    points = N0.array(curve)

    ## working copy with swapped columns: c[:,0] is y, c[:,1] is x
    c = N0.zeros(N0.shape(points), points.dtype)
    c[:, 1] = points[:, 0]
    c[:, 0] = points[:, 1]

    assert len(N0.shape(c)) == 2

    ## keep only points with start <= x <= stop
    ## here we have a problem with flat curves
    in_range = N0.greater_equal(c[:, 1], start) * N0.less_equal(c[:, 1], stop)
    c = N0.compress(in_range, c, axis=0)

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    first = N0.array([[c[0, 0], start], ])
    last = N0.array([[c[-1, 0], stop], ])
    c = N0.concatenate((first, c, last))

    x = c[:, 1]
    y = c[:, 0]

    dx = x[1:] - x[:-1]  # distance on x between points
    dy = y[1:] - y[:-1]  # distance on y between points

    rectangles = y[:-1] * dx   # the rectangles between all points
    triangles = dx * dy / 2.0  # the triangles between all points

    return N0.sum(rectangles) + N0.sum(triangles)
def wMean(x, w=None):
    """
    Weighted mean: Mean of data (x) weighted by (w).

    :param x: X-D array with numbers
    :type  x: array
    :param w: 1-D array of same length as x with weight factors;
              None (default) gives every item the same weight
    :type  w: array

    :return: array('f') or float
    :rtype: array('f') or float
    """
    if w is None:
        ## equal weights: sum of the items over their number; the
        ## weights cannot be summed up if there are none
        return N0.sum(x) * 1. / len(x)

    wx = [x[i] * 1. * w[i] for i in range(len(x))]

    return N0.sum(wx) / N0.sum(w)
def wMean(x, w=None):
    """
    Weighted mean: Mean of data (x) weighted by (w).

    :param x: X-D array with numbers
    :type  x: array
    :param w: 1-D array of same length as x with weight factors;
              None (default) gives every item the same weight
    :type  w: array

    :return: array('f') or float
    :rtype: array('f') or float
    """
    if w is None:
        ## equal weights: sum of the items over their number; the
        ## weights cannot be summed up if there are none
        return N0.sum(x) * 1. / len(x)

    wx = [ x[i] * 1. * w[i] for i in range( len(x) ) ]

    return N0.sum(wx)/N0.sum(w)
def test_mathUtils(self):
    """mathUtils.polar/euler test"""
    ## convert two points to polar coordinates and back again
    self.d = N0.array([[20., 30., 40.], [23., 31., 50.]])
    self.a = polarToCartesian(cartesianToPolar(self.d))

    ## the three values of the first converted point go into eulerRotation
    first = self.a[0]
    self.t = eulerRotation(first[0], first[1], first[2])

    self.assertAlmostEqual(N0.sum(SD(self.a)), self.EXPECT)
def area(curve, start=0.0, stop=1.0 ):
    """
    Numerically add up the area under the given curve.
    The curve is a 2-D array or list of tupples.
    The x-axis is the first column of this array (curve[:,0]).

    (originally taken from biskit.Statistics.ROCalyzer)

    :param curve: a list of x,y coordinates
    :type  curve: [ (x,y), ] or N0.array
    :param start: lower boundary (in x) (default: 0.0)
    :type  start: float
    :param stop: upper boundary (in x) (default: 1.0)
    :type  stop: float

    :return: the area underneath the curve between start and stop.
    :rtype: float
    """
    points = N0.array( curve )

    ## working copy with swapped columns: c[:,0] is y, c[:,1] is x
    c = N0.zeros( N0.shape(points), points.dtype )
    c[:,1] = points[:,0]
    c[:,0] = points[:,1]

    assert len( N0.shape( c ) ) == 2

    ## keep only points with start <= x <= stop
    ## here we have a problem with flat curves
    in_range = N0.greater_equal( c[:,1], start ) * N0.less_equal( c[:,1], stop )
    c = N0.compress( in_range, c, axis=0 )

    ## fill to boundaries -- not absolutely accurate: we actually should
    ## interpolate to the neighboring points instead
    first = N0.array( [[ c[0,0], start ],] )
    last = N0.array( [[ c[-1,0], stop ],] )
    c = N0.concatenate( (first, c, last) )

    x = c[:,1]
    y = c[:,0]

    dx = x[1:] - x[:-1] # distance on x between points
    dy = y[1:] - y[:-1] # distance on y between points

    rectangles = y[:-1] * dx   # the rectangles between all points
    triangles = dx * dy / 2.0  # the triangles between all points

    return N0.sum( rectangles ) + N0.sum( triangles )
def test_mathUtils(self):
    """mathUtils.polar/euler test"""
    ## convert two points to polar coordinates and back again
    self.d = N0.array( [[20.,30.,40.],[23., 31., 50.]] )
    self.a = polarToCartesian( cartesianToPolar( self.d ) )

    ## the three values of the first converted point go into eulerRotation
    first = self.a[0]
    self.t = eulerRotation( first[0], first[1], first[2] )

    self.assertAlmostEqual( N0.sum( SD(self.a) ), self.EXPECT )
def rmsInterface( self, ref, cutoff=4.5, fit=1 ):
    """
    Rmsd between this and reference interface.
    The interface is defined as any residue that has an atom which is
    within the distance given by |cutoff| from its partner. It is
    determined on the reference complex and limited to heavy atoms.

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: atom distance cutoff for interface residue definition
                   (default: 4.5)
    @type  cutoff: float
    @param fit: least-squares fit before calculating the rms
                (default: 1)
    @type  fit: 1|0

    @return: interface rmsd
    @rtype: float
    """
    this = self

    ## reduce both complexes to their common atoms if either the
    ## receptor or the ligand models are not reported as equal
    if not ( ref.rec_model.equals( self.rec_model )[1]
             and ref.lig_model.equals( self.lig_model )[1] ):

        m_rec, m_rec_ref, m_lig, m_lig_ref = self.equalAtoms( ref )
        this = self.compress( m_rec, m_lig )
        ref = ref.compress( m_rec_ref, m_lig_ref )

    ## interface residues of the reference -> atom masks
    contacts = ref.resContacts( cutoff )

    rec_interface = ref.rec_model.res2atomMask( N0.sum( contacts, 1 ) )
    lig_interface = ref.lig_model.res2atomMask( N0.sum( contacts, 0 ) )

    interface = N0.concatenate( (rec_interface, lig_interface) )
    heavy = N0.concatenate( (ref.rec().maskHeavy(),
                             ref.lig_model.maskHeavy()) )

    mask_interface = interface * heavy

    ## rms between the two complete complex models
    ref_model = ref.model()
    this_model = this.model()

    return ref_model.rms( this_model, mask_interface, fit=fit )
def rmsInterface(self, ref, cutoff=4.5, fit=1):
    """
    Rmsd between this and reference interface.
    The interface is defined as any residue that has an atom which is
    within the distance given by |cutoff| from its partner. It is
    determined on the reference complex and limited to heavy atoms.

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: atom distance cutoff for interface residue definition
                   (default: 4.5)
    @type  cutoff: float
    @param fit: least-squares fit before calculating the rms
                (default: 1)
    @type  fit: 1|0

    @return: interface rmsd
    @rtype: float
    """
    this = self

    ## reduce both complexes to their common atoms if either the
    ## receptor or the ligand models are not reported as equal
    if not (ref.rec_model.equals(self.rec_model)[1]
            and ref.lig_model.equals(self.lig_model)[1]):

        m_rec, m_rec_ref, m_lig, m_lig_ref = self.equalAtoms(ref)
        this = self.compress(m_rec, m_lig)
        ref = ref.compress(m_rec_ref, m_lig_ref)

    ## interface residues of the reference -> atom masks
    contacts = ref.resContacts(cutoff)

    rec_interface = ref.rec_model.res2atomMask(N0.sum(contacts, 1))
    lig_interface = ref.lig_model.res2atomMask(N0.sum(contacts, 0))

    interface = N0.concatenate((rec_interface, lig_interface))
    heavy = N0.concatenate((ref.rec().maskHeavy(),
                            ref.lig_model.maskHeavy()))

    mask_interface = interface * heavy

    ## rms between the two complete complex models
    ref_model = ref.model()
    this_model = this.model()

    return ref_model.rms(this_model, mask_interface, fit=fit)
def test_PDBParseModel(self):
    """PDBParseModel test"""
    if self.local:
        print('Loading pdb file ..')

    self.p = PDBParseModel()

    ## a PDBModel created from the test PDB serves as parsing source
    source = B.PDBModel(T.testRoot() + '/rec/1A2P.pdb')
    self.m = self.p.parse2new(source)

    center_sum = N0.sum(self.m.centerOfMass())
    self.assertAlmostEqual(center_sum, 113.682601929, 2)
def contactsOverlap(self, ref, cutoff=None):
    """
    Fraction of overlapping B{residue-residue} contacts between this and
    reference complex.

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: fraction of contacts shared between this and ref
             (normalized to number of all contacts)
    @rtype: float
    """
    ## each residue contact matrix is needed twice, fetch it only once
    own = self.resContacts( cutoff=cutoff )
    other = ref.resContacts( cutoff=cutoff )

    equal = N0.logical_and( own, other )   ## contacts in both complexes
    total = N0.logical_or( own, other )    ## contacts in at least one

    return N0.sum(N0.sum( equal )) * 1.0 / N0.sum(N0.sum( total ))
def contactsOverlap(self, ref, cutoff=None):
    """
    Fraction of overlapping B{residue-residue} contacts between this and
    reference complex.

    @param ref: reference complex
    @type  ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type  cutoff: float

    @return: fraction of contacts shared between this and ref
             (normalized to number of all contacts)
    @rtype: float
    """
    ## each residue contact matrix is needed twice, fetch it only once
    own = self.resContacts(cutoff=cutoff)
    other = ref.resContacts(cutoff=cutoff)

    equal = N0.logical_and(own, other)   ## contacts in both complexes
    total = N0.logical_or(own, other)    ## contacts in at least one

    return N0.sum(N0.sum(equal)) * 1.0 / N0.sum(N0.sum(total))
def test_PDBParseModel( self ):
    """PDBParseModel test"""
    if self.local:
        print('Loading pdb file ..')

    self.p = PDBParseModel()

    ## a PDBModel created from the test PDB serves as parsing source
    source = B.PDBModel( T.testRoot()+'/rec/1A2P.pdb' )
    self.m = self.p.parse2new( source )

    center_sum = N0.sum( self.m.centerOfMass() )
    self.assertAlmostEqual( center_sum, 113.682601929, 2 )
def accumulate(a):
    """
    Running sum C{ a[0], a[0]+a[1], a[0]+a[1]+a[2], ... } of the values
    in a, divided by the total C{ N0.sum( a ) }.

    :param a: values to add up
    :type  a: array

    :return: normalized cumulative sum, one value per item of a
    :rtype: array
    """
    running = N0.add.accumulate(a)
    total = N0.sum(a)

    return running / total
def accumulate( a ):
    """
    Running sum C{ a[0], a[0]+a[1], a[0]+a[1]+a[2], ... } of the values
    in a, divided by the total C{ N0.sum( a ) }.

    :param a: values to add up
    :type  a: array

    :return: normalized cumulative sum, one value per item of a
    :rtype: array
    """
    running = N0.add.accumulate( a )
    total = N0.sum( a )

    return running / total
def test_PDBParsePickle(self):
    """PDBParsePickle test"""
    import biskit.core.oldnumeric as N0

    if self.local:
        print('Loading pickled model ..')

    self.p = PDBParsePickle()

    ## parse the pickled test model into a new model
    self.m = self.p.parse2new(T.testRoot('rec/1A2P_dry.model'))

    center_sum = N0.sum(self.m.centerOfMass())
    self.assertAlmostEqual(center_sum, 114.18037, 5)
def outliers(a, z=5, it=5):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and it > 0. Otherwise an all-zero mask is returned
    together with median and standard deviation of all values.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int ), float, float
    """
    assert (len(a) > 0)

    out = N0.zeros(len(a))

    ## too few values or no iteration requested: nothing is refined
    ## (with it < 1 median and SD would otherwise never be assigned)
    if len(a) < 3 or it < 1:
        return out, N0.median(a), N0.std(a)

    values = N0.array(a)

    for i in range(it):
        ## median and SD of the values not (yet) flagged as outliers
        b = N0.compress(N0.logical_not(out), a)
        me = N0.median(b)
        sd = N0.std(b)

        bz = N0.absolute((values - me) / sd)  # pseudo z-score of each value
        o = bz > z
        n_outliers = N0.sum(o)

        ## stop if converged or reached bottom
        if (n_outliers == N0.sum(out)) or (n_outliers > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def reduceXyz( self, xyz, axis=0 ):
    """
    Reduce the number of atoms in the given coordinate set: every atom
    group is replaced by the mass-weighted center of its atoms. The set
    must have the same length and order as the reference model. It may
    have an additional (time) dimension as first axis.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type  xyz: array
    @param axis: axis with atoms (default: 0)
    @type  axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3); None if there are no groups
    @rtype: array
    """
    masses = self.m.atoms.get('mass')

    centers = []

    for atom_indices in self.groups:

        group_xyz = N0.take( xyz, atom_indices, axis )
        group_mass = N0.take( masses, atom_indices )

        ## mass-weighted average position of the group
        center = N0.sum( group_xyz * N0.transpose([group_mass,]), axis=axis) \
                 / N0.sum( group_mass )

        ## re-insert the atom axis that was lost by summing over it
        if axis == 0:
            center = center[N0.NewAxis, :]
        elif axis == 1:
            center = center[:, N0.NewAxis, :]

        centers.append( center )

    if not centers:
        return None

    if len( centers ) == 1:
        return centers[0]

    return N0.concatenate( centers, axis )
def outliers( a, z=5, it=5 ):
    """
    Iterative detection of outliers in a set of numeric values.
    Requirement: len(a) > 0; outlier detection is only performed if
    len(a) > 2 and it > 0. Otherwise an all-zero mask is returned
    together with median and standard deviation of all values.

    :param a: array or list of values
    :type  a: [ float ]
    :param z: z-score threshold for iterative refinement of median and SD
    :type  z: float
    :param it: maximum number of iterations
    :type  it: int

    :return: outlier mask, median and standard deviation of last iteration
    :rtype: N0.array( int ), float, float
    """
    assert( len(a) > 0 )

    out = N0.zeros( len(a) )

    ## too few values or no iteration requested: nothing is refined
    ## (with it < 1 median and SD would otherwise never be assigned)
    if len(a) < 3 or it < 1:
        return out, N0.median(a), N0.std(a)

    values = N0.array( a )

    for i in range( it ):
        ## median and SD of the values not (yet) flagged as outliers
        b = N0.compress( N0.logical_not(out), a )
        me = N0.median( b )
        sd = N0.std( b )

        bz = N0.absolute((values - me) / sd)  # pseudo z-score of each value
        o = bz > z
        n_outliers = N0.sum(o)

        ## stop if converged or reached bottom
        if (n_outliers == N0.sum(out)) or (n_outliers > len(a) - 3):
            return o, me, sd

        out = o

    return out, me, sd
def test_PDBParsePickle( self ):
    """PDBParsePickle test"""
    import biskit.core.oldnumeric as N0

    if self.local:
        print('Loading pickled model ..')

    self.p = PDBParsePickle()

    ## parse the pickled test model into a new model
    self.m = self.p.parse2new( T.testRoot('rec/1A2P_dry.model') )

    center_sum = N0.sum( self.m.centerOfMass() )
    self.assertAlmostEqual( center_sum, 114.18037, 5 )