def contactResDistribution(self, cm=None):
    """
    Count occurrence of residues in protein-protein interface.

    @param cm: pre-calculated contact matrix (default: None)
    @type cm: matrix

    @return: dict {'A':3, 'C':1, .. } (20 standard amino acids)
    @rtype: dict
    """
    if cm is None:
        cm = self.resContacts()

    ## get mask for residues involved in contacts
    maskLig = N0.sum(cm)
    maskRec = N0.sum(N0.transpose(cm))

    ## get sequence of contact residues only
    seqLig = N0.compress(maskLig, self.lig().sequence())
    seqRec = N0.compress(maskRec, self.rec().sequence())
    seq = ''.join(seqLig) + ''.join(seqRec)  ## convert back to string

    ## count occurrence of letters
    result = {}
    for aa in molUtils.allAA():
        result[aa] = seq.count(aa)

    return result

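## Minimal usage sketch of the counting step above, in plain numpy and with a
## hypothetical toy contact matrix (2 receptor x 3 ligand residues, made-up
## sequences 'KR' and 'ACD'); not Biskit code, just the same logic:
import numpy as np

cm = np.array([[1, 0, 0],
               [0, 1, 1]])                    # toy residue contact matrix
maskLig = np.sum(cm, axis=0)                  # ligand residues in contact
maskRec = np.sum(cm, axis=1)                  # receptor residues in contact
seq = ''.join([aa for aa, m in zip('ACD', maskLig) if m]) + \
      ''.join([aa for aa, m in zip('KR', maskRec) if m])
counts = dict((aa, seq.count(aa)) for aa in 'ACDEFGHIKLMNPQRSTVWY')
## -> each of 'A', 'C', 'D', 'K', 'R' counts 1, all other residues 0
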
def calcReducedContacts(self, soln, c):
    """
    Get contact matrices and/or fnarc from reduced-atom models.

    @param soln: solution number
    @type soln: int
    @param c: Complex
    @type c: Complex
    """
    if not (self.reduced_recs and self.reduced_ligs):
        return

    if not self.requested(c, 'c_ratom_10', 'fnarc_10'):
        return

    try:
        ## create Complex with same orientation but reduced coordinates
        red_rec = self.reduced_recs[c.rec_model.source]
        red_lig = self.reduced_ligs[c.lig_model.source]
        red_com = Complex(red_rec, red_lig, c.ligandMatrix)

        contacts = red_com.atomContacts(10.0, cache=1)

        if self.requested(c, 'c_ratom_10'):
            c['c_ratom_10'] = MU.packBinaryMatrix(contacts)

        if self.c_ref_ratom_10 is not None:
            ref = N0.ravel(self.c_ref_ratom_10)
            c['fnarc_10'] = N0.sum(N0.ravel(contacts) * ref) \
                            / float(N0.sum(ref))

    except:
        self.reportError('reduced contacts error', soln)

def test_molTools(self):
    """molTools test"""
    from Biskit import PDBModel

    ## Loading PDB...
    self.m = PDBModel(T.testRoot() + '/lig/1A19.pdb')
    self.m = self.m.compress(self.m.maskProtein())

    hb = hbonds(self.m)

    xyz = xyzOfNearestCovalentNeighbour(40, self.m)

    if self.local:
        print '\nThe nearest covalently attached atom to the'
        print ' atom with index 40 has the coordinates:'
        print xyz

        print 'Potential h-bonds in model:'
        print '(donor index, acceptor index, distance and angle)'
        for h in hb:
            print h

        globals().update(locals())

    self.r = N0.sum(N0.ravel(hb[3:5])) + N0.sum(xyz)
    self.assertAlmostEqual(self.r, self.EXPECT, 3)

def __findTransformation(self, x, y):
    """
    Match two arrays by rotation and translation. Returns the
    rotation matrix and the translation vector.
    Back transformation: for atom i new coordinates will be::

        y_new[i] = N0.dot(r, y[i]) + t

    for all atoms in one step::

        y_new = N0.dot(y, N0.transpose(r)) + t

    @param x: coordinates
    @type x: array
    @param y: coordinates
    @type y: array

    @return: rotation matrix, translation vector
    @rtype: array, array

    @author: Michael Habeck
    """
    from numpy.linalg import svd

    ## center configurations
    x_av = N0.sum(x) / len(x)
    y_av = N0.sum(y) / len(y)
    x = x - x_av
    y = y - y_av

    ## svd of correlation matrix
    v, l, u = svd(N0.dot(N0.transpose(x), y))

    ## build rotation matrix and translation vector
    r = N0.dot(v, u)
    t = x_av - N0.dot(r, y_av)

    return r, t

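## A self-contained numpy check of the same SVD superposition, on hypothetical
## random coordinates; the reflection/chirality check of a full Kabsch fit is
## omitted here, as it is in the method above:
import numpy as np

def find_transformation(x, y):
    x_av, y_av = x.mean(0), y.mean(0)
    v, l, u = np.linalg.svd(np.dot((x - x_av).T, y - y_av))
    r = np.dot(v, u)                       # rotation matrix
    t = x_av - np.dot(r, y_av)             # translation vector
    return r, t

y = np.random.random((10, 3))
angle = 0.3
r0 = np.array([[np.cos(angle), -np.sin(angle), 0.],
               [np.sin(angle),  np.cos(angle), 0.],
               [0.,             0.,            1.]])
x = np.dot(y, r0.T) + [1., 2., 3.]         # rotate and translate y
r, t = find_transformation(x, y)
assert np.allclose(np.dot(y, r.T) + t, x)  # back transformation recovers x
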
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Intermolecular distances below cutoff after applying the two masks.

    @param cutoff: cutoff for B{atom-atom} contact in \AA
    @type cutoff: float
    @param rec_mask: atom mask
    @type rec_mask: [1|0]
    @param lig_mask: atom mask
    @type lig_mask: [1|0]
    @param cache: cache pairwise atom distance matrix
    @type cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    ## get atom coordinates as array N_atoms x 3
    rec_xyz = self.rec().getXyz()
    lig_xyz = self.lig().getXyz()

    ## get pair-wise distances -> atoms_rec x atoms_lig
    dist = getattr(self, 'pw_dist', None)
    if dist is None or \
       N0.shape(dist) != (N0.sum(rec_mask), N0.sum(lig_mask)):

        dist = self.__pairwiseDistances(N0.compress(rec_mask, rec_xyz, 0),
                                        N0.compress(lig_mask, lig_xyz, 0))
    if cache:
        self.pw_dist = dist

    ## reduce to 1 (distance < cutoff) or 0 -> n_atoms_rec x n_atoms_lig
    return N0.less(dist, cutoff)

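## The distance/cutoff step in plain numpy on hypothetical coordinates
## (the method above additionally applies atom masks and caches the matrix):
import numpy as np

rec_xyz = np.random.random((5, 3)) * 10.
lig_xyz = np.random.random((7, 3)) * 10.
diff = rec_xyz[:, np.newaxis, :] - lig_xyz[np.newaxis, :, :]
dist = np.sqrt(np.sum(diff**2, axis=2))      # 5 x 7 pairwise distances
contacts = np.less(dist, 4.5)                # 1 where distance < cutoff
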
def randomPatches(self, size, n=None, exclude=None, max_overlap=0,
                  exclude_all=None):
    """
    size        - int, number of atoms per patch
    n           - int, number of patches (None -> as many as possible, max 100)
    exclude     - [ 1|0 ], don't touch more than |max_overlap| of these atoms
                  (atom mask)
    max_overlap - int
    exclude_all - [ 1|0 ], don't touch ANY of these atoms

    -> [ [ 1|0 ] ], list of atom masks
    """
    if exclude is None:
        exclude = N0.zeros(self.model.lenAtoms(), 'i')

    if exclude_all is None:
        exclude_all = N0.zeros(self.model.lenAtoms(), 'i')

    n = n or 500

    centers = self.random_translations(n=n, center=self.center)

    ## start from excluded patch (if given) working outwards
    origin = centers[0]

    tabu = exclude_all
    if not N0.any(tabu):
        tabu = exclude
    else:
        origin = self.model.center(mask=tabu)

    centers = self.orderCenters(centers, origin)

    r = []

    for i in range(n):

        m = self.patchAround(centers[i], size)

        if N0.sum(m * exclude) <= max_overlap \
           and N0.sum(m * exclude_all) == 0:

            exclude = exclude + m
            r += [m]

    return r

def contactsDiff(self, ref, cutoff=None):
    """
    Number of different B{residue-residue} contacts in this and
    reference complex.

    @param ref: to compare this one with
    @type ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type cutoff: float

    @return: number of contacts different in this and reference complex.
    @rtype: int
    """
    both = N0.logical_or(self.resContacts(cutoff),
                         ref.resContacts(cutoff))
    return N0.sum(N0.sum(both)) - self.contactsShared(ref, cutoff)

def get_identities(self, nb_templates, validation_folder=None):
    """
    Calculate the mean of the percentage of identities for each template
    with the others.

    @param nb_templates: number of templates used in the cross-validation
    @type nb_templates: int
    @param validation_folder: folder with validation data (default: None
                              S{->} outFolder/L{F_TEMPLATE_FOLDER})
    @type validation_folder: str

    @return: dictionary with mean percent identities for each template
    @rtype: {str:float}
    """
    validation_folder = validation_folder or self.outFolder + \
                        self.F_TEMPLATE_FOLDER

    folders = self.__listDir(validation_folder)
    identities = {}

    for folder in folders:

        file = "%s/%s" % (validation_folder,
                          folder + CI.F_OUTPUT_IDENTITIES_COV)

        lst = self.parseFile(file)

        ## identity to mean template
        identities[folder] = N0.sum(lst[0][1:]) / nb_templates

    return identities

def test_ComplexTraj(self):
    """Dock.ComplexTraj test"""
    import Biskit.tools as T

    ## there is no complex trajectory in the test folder so will have
    ## to create a fake trajectory with a complex
    f = [T.testRoot() + '/com/1BGS.pdb'] * 5
    t = Trajectory(f, verbose=self.local)

    t = ComplexTraj(t, recChains=[0])

    #if self.local:
        #print 'plotting contact density...'
        #t.plotContactDensity( step=2 )

    ## create a fake second chain in the ligand
    for i in range(1093 + 98, 1968):
        t.ref.atoms['chain_id'][i] = 'B'

    t.ref.chainIndex(force=1, cache=1)
    t.cl = [1, 2]

    r = N0.concatenate((range(1093, 1191),
                        range(0, 1093),
                        range(1191, 1968)))

    tt = t.takeAtoms(r)

    contactMat = tt.atomContacts(1)

    if self.local:
        print 'Receptor chains: %s Ligand chains: %s' % (t.cr, t.cl)

    self.assertEqual(N0.sum(N0.ravel(contactMat)), 308)

def mergeProfiles(self, p0, p1, maxOverlap=3):
    """
    Merge profile p0 with profile p1, as long as they overlap in
    at most maxOverlap positions.

    @param p0: profile
    @type p0: [float]
    @param p1: profile
    @type p1: [float]
    @param maxOverlap: maximal allowed overlap between profiles
    @type maxOverlap: int

    @return: merged profile
    @rtype: array
    """
    p0 = self.__list2array(p0)
    p1 = self.__list2array(p1)

    overlap = N0.greater(N0.greater(p0, 0) + N0.greater(p1, 0), 1)

    if N0.sum(overlap) <= maxOverlap:

        ## one of the two profiles will in most cases not belong to these
        ## positions. We can't decide which one is wrong, let's eliminate
        ## both values. Alternatively we could keep one, or the average, ..
        N0.put(p1, N0.nonzero(overlap), 0)
        N0.put(p0, N0.nonzero(overlap), 0)

        p0 = p0 + p1

    return p0

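## Toy example of the merge rule with hypothetical profiles: positions where
## both profiles are non-zero count as overlap; the single overlap here is
## below the default maxOverlap of 3, so the overlapping values are zeroed in
## both profiles before adding:
import numpy as np

p0 = np.array([1., 2., 0., 0., 0.])
p1 = np.array([0., 5., 3., 4., 0.])
overlap = np.greater(np.greater(p0, 0) + np.greater(p1, 0), 1)
if np.sum(overlap) <= 3:
    p0[overlap] = 0.
    p1[overlap] = 0.
    merged = p0 + p1                         # -> [1., 0., 3., 4., 0.]
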
def test_EnsembleTraj(self):
    """EnsembleTraj.fit/fitMembers/plotMembers test"""
    ## The second part of the test will fail with the slimmed
    ## down test trajectory of T.testRoot(). To run the full
    ## test please select a larger trajectory.

    self.tr = traj2ensemble(self.tr)

    mask = self.tr.memberMask(1)

    self.tr.fit(ref=self.tr.ref,
                mask=self.tr.ref.maskCA(),
                prof='rms_CA_ref',
                verbose=self.local)

    self.tr.fitMembers(mask=self.tr.ref.maskCA(),
                       prof='rms_CA_0', refIndex=0,
                       verbose=self.local)
    self.tr.fitMembers(mask=self.tr.ref.maskCA(),
                       prof='rms_CA_av',
                       verbose=self.local)

    self.p = self.tr.plotMemberProfiles('rms_CA_av', 'rms_CA_0',
                                        'rms_CA_ref', xlabel='frame')
    if self.local or self.VERBOSITY > 2:
        self.p.show()

    self.assertAlmostEqual(26.19851,
                           N0.sum(self.tr.profile('rms_CA_av')), 2)

def __exposedResidues(self, ASA_values, sidechainCut=0.0,
                      backboneCut=0.0, totalCut=0.0):
    """
    Decide what is a surface exposed residue and what is not.
    sidechainCut, backboneCut, totalCut - float, cutoff values for what
    will be considered an exposed residue. All three values have to
    pass the test.

    @param ASA_values: array with ASA values for side chains, backbone
                       and total calculated in L{__read_residueASA}.
    @type ASA_values: array
    @param sidechainCut: cutoff ASA value of the side chain above which
                         the residue is considered exposed (default: 0.0)
    @type sidechainCut: float
    @param backboneCut: cutoff value for backbone ASA
    @type backboneCut: float
    @param totalCut: cutoff for total ASA
    @type totalCut: float

    @return: residue mask, where 0 = buried
    @rtype: [1|0]
    """
    col_0 = N0.greater(N0.transpose(ASA_values)[0], totalCut)
    col_1 = N0.greater(N0.transpose(ASA_values)[1], backboneCut)
    col_2 = N0.greater(N0.transpose(ASA_values)[2], sidechainCut)

    col_012 = N0.concatenate(([col_0], [col_1], [col_2]))

    exposedList = N0.greater(N0.sum(col_012), 0)

    return exposedList

def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from
    the center of the molecule to the surface.

    @param mask: atoms not to be considered (default: None)
    @type mask: [1|0]
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type surf_mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    ## calculate center of mass
    center = model.centerOfMass()

    ## surface atom coordinates
    surf_xyz = N0.compress(mask * surf_mask, model.getXyz(), 0)

    ## find the atom closest and furthest away from center
    dist = N0.sqrt(N0.sum((surf_xyz - center)**2, 1))
    minDist = min(dist)
    maxDist = max(dist)

    return maxDist, minDist

def pairwiseRmsd(self, aMask=None, noFit=0):
    """
    Calculate rmsd between each 2 coordinate frames.

    @param aMask: atom mask
    @type aMask: [1|0]
    @param noFit: skip the least-squares fit and use raw coordinates
                  (default: 0)
    @type noFit: 1|0

    @return: frames x frames array of float
    @rtype: array
    """
    frames = self.frames

    if aMask is not None:
        frames = N0.compress(aMask, frames, 1)

    result = N0.zeros((len(frames), len(frames)), N0.Float32)

    for i in range(0, len(frames)):

        for j in range(i + 1, len(frames)):
            if noFit:
                d = N0.sqrt(N0.sum(N0.power(frames[i] - frames[j], 2), 1))
                result[i, j] = result[j, i] = N0.sqrt(N0.average(d**2))

            else:
                rt, rmsdLst = rmsFit.match(frames[i], frames[j], 1)
                result[i, j] = result[j, i] = rmsdLst[0][1]

    return result

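## Sketch of the no-fit branch for a single frame pair, with hypothetical
## frames: every atom is shifted by 0.1 along each axis, so the rmsd is
## 0.1 * sqrt(3):
import numpy as np

frame_i = np.random.random((20, 3))
frame_j = frame_i + 0.1
d = np.sqrt(np.sum((frame_i - frame_j)**2, axis=1))   # per-atom distances
rmsd = np.sqrt(np.mean(d**2))
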
def test_molUtils(self):
    """molUtils test"""
    from Biskit import PDBModel

    S = self

    ## load a structure
    S.m = PDBModel(t.testRoot() + '/lig/1A19.pdb')
    S.model_1 = S.m.compress(S.m.maskProtein())

    ## now sort in standard order
    S.model_2 = sortAtomsOfModel(S.model_1)

    ## compare the atom order
    cmp = []
    for a in S.model_1.atomRange():
        cmp += [cmpAtoms(S.model_1.atoms[a], S.model_2.atoms[a])]

    self.assertEqual(N0.sum(cmp), 159)

    ## get the primary sequence as a string
    S.seq = S.model_1.sequence()

    ## convert it to a list of three letter code
    S.seq = single2longAA(S.seq)

    ## convert it to a list in one letter code
    S.seq = singleAA(S.seq)

    self.assertEqual(''.join(S.seq), S.model_1.sequence())

def reduceToModel(self, xyz=None, reduce_profiles=1):
    """
    Create a reduced PDBModel from coordinates. Atom profiles of the
    source PDBModel are reduced by averaging over the grouped atoms.

    @param xyz: coordinate array (N_atoms x 3) or None (-> use reference
                coordinates)
    @type xyz: array OR None

    @return: PDBModel with reduced atom set and profile 'mass'
    @rtype: PDBModel
    """
    mass = self.m.atoms.get('mass')
    if xyz is None:
        xyz = self.m.getXyz()

    mProf = [N0.sum(N0.take(mass, group)) for group in self.groups]
    xyz = self.reduceXyz(xyz)

    result = PDBModel()

    for k in self.atoms.keys():
        result.atoms.set(k, self.atoms.valuesOf(k))

##    result.setAtoms( self.atoms )

    result.setXyz(xyz)
    result.atoms.set('mass', mProf)

    if reduce_profiles:
        self.reduceAtomProfiles(self.m, result)

        result.residues = self.m.residues

    return result

def group(self, a_indices, maxPerCenter):
    """
    Group a bunch of integers (atom indices in PDBModel) so that each
    group has at most maxPerCenter items.

    @param a_indices: atom indices
    @type a_indices: [int]
    @param maxPerCenter: max entries per group
    @type maxPerCenter: int

    @return: list of lists of int
    @rtype: [[int],[int]..]
    """
    ## how many groups are necessary?
    n_centers = len(a_indices) / maxPerCenter
    if len(a_indices) % maxPerCenter:
        n_centers += 1

    ## how many items/atoms go into each group?
    nAtoms = N0.ones(n_centers, N0.Int) * int(len(a_indices) / n_centers)
    i = 0
    while N0.sum(nAtoms) != len(a_indices):
        nAtoms[i] += 1
        i += 1

    ## distribute atom indices into groups
    result = []
    pos = 0
    for n in nAtoms:
        result += [N0.take(a_indices, N0.arange(n) + pos)]
        pos += n

    return result

def test_rmsFit(self):
    """rmsFit test"""
    import Biskit.tools as T

    self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

    rt, rmsdLst = match(self.traj.ref.xyz, self.traj[-1].xyz)

    if self.local:
        print 'RMSD: %.2f' % rmsdLst[0][1]

    # return rotation matrix
    r = abs(N0.sum(N0.ravel(rt[0])))
    e = abs(N0.sum(N0.ravel(self.EXPECT)))

    self.assertAlmostEqual(r, e, 6)

def logConfidence(x, R, clip=0):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    @param x: observed value
    @type x: float
    @param R: sample of random values
    @type R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type clip: float

    @return: confidence that x is not random, median of random distr.
    @rtype: (float, float)
    """
    if clip and 0 in R:
        R = N0.clip(R, clip, max(R))
    if clip and x == 0:
        x = clip

    ## remove 0 instead of clipping
    R = N0.compress(R, R)
    if x == 0:
        return 0, 0

    ## get mean and stdv of log-transformed random sample
    alpha = N0.average(N0.log(R))

    n = len(R)

    beta = N0.sqrt(N0.sum(N0.power(N0.log(R) - alpha, 2)) / (n - 1.))

    return logArea(x, alpha, beta), logMedian(alpha)

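## Worked numpy sketch of the log-normal parameters used above: alpha is the
## mean of the log-transformed sample, beta its sample standard deviation
## (hypothetical random sample R):
import numpy as np

R = np.array([1.2, 0.8, 2.5, 1.1, 0.9])
logR = np.log(R)
alpha = np.mean(logR)
beta = np.sqrt(np.sum((logR - alpha)**2) / (len(R) - 1.))
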
def prepare(self):
    """
    Overrides Executor method.
    """
    self.model = self.model.compress(self.model.maskHeavy())
    if self.model.lenAtoms() == N0.sum(self.model.maskCA()):
        raise Dssp_Error, 'The structure you want to calculate the secondary structure for seems to be a carbon alpha trace. Terminating'
    self.model.writePdb(self.f_pdb)

def contactsShared(self, reference, cutoff=None):
    """
    Number of equal B{residue-residue} contacts in this and
    reference complex.

    @param reference: reference complex
    @type reference: Complex
    @param cutoff: cutoff for atom-atom contact to be counted
    @type cutoff: float

    @return: the number of residue-residue contacts that are common to
             both this and reference::
               abs( N0.sum( N0.sum( N0.logical_and( contactMatrix_a,
                                                    contactMatrix_b ))))
    @rtype: int
    """
    equality = N0.logical_and(self.resContacts(cutoff=cutoff),
                              reference.resContacts(cutoff=cutoff))
    return abs(N0.sum(N0.sum(equality)))

def fractionNativeContacts(self, ref, cutoff=None):
    """
    Fraction of native B{residue-residue} contacts.

    @param ref: native complex
    @type ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type cutoff: float

    @return: fraction of native contacts
    @rtype: float
    """
    cont = self.resContacts(cutoff, refComplex=ref)
    ref_cont = ref.resContacts(cutoff)

    result = N0.sum(N0.sum(ref_cont * cont)) * 1.0
    return result / N0.sum(N0.sum(ref_cont))

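## Toy example of the fraction-of-native-contacts calculation with
## hypothetical matrices: two of the three native residue contacts are
## reproduced, so the fraction is 2/3:
import numpy as np

ref_cont = np.array([[1, 1, 0],
                     [0, 1, 0]])    # 3 native contacts
cont = np.array([[1, 0, 0],
                 [0, 1, 1]])        # 2 of them reproduced, 1 non-native
fnc = np.sum(ref_cont * cont) * 1.0 / np.sum(ref_cont)   # -> 0.666..
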
def random_contacts(self, contMat, n, maskRec=None, maskLig=None):
    """
    Create randomized surface contact matrix with same number of
    contacts and same shape as given contact matrix.

    @param contMat: template contact matrix
    @type contMat: matrix
    @param n: number of matrices to generate
    @type n: int
    @param maskRec: surface masks (or something similar)
    @type maskRec: [1|0]
    @param maskLig: surface masks (or something similar)
    @type maskLig: [1|0]

    @return: list of [n] random contact matrices
    @rtype: [matrix]
    """
    a, b = N0.shape(contMat)
    nContacts = N0.sum(N0.sum(contMat))

    if not maskLig:
        r_size, l_size = N0.shape(contMat)
        maskLig = N0.ones(l_size)
        maskRec = N0.ones(r_size)

    c_mask = N0.ravel(N0.outerproduct(maskRec, maskLig))
    c_pos = N0.nonzero(c_mask)

    # get array with surface positions from complex
    cont = N0.take(N0.ravel(contMat), c_pos)
    length = len(cont)

    result = []

    for i in range(n):
        # create random array
        ranCont = mathUtils.randomMask(nContacts, length)

        # blow up to size of original matrix
        r = N0.zeros(a * b)
        N0.put(r, c_pos, ranCont)

        result += [N0.reshape(r, (a, b))]

    return result

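## Sketch of the core randomisation step in plain numpy with hypothetical
## sizes: a flat binary mask with the same number of 1s is shuffled and
## reshaped; the method above additionally restricts the 1s to surface
## positions.
import numpy as np

a, b, nContacts = 4, 5, 6
flat = np.zeros(a * b, int)
flat[:nContacts] = 1
np.random.shuffle(flat)
ranCont = np.reshape(flat, (a, b))  # random matrix with exactly 6 contacts
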
def __distances(self, point, xyz=None):
    """
    point - 3 x 1 array of float; point of origin
    xyz   - 3 x n array of float; coordinates, if None -- take model atoms

    -> distances of all atoms to given point
    """
    if xyz is None:
        xyz = self.model.getXyz()

    return N0.sqrt(N0.sum(N0.power(xyz - point, 2), 1))

def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative fitting),
    the known structure (reference), and the associated model inside the
    pdb_list. Calculates the different RMSDs and sets the profiles.

    @param fitted_model_if: iteratively fitted model
    @type fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type reference: PDBModel
    @param model: model
    @type model: PDBModel
    """
    ## first calculate rmsd for heavy atoms and CA without
    ## removing any residues from the model
    mask_CA = fitted_model_wo_if.maskCA()

    rmsd_aa = fitted_model_wo_if.rms(reference, fit=0)
    rmsd_ca = fitted_model_wo_if.rms(reference, mask=mask_CA, fit=1)

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress(outliers_mask)
    reference = reference.compress(outliers_mask)

    mask_CA = fitted_model_if.maskCA()

    rmsd_aa_if = fitted_model_if.rms(reference, fit=0)
    rmsd_ca_if = fitted_model_if.rms(reference, mask=mask_CA, fit=1)

    model.info["rmsd2ref_aa_if"] = rmsd_aa_if
    model.info["rmsd2ref_ca_if"] = rmsd_ca_if

    model.info["rmsd2ref_aa_outliers"] = 1. * (len(outliers_mask)
                                               - N0.sum(outliers_mask)) \
                                         / len(outliers_mask)
    model.info["rmsd2ref_ca_outliers"] = 1. * (N0.sum(mask_CA)
                                               - N0.sum(N0.compress(
                                                   mask_CA, outliers_mask))) \
                                         / N0.sum(mask_CA)

def rmsInterface(self, ref, cutoff=4.5, fit=1):
    """
    Rmsd between this and reference interface. The interface is
    defined as any residue that has an atom which is within the
    distance given by |cutoff| from its partner.

    @param ref: reference complex
    @type ref: Complex
    @param cutoff: atom distance cutoff for interface residue definition
                   (default: 4.5)
    @type cutoff: float
    @param fit: least-squares fit before calculating the rms (default: 1)
    @type fit: 1|0

    @return: interface rmsd
    @rtype: float
    """
    ## casting
    this = self
    if not ref.rec_model.equals(self.rec_model)[1] \
       or not ref.lig_model.equals(self.lig_model)[1]:

        m_rec, m_rec_ref, m_lig, m_lig_ref = self.equalAtoms(ref)
        this = self.compress(m_rec, m_lig)
        ref = ref.compress(m_rec_ref, m_lig_ref)

    ## determine interface
    contacts = ref.resContacts(cutoff)

    if_rec = ref.rec_model.res2atomMask(N0.sum(contacts, 1))
    if_lig = ref.lig_model.res2atomMask(N0.sum(contacts, 0))

    mask_interface = N0.concatenate((if_rec, if_lig))
    mask_heavy = N0.concatenate((ref.rec().maskHeavy(),
                                 ref.lig_model.maskHeavy()))
    mask_interface = mask_interface * mask_heavy

    ## rms
    ref_model = ref.model()
    this_model = this.model()

    return ref_model.rms(this_model, mask_interface, fit=fit)

def test_PDBParseModel(self):
    """PDBParseModel test"""

    ## loading output file from X-plor
    if self.local:
        print 'Loading pdb file ..'

    self.p = PDBParseModel()
    self.m = self.p.parse2new(B.PDBModel(T.testRoot() + '/rec/1A2P.pdb'))

    self.assertAlmostEqual(N0.sum(self.m.centerOfMass()),
                           113.682601929, 2)

def contactsOverlap(self, ref, cutoff=None):
    """
    Fraction of overlapping B{residue-residue} contacts between this and
    reference complex.

    @param ref: reference complex
    @type ref: Complex
    @param cutoff: maximal atom-atom distance, None .. previous setting
    @type cutoff: float

    @return: fraction of contacts shared between this and ref
             (normalized to number of all contacts)
    @rtype: float
    """
    equal = N0.logical_and(self.resContacts(cutoff=cutoff),
                           ref.resContacts(cutoff=cutoff))
    total = N0.logical_or(self.resContacts(cutoff),
                          ref.resContacts(cutoff))

    return N0.sum(N0.sum(equal)) * 1.0 / N0.sum(N0.sum(total))

def reduceXyz(self, xyz, axis=0):
    """
    Reduce the number of atoms in the given coordinate set. The set must
    have the same length and order as the reference model. It may have
    an additional (time) dimension as first axis.

    @param xyz: coordinates (N_atoms x 3) or (N_frames x N_atoms x 3)
    @type xyz: array
    @param axis: axis with atoms (default: 0)
    @type axis: int

    @return: coordinate array (N_less_atoms x 3) or
             (N_frames x N_less_atoms x 3)
    @rtype: array
    """
    masses = self.m.atoms.get('mass')
    r_xyz = None

    for atom_indices in self.groups:

        x = N0.take(xyz, atom_indices, axis)
        m = N0.take(masses, atom_indices)

        center = N0.sum(x * N0.transpose([m, ]), axis=axis) / N0.sum(m)

        if axis == 0:
            center = center[N0.NewAxis, :]

        if axis == 1:
            center = center[:, N0.NewAxis, :]

        if r_xyz is None:
            r_xyz = center

        else:
            r_xyz = N0.concatenate((r_xyz, center), axis)

    return r_xyz

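## The mass-weighted centre computed for each atom group, as a standalone
## numpy example (hypothetical coordinates and masses, e.g. a C-H pair):
import numpy as np

x = np.array([[0., 0., 0.],
              [2., 0., 0.]])
m = np.array([12., 1.])
center = np.sum(x * m[:, np.newaxis], axis=0) / np.sum(m)   # -> [0.1538, 0, 0]
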
def test_PDBParsePickle(self):
    """PDBParsePickle test"""

    import Biskit.oldnumeric as N0

    ## loading output file from X-plor
    if self.local:
        print 'Loading pickled model ..'

    self.p = PDBParsePickle()

    self.m = self.p.parse2new(T.testRoot() + '/rec/1A2P_dry.model')

    self.assertAlmostEqual(N0.sum(self.m.centerOfMass()),
                           114.18037, 5)

def entropy(self, emmProb, nullProb):
    """
    Calculate the Kullback-Leibler distance between the observed and the
    background amino acid distribution at a given position. High values
    mean high conservation. Empty (all 0) emission probabilities yield
    score 0.

    See also: BMC Bioinformatics. 2006; 7: 385

    emmProb & nullProb is shape 1,len(alphabet)

    @param emmProb: emission probabilities
    @type emmProb: array
    @param nullProb: null probabilities
    @type nullProb: array

    @return: relative entropy score
    @rtype: float
    """
    ## avoid log error
    if N0.sum(emmProb) == 0.:
        return 0.

    return N0.sum(emmProb * N0.log(emmProb / nullProb))

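## Worked numpy example of the relative entropy score for a hypothetical
## 4-letter alphabet; a sharply peaked distribution scores well above zero,
## while a distribution equal to the background scores zero:
import numpy as np

nullProb = np.array([0.25, 0.25, 0.25, 0.25])
emmProb = np.array([0.85, 0.05, 0.05, 0.05])
score = 0.
if np.sum(emmProb) != 0.:
    score = np.sum(emmProb * np.log(emmProb / nullProb))   # ~0.80
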
def calcContacts(self, soln, c):
    """
    Calculate contact matrices and fraction of native contacts, residue-
    and atom-based, with different distance cutoffs.

    @param soln: solution number
    @type soln: int
    @param c: Complex
    @type c: Complex
    """
    try:
        if self.requested(c, 'fnac_4.5') and self.c_ref_atom_4_5 is not None:
            ## cache pairwise atom distances for following calculations
            contacts = c.atomContacts(4.5, self.mask_rec, self.mask_lig,
                                      cache=1, map_back=0)
            ref = N0.ravel(self.c_ref_atom_4_5)

            c['fnac_4.5'] = N0.sum(N0.ravel(contacts) * ref) \
                            / float(N0.sum(ref))

        if self.requested(c, 'fnac_10') and self.c_ref_atom_10 is not None:

            contacts = c.atomContacts(10., self.mask_rec, self.mask_lig,
                                      cache=1, map_back=0)

            ref = N0.ravel(self.c_ref_atom_10)
            c['fnac_10'] = N0.sum(N0.ravel(contacts) * ref) \
                           / float(N0.sum(ref))

        if self.requested(c, 'c_res_4.5') \
           or (self.c_ref_res_4_5 is not None
               and (self.requested(c, 'fnrc_4.5', 'fnSurf_rec'))):

            res_cont = c.resContacts(4.5,
                                     cache=self.requested(c, 'c_res_4.5'))

            if self.c_ref_res_4_5 is not None \
               and self.requested(c, 'fnrc_4.5'):
                ref = N0.ravel(self.c_ref_res_4_5)
                c['fnrc_4.5'] = N0.sum(N0.ravel(res_cont) * ref) \
                                / float(N0.sum(ref))

            if self.c_ref_res_4_5 is not None \
               and self.requested(c, 'fnSurf_rec'):
                r, l = c.fractionNativeSurface(res_cont,
                                               self.c_ref_res_4_5)
                c['fnSurf_rec'] = r
                c['fnSurf_lig'] = l

    except:
        m1 = m2 = s = 0
        try:
            m1, m2, s = c.get('model1', 0), c.get('model2', 0), \
                        c.get('soln', 0)
        except:
            pass
        self.reportError('contact error (r %i : l %i, #%i)' % \
                         (m1, m2, s), soln)

def changeModel(inFile, prefix, sourceModel):

    print '\nget ' + os.path.basename(inFile) + '..',

    model = PDBModel(inFile)

    model.update()

    model = model.sort()

    eq = model.equals(sourceModel)
    if not eq[0] and eq[1]:
        raise ConvertError('source and other models are not equal: ' + str(eq))

#    model.validSource()
    model.setSource(sourceModel.validSource())

    #model.atomsChanged = 0
    for k in model.atoms:
        model.atoms[k, 'changed'] = N0.all(model[k] == sourceModel[k])

    model.xyzChanged = (0 != N0.sum(N0.ravel(model.xyz - sourceModel.xyz)))

    model.update(updateMissing=1)

    if model.xyzChanged:

        doper = PDBDope(model)

        if 'MS' in sourceModel.atoms.keys():
            doper.addSurfaceRacer(probe=1.4)

        if 'density' in sourceModel.atoms.keys():
            doper.addDensity()

##        if 'foldX' in sourceModel.info.keys():
##            doper.addFoldX()

        if 'delphi' in sourceModel.info.keys():
            doper.addDelphi()

    outFile = os.path.dirname(inFile) + '/' + prefix + \
              T.stripFilename(inFile) + '.model'

    T.dump(model, outFile)

    print '-> ' + os.path.basename(outFile)

def test_Whatif(self):
    """Whatif test"""
    from Biskit import PDBModel

    ## Loading PDB...
    f = T.testRoot() + "/com/1BGS.pdb"
    m = PDBModel(f)

    m = m.compress(m.maskProtein())
    m = m.compress(m.maskHeavy())

    ## Starting WhatIf
    x = WhatIf(m, debug=0, verbose=0)

    ## Running
    atomAcc, resAcc, resMask = x.run()

    if self.local:

        ## check that model hasn't changed
        m_ref = PDBModel(f)
        m_ref = m.compress(m.maskProtein())
        for k in m_ref.atoms.keys():
            if not N0.all(m_ref[k] == m[k]):
                print 'Not equal ', k
            else:
                print 'Equal ', k

        ## display exposed residues in PyMol
        from Pymoler import Pymoler
        pm = Pymoler()
        model = pm.addPdb(m, '1')
        pm.colorRes('1', resAcc[:, 0])
        pm.show()

        print "\nResult for first 10 atoms/residues: "
        print '\nAccessability (A^2):\n', atomAcc[:10]
        print '\nResidue accessability (A^2)'
        print '[total, backbone, sidechain]:\n', resAcc[:10]
        print '\nExposed residue mask:\n', resMask[:10]
        print '\nTotal atom accessability (A^2): %.2f' % sum(atomAcc)
        print '      residue accessability (A^2): %.2f' % sum(resAcc)[0]

    self.assertAlmostEqual(N0.sum(resAcc[:, 0]), 2814.6903, 7)

def __categorizeHexSurf(self, cutoff=0.1):
    """
    Compare complexes of list to native complex to see if
    their contact surfaces overlap with the native complex.

    @param cutoff: fraction cutoff for defining an overlap (default: 0.1)
    @type cutoff: float

    @return: list of len(self.hexContacts) overlapping with native
             contact surface of lig and rec (0 - no overlap, 1 - rec OR
             lig overlaps, 2 - rec AND lig overlap)
    @rtype: [0|1|2]
    """
    result = [self.com.fractionNativeSurface(c, self.contacts)
              for c in self.hexContacts]

    result = [N0.sum(N0.greater(o, cutoff)) for o in result]

    return result

def random_translations(self, n=1, center=None):
    """
    n Random translations on a sphere around center with fixed radius.
    The radius must be given as orbit to __init__.

    n      - int, number of random coordinates to generate
    center - 3 array of float

    -> array n x 3 of float
    """
    if center is None:
        center = self.center

    xyz = ra.random((n, 3)) - 0.5

    scale = self.orbit * 1.0 / N0.sqrt(N0.sum(xyz**2, 1))

    r = N0.array([scale[i] * xyz[i] for i in range(n)])

    return r + center

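## Plain numpy version of the same idea, for a hypothetical radius and centre:
## random direction vectors are rescaled so that every point sits exactly on
## a sphere of the given radius around the centre.
import numpy as np

n, radius, center = 5, 10., np.array([1., 2., 3.])
xyz = np.random.random((n, 3)) - 0.5
scale = radius / np.sqrt(np.sum(xyz**2, axis=1))
points = xyz * scale[:, np.newaxis] + center
## np.sqrt(np.sum((points - center)**2, 1)) -> all equal to radius
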
def test(model, center2center, nAtoms=10, exclude=None):

    from Biskit import Pymoler, PDBModel

    g = PatchGeneratorFromOrbit(model, center2center)

    overlap = int(round(nAtoms / 4.0))

    r = g.randomPatches(nAtoms, 500, max_overlap=overlap, exclude=exclude)

    profile = N0.sum(N0.array(r))

    pm = Pymoler()
    pm.addPdb(model, 'all')

    ms = [model.take(N0.nonzero(mask)) for mask in r]

    pm.addMovie(ms)

    return pm

def xyzOfNearestCovalentNeighbour(i, model):
    """
    Closest atom in the same residue as atom with index i

    @param model: PDBModel
    @type model: PDBModel
    @param i: atom index
    @type i: int

    @return: coordinates of the nearest atom
    @rtype: [float, float, float]
    """
    resModel = model.filter(residue_number=model.atoms['residue_number'][i])
    dist = N0.sqrt(N0.sum((resModel.xyz - model.xyz[i])**2, 1))

    ## set distance to self to something high
    dist[N0.argmin(dist)] = 100.

    pos_shortest = N0.nonzero(dist == min(dist))[0]

    return resModel.xyz[pos_shortest]

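## Minimal numpy sketch of the distance search within one residue
## (hypothetical coordinates; atom 0 plays the role of the query atom):
import numpy as np

res_xyz = np.array([[0., 0., 0.],
                    [1., 0., 0.],
                    [0., 3., 0.]])
dist = np.sqrt(np.sum((res_xyz - res_xyz[0])**2, axis=1))
dist[np.argmin(dist)] = 100.        # mask out the query atom itself
nearest = res_xyz[np.argmin(dist)]  # -> [1., 0., 0.]
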
def identities(self, aln_dictionary):
    """
    Create a dictionary that contains information about all the alignments
    in the aln_dictionary using pairwise comparisons.

    @param aln_dictionary: alignment dictionary
    @type aln_dictionary: dict

    @return: a dictionary of dictionaries with the sequence name as the
             top key. Each sub dictionary then has the keys:
              - 'name' - str, sequence name
              - 'seq' - str, sequence of
              - 'template_info' - list of the same length as the 'key'
                 sequence excluding deletions. The number of sequences
                 in the multiple alignment that contain information at
                 this position.
              - 'ID' - dict, sequence identity in percent comparing the
                 'key' sequence to all other sequences (excluding deletions)
              - 'info_ID' - dict, same as 'ID' but compared to the template
                 sequence length (i.e excluding deletions and insertions
                 in the 'key' sequence )
              - 'cov_ID' - dict, same as 'info_ID' but insertions are
                 defined comparing to all template sequences (i.e where
                 'template_info' is zero )
    @rtype: dict
    """
    ## loop over all sequences in alignment
    for i in self.sequences_name:
        template_names = []

        ## don't compare to self, remove current sequence
        for name in self.sequences_name:
            if name != i:
                template_names.append(name)

        ## loop over all sequences in alignment
        info_ID, ID, cov_ID = {}, {}, {}
        for y in self.sequences_name:

##            identity = 0
##            info_identity = 0
##            cov_identity = 0
            nb_of_identities = 0
            nb_of_template = 0
            template_info = []
            nb_of_residues = 0

            ## loop over the full length of the alignment
            for w in range(len(aln_dictionary["target"]["seq"])):

                ## skip deletions
                nb_of_info_res = 0
                if aln_dictionary[i]["seq"][w] != '-':
                    nb_of_residues += 1

                    ## count identities
                    if aln_dictionary[i]["seq"][w] == \
                           aln_dictionary[y]["seq"][w]:
                        nb_of_identities += 1

                    ## length excluding insertions
                    if aln_dictionary[y]["seq"][w] != '-':
                        nb_of_template += 1

                    ## loop over all sequences but self
                    for z in template_names:
                        ## count how many sequences contain alignment
                        ## information at this position
                        if aln_dictionary[z]["seq"][w] != '-':
                            nb_of_info_res += 1

                    template_info.append(nb_of_info_res)

            ## number of positions in which any other sequence
            ## contains alignment information
            nb_cov_res = N0.sum(N0.greater(template_info, 0))

            ## calculate identities
            info_ID[y] = ID[y] = cov_ID[y] = 0
            ## RAIK: Hack, nb_of_... can turn 0 for fragmented alignments
            if nb_of_template:
                info_ID[y] = 100. * nb_of_identities / nb_of_template
            if nb_of_residues:
                ID[y] = 100. * nb_of_identities / nb_of_residues
            if nb_cov_res:
                cov_ID[y] = 100. * nb_of_identities / nb_cov_res

        aln_dictionary[i]["info_ID"] = info_ID
        aln_dictionary[i]["ID"] = ID
        aln_dictionary[i]["cov_ID"] = cov_ID
        aln_dictionary[i]["template_info"] = template_info

    return aln_dictionary

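## Toy example of the identity bookkeeping for one pair of aligned sequences
## (hypothetical alignment; deletions in the first sequence are skipped, as
## in the loop above):
seq_i = "AC-DE"
seq_y = "ACQD-"
ident = resid = 0
for a, b in zip(seq_i, seq_y):
    if a != '-':
        resid += 1
        if a == b:
            ident += 1
ID = 100. * ident / resid           # -> 75.0
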
def parse_result(self):
    """
    Extract some information about the profile as well as the match state
    emission scores. Keys of the returned dictionary::

      'AA', 'name', 'NrSeq', 'emmScore', 'accession',
      'maxAllScale', 'seqNr', 'profLength', 'ent', 'absSum'

    @return: dictionary with various information about the profile
    @rtype: dict
    """
    ## check that the output file is there and seems valid
    if not os.path.exists(self.f_out):
        raise HmmerError, \
              'Hmmerfetch result file %s does not exist.' % self.f_out

    if T.fileLength(self.f_out) < 10:
        raise HmmerError, \
              'Hmmerfetch result file %s seems incomplete.' % self.f_out

    profileDic = {}

    ## read result
    hmm = open(self.f_out, 'r')
    out = hmm.read()
    hmm.close()

    ## collect some data about the hmm profile
    profileDic['name'] = self.hmmName
    profileDic['profLength'] = \
        int(string.split(re.findall('LENG\s+[0-9]+', out)[0])[1])
    profileDic['accession'] = \
        string.split(re.findall('ACC\s+PF[0-9]+', out)[0])[1]
    profileDic['NrSeq'] = \
        int(string.split(re.findall('NSEQ\s+[0-9]+', out)[0])[1])
    profileDic['AA'] = \
        string.split(re.findall('HMM[ ]+' + '[A-Y][ ]+'*20, out)[0])[1:]

    ## collect null emission scores
    pattern = 'NULE[ ]+' + '[-0-9]+[ ]+'*20
    nullEmm = [float(j) for j in
               string.split(re.findall(pattern, out)[0])[1:]]

    ## get emission scores
    prob = []
    for i in range(1, profileDic['profLength'] + 1):
        pattern = "[ ]+%i" % i + "[ ]+[-0-9]+"*20
        e = [float(j) for j in string.split(re.findall(pattern, out)[0])]
        prob += [e]

    profileDic['seqNr'] = N0.transpose(N0.take(prob, (0,), 1))
    profileDic['emmScore'] = N0.array(prob)[:, 1:]

    ## calculate emission probabilities
    emmProb, nullProb = self.hmmEmm2Prob(nullEmm, profileDic['emmScore'])

    ent = [N0.resize(self.entropy(e, nullProb), (1, 20))[0] for e in emmProb]
    profileDic['ent'] = N0.array(ent)

    ###### TEST #####

    proba = N0.array(prob)[:, 1:]

##    # test set all to max score
##    p = proba
##    p1 = []
##    for i in range( len(p) ):
##        p1 += [ N0.resize( p[i][N0.argmax( N0.array( p[i] ) )],
##                           N0.shape( p[i] ) ) ]
##    profileDic['maxAll'] = p1

    # test set all to N0.sum( abs( probabilities ) )
    p = proba
    p2 = []
    for i in range(len(p)):
        p2 += [N0.resize(N0.sum(N0.absolute(p[i])), N0.shape(p[i]))]
    profileDic['absSum'] = p2

    # set all to normalized max score
    p = proba
    p4 = []
    for i in range(len(p)):
        p_scale = (p[i] - N0.average(p[i])) / math.SD(p[i])
        p4 += [N0.resize(p_scale[N0.argmax(N0.array(p_scale))],
                         N0.shape(p[i]))]
    profileDic['maxAllScale'] = p4

    return profileDic

doper.addSurfaceRacer(probe=1.4)
surf_rec = rec.profile2mask('MS', 0.0001, 101)

doper = PDBDope(lig)
doper.addSurfaceRacer(probe=1.4)
surf_lig = lig.profile2mask('MS', 0.0001, 101)

## kick out non-surface
rec = rec.compress(surf_rec)
lig = lig.compress(surf_lig)

com = Complex(rec, lig)

## get interface patch
cont = com.atomContacts(cutoff=6.0)
rec_if = N0.sum(cont, 1)
lig_if = N0.sum(cont, 0)

## center distance
c2c = N0.sqrt(N0.sum((rec.center() - lig.center())**2, 0))
print "Center2Center: ", c2c

## get patches and put them into Pymoler for display
print "Patching"
excl = N0.compress(N0.ones(len(rec_if)), rec_if)

pm = test(rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if)

pm.addPdb(rec.compress(rec_if), 'rec_interface')
pm.addPdb(lig.compress(lig_if), 'lig_interface')
pm.addPdb(com.model(), 'complex')