def contactResDistribution(self, cm=None):
    """
    Count how often each residue letter occurs among the residues that
    take part in receptor-ligand contacts.

    @param cm: pre-calculated residue contact matrix; taken from
               self.resContacts() if omitted (default: None)
    @type  cm: matrix

    @return: dict {'A':3, 'C':1, .. } with one entry for every letter
             returned by molUtils.allAA()
    @rtype: dict
    """
    contacts = self.resContacts() if cm is None else cm

    ## contact counts per residue double as compress masks (non-zero = keep);
    ## N0.sum is called without axis and thus relies on N0's default axis
    ligCounts = N0.sum(contacts)
    recCounts = N0.sum(N0.transpose(contacts))

    ## letters of the contacting residues only, ligand first
    ligLetters = N0.compress(ligCounts, self.lig().sequence())
    recLetters = N0.compress(recCounts, self.rec().sequence())
    interfaceSeq = ''.join(ligLetters) + ''.join(recLetters)

    ## occurrence of each letter in the pooled interface sequence
    return dict([(aa, interfaceSeq.count(aa)) for aa in molUtils.allAA()])
def castHmmDic(self, hmmDic, repete, hmmGap, key):
    """
    Replace hmmDic[key] by one copy of the profile per repete, each copy
    stripped of the positions listed in the matching hmmGap entry.
    The copy of the last repete comes first in the result.

    @param hmmDic: dictionary from L{getHmmProfile}
    @type  hmmDic: dict
    @param repete: repete information from L{align}
    @type  repete: int
    @param hmmGap: information about gaps from L{align}, one list of
                   profile positions per repete
    @type  hmmGap: [int]
    @param key: name of scoring method to adjust for gaps and repetes
    @type  key: str

    @return: the same dictionary (modified in place)
    @rtype: dict
    """
    profile = hmmDic[key]

    for i in range(repete):
        ## 1 for every profile position that is kept in this repete
        keep = N0.ones(len(profile))
        N0.put(keep, hmmGap[i], 0)
        piece = N0.compress(keep, profile, 0)

        if i == 0:
            score = piece
        else:
            ## later repetes are put in front of the earlier ones
            score = N0.concatenate((piece, score))

    hmmDic[key] = score
    return hmmDic
def castHmmDic(self, hmmDic, repete, hmmGap, key):
    """
    Expand the profile stored under |key| to |repete| copies and remove
    from each copy the positions given in hmmGap for that repete.
    Copies are concatenated in reverse order of the repetes.

    @param hmmDic: dictionary from L{getHmmProfile}
    @type  hmmDic: dict
    @param repete: repete information from L{align}
    @type  repete: int
    @param hmmGap: information about gaps from L{align}, one list of
                   profile positions per repete
    @type  hmmGap: [int]
    @param key: name of scoring method to adjust for gaps and repetes
    @type  key: str

    @return: hmmDic itself, with hmmDic[key] replaced
    @rtype: dict
    """
    scores = hmmDic[key]
    n_positions = len(scores)

    for rep in range(repete):
        ## mask out the gap positions of this repete
        gapMask = N0.ones(n_positions)
        N0.put(gapMask, hmmGap[rep], 0)
        ungapped = N0.compress(gapMask, scores, 0)

        if rep == 0:
            score = ungapped
        else:
            ## prepend: the newest repete ends up first
            score = N0.concatenate((ungapped, score))

    hmmDic[key] = score
    return hmmDic
def __atomContacts(self, cutoff, rec_mask, lig_mask, cache):
    """
    Receptor-ligand atom pairs that are closer than cutoff, restricted
    to the atoms selected by the two masks.

    A distance matrix stored in self.pw_dist is re-used if its shape
    fits the two masks; otherwise the distances are re-calculated.

    @param cutoff: cutoff for B{atom-atom} contact in Angstrom
    @type  cutoff: float
    @param rec_mask: atom mask
    @type  rec_mask: [1|0]
    @param lig_mask: atom mask
    @type  lig_mask: [1|0]
    @param cache: store the pairwise atom distance matrix in self.pw_dist
    @type  cache: 1|0

    @return: atom contact matrix, array sum_rec_mask x sum_lig_mask
    @rtype: array
    """
    rec_xyz = self.rec().getXyz()
    lig_xyz = self.lig().getXyz()

    ## a cached matrix is only usable if it was built for masks
    ## selecting the same number of atoms
    cached = getattr(self, 'pw_dist', None)
    usable = cached is not None and \
        N0.shape(cached) == (N0.sum(rec_mask), N0.sum(lig_mask))

    if usable:
        dist = cached
    else:
        dist = self.__pairwiseDistances(N0.compress(rec_mask, rec_xyz, 0),
                                        N0.compress(lig_mask, lig_xyz, 0))
        if cache:
            self.pw_dist = dist

    ## 1 where distance < cutoff, else 0
    return N0.less(dist, cutoff)
def takeFrames(self, indices):
    """
    Create a new trajectory that holds only the frames at the given
    positions.

    @param indices: positions to take; positions >= len( frames ) are
                    dropped silently
    @type  indices: [int]

    @return: copy of this Trajectory (fewer frames, semi-deep copy of ref)
    @rtype: Trajectory
    """
    ## discard positions beyond the last frame
    withinRange = N0.less(indices, len(self.frames))
    indices = N0.compress(withinRange, indices)

    result = self.__class__()

    ## this step takes some time for large frames !
    result.frames = N0.take(self.frames, indices, 0)

    ## semi-deep copy of reference model: take all its atoms
    allAtoms = range(self.ref.lenAtoms())
    result.setRef(self.ref.take(allAtoms))

    if self.frameNames is not None:
        names = N0.take(self.frameNames, indices, 0)
        result.frameNames = [''.join(name) for name in names.tolist()]

    result.pc = self.__takePca(indices)
    result.profiles = self.profiles.take(indices)

    ## shared, not copied
    result.resIndex = self.resIndex

    return result
def residusMaximus(self, atomValues, mask=None):
    """
    Take list of value per atom, return list where all atoms of any
    residue are set to the highest value of any atom in that residue
    (after applying mask; values of masked-out atoms count as 0).

    @param atomValues: list 1 x N, values per atom
    @type  atomValues: [ float ]
    @param mask: list 1 x N, 0|1, 'master' atoms of each residue
                 (default: None, all atoms)
    @type  mask: [1|0]

    @return: Numpy array 1 x N of float
    @rtype: array
    """
    if mask is None:
        mask = N0.ones(len(self.frames[0]), N0.Int32)

    ## eliminate all values that do not belong to the selected atoms
    masked = atomValues * mask

    ## residue index of every atom; fetched once instead of twice per
    ## residue inside the loop
    resMap = self.resMap()

    result = []

    ## set all atoms of each residue to uniform value
    for res in range(0, resMap[-1] + 1):

        ## get atom entries for this residue
        resAtoms = N0.compress(N0.equal(resMap, res), masked)

        ## get maximum value
        masterValue = max(resAtoms)

        ## one float entry per atom of this residue
        result.extend(resAtoms * 0.0 + masterValue)

    return N0.array(result)
def logConfidence(x, R, clip=0):
    """
    Estimate the probability of x NOT being a random observation from a
    lognormal distribution that is described by a set of random values.

    Zero values are removed from R before the log-transformation (after
    optional clipping). If x is still 0 at that point, (0, 0) is returned.

    @param x: observed value
    @type  x: float
    @param R: sample of random values
    @type  R: [float]
    @param clip: clip zeros at this value  0->don't clip (default: 0)
    @type  clip: float

    @return: confidence that x is not random, median of random distr.
             (results of logArea and logMedian)
    @rtype: (float, float)
    """
    if clip:
        ## lift zeros to the clip value, in the sample ...
        if 0 in R:
            R = N0.clip(R, clip, max(R))
        ## ... and in the observation
        if x == 0:
            x = clip

    ## remove 0 instead of clipping
    R = N0.compress(R, R)

    if x == 0:
        return 0, 0

    ## mean and (n-1) standard deviation of the log-transformed sample
    logR = N0.log(R)
    alpha = N0.average(logR)

    n = len(R)
    beta = N0.sqrt(N0.sum(N0.power(logR - alpha, 2)) / (n - 1.))

    return logArea(x, alpha, beta), logMedian(alpha)
def pairwiseRmsd(self, aMask=None, noFit=0):
    """
    Calculate rmsd between each pair of coordinate frames.

    @param aMask: atom mask (default: None, all atoms)
    @type  aMask: [1|0]
    @param noFit: 1 -> compare the frames as they are; 0 -> take the rmsd
                  reported by rmsFit.match for each pair (default: 0)
    @type  noFit: 1|0

    @return: frames x frames array of float (symmetric, diagonal 0)
    @rtype: array
    """
    frames = self.frames

    if aMask is not None:
        frames = N0.compress(aMask, frames, 1)

    nFrames = len(frames)
    result = N0.zeros((nFrames, nFrames), N0.Float32)

    ## only the upper triangle is calculated, the lower one is mirrored
    for i in range(nFrames):
        for j in range(i + 1, nFrames):

            if noFit:
                ## per-atom distances between the two frames
                d = N0.sqrt(N0.sum(N0.power(frames[i] - frames[j], 2), 1))
                rmsd = N0.sqrt(N0.average(d**2))
            else:
                rt, rmsdLst = rmsFit.match(frames[i], frames[j], 1)
                rmsd = rmsdLst[0][1]

            result[i, j] = result[j, i] = rmsd

    return result
def centerSurfDist(model, surf_mask, mask=None):
    """
    Calculate the longest and shortest distance from the center of mass
    of the molecule to its surface atoms.

    @param model: model providing maskHeavy(), centerOfMass() and getXyz()
    @type  model: PDBModel
    @param surf_mask: atom surface mask, needed for minimum surface distance
    @type  surf_mask: [1|0]
    @param mask: atoms to be considered; combined with surf_mask
                 (default: None, model.maskHeavy())
    @type  mask: [1|0]

    @return: max distance, min distance
    @rtype: float, float
    """
    if mask is None:
        mask = model.maskHeavy()

    center = model.centerOfMass()

    ## coordinates of atoms that are selected AND on the surface
    selection = mask * surf_mask
    surf_xyz = N0.compress(selection, model.getXyz(), 0)

    ## distance of each selected surface atom from the center
    offsets = surf_xyz - center
    dist = N0.sqrt(N0.sum(offsets**2, 1))

    return max(dist), min(dist)
def __extractLigandMatrix(self, fcomplex):
    """
    Compare the ligand structure from a hex complex with the original
    ligand model and determine the transformation between their CA atoms.
    The result is only returned, it is not stored on the instance.

    @param fcomplex: pdb file with hex complex
    @type  fcomplex: str

    @return: rotation matrix and translation matrix as tuple
    @rtype: (array, array)
    """
    docked = self._extractLigandStructure(fcomplex)

    ## CA coordinates of docked and template ligand;
    ## compress is called without axis and relies on N0's default
    ca_docked = N0.compress(docked.maskCA(), docked.xyz)
    ca_template = N0.compress(self.lig_model.maskCA(), self.lig_model.xyz)

    rotation, translation = self._findTransformation(ca_docked, ca_template)

    return (rotation, translation)
def phi_and_psi(self, model):
    """
    Calculate two backbone torsion angles for all residues in model and
    append them, chain by chain, to self.phi and self.psi::

      self.phi - torsion N(i), CA(i), C(i), N(i+1)
                 - last position in a chain = None
      self.psi - torsion C(i-1), N(i), CA(i), C(i)
                 - first position in a chain = None

    NOTE(review): by the usual IUPAC convention N-CA-C-N(i+1) is called
    psi and C(i-1)-N-CA-C is called phi, so the two attribute names look
    swapped -- confirm before changing, existing tests pin these values.

    @param model: PDBModel
    @type  model: PDBModel
    """
    for c in range(model.lenChains(breaks=1)):

        chain = model.takeChains([c], breaks=1)
        xyz = chain.xyz

        ca = N0.compress(chain.maskCA(), xyz, 0)
        n = N0.compress(chain.mask(['N']), xyz, 0)
        co = N0.compress(chain.mask(['C']), xyz, 0)

        nRes = len(n)

        ## needs the N of the following residue -> undefined at chain end
        self.phi += [self.dihedral(n[i], ca[i], co[i], n[i + 1])
                     for i in range(nRes - 1)]
        self.phi += [None]

        ## needs the C of the preceding residue -> undefined at chain start
        self.psi += [None]
        self.psi += [self.dihedral(co[i - 1], n[i], ca[i], co[i])
                     for i in range(1, nRes)]
def plotContactDensity(self, step=1, cutoff=4.5):
    """
    Example. Plot a 10-bin density histogram of all non-zero entries of
    the average contact matrix. (Original author's note: something
    wrong??)

    @param step: passed on to averageContacts (default: 1)
    @type  step: int
    @param cutoff: passed on to averageContacts (default: 4.5)
    @type  cutoff: float

    @raise ComplexTrajError: if gnuplot program is not installed
    """
    if not gnuplot.installed:
        raise ComplexTrajError('gnuplot program is not installed')

    ## flatten and throw away all zero entries
    contacts = N0.ravel(self.averageContacts(step, cutoff))
    nonZero = N0.compress(contacts, contacts)

    gnuplot.plot(hist.density(nonZero, 10))
def compareSequences(seqAA_1, seqAA_2):
    """
    Compare two sequences and return one 0|1 mask per sequence. A
    position is set to 1 if it survives both filter steps below:

      1. positions reported by getSkipLists (expanded by expandRepeats)
         are removed from both sequences
      2. of the remainder, only the stretches reported by getEqualLists
         are kept

    NOTE(review): the exact meaning of the helper results is not visible
    here; presumably the masks mark the positions that are equal in both
    sequences -- confirm against the helpers.

    @param seqAA_1: first sequence
    @type  seqAA_1: [str] or str
    @param seqAA_2: second sequence
    @type  seqAA_2: [str] or str

    @return: mask for sequence 1, mask for sequence 2
    @rtype: array, array
    """
    res_1 = list(seqAA_1)
    res_2 = list(seqAA_2)

    ## original position of every residue
    pos_1 = range(len(res_1))
    pos_2 = range(len(res_2))

    ## result masks, nothing selected yet
    mask_1 = N0.zeros(len(pos_1))
    mask_2 = N0.zeros(len(pos_2))

    ## step 1: remove the stretches that are to be skipped
    opCodes = getOpCodes(res_1, res_2)
    skip_1, skip_2 = getSkipLists(opCodes)

    skip_1 = [expandRepeats(res_1, *pos) for pos in skip_1]
    skip_2 = [expandRepeats(res_2, *pos) for pos in skip_2]

    keep_1 = del2mask(res_1, *skip_1)
    keep_2 = del2mask(res_2, *skip_2)

    res_1 = N0.compress(keep_1, res_1).tolist()
    pos_1 = N0.compress(keep_1, pos_1).tolist()
    res_2 = N0.compress(keep_2, res_2).tolist()
    pos_2 = N0.compress(keep_2, pos_2).tolist()

    ## step 2: keep the equal parts of what is left
    opCodes = getOpCodes(res_1, res_2)
    equal_1, equal_2 = getEqualLists(opCodes)

    res_1, pos_1 = getEqual(res_1, pos_1, equal_1)
    res_2, pos_2 = getEqual(res_2, pos_2, equal_2)

    ## mark the surviving original positions
    N0.put(mask_1, pos_1, 1)
    N0.put(mask_2, pos_2, 1)

    return mask_1, mask_2
def pca(self, atomMask=None, frameMask=None, fit=1):
    """
    Calculate principal components of trajectory frames by singular
    value decomposition of the coordinates (minus the average over all
    frames).

    @param atomMask: 1 x N_atoms, [111001110..] atoms to consider
                     (default: all)
    @type  atomMask: [1|0]
    @param frameMask: 1 x N_frames, [001111..] frames to consider
                      (default all )
    @type  frameMask: [1|0]
    @param fit: call self.fit( atomMask ) before the analysis (default: 1)
    @type  fit: 1|0

    @return: U, V * L and L**2 of the decomposition V, L, U = svd(data);
             presumably the principal components, the projection of each
             frame in PC space and the eigenvalue of each PC
    @rtype: array, array, array
    """
    if frameMask is None:
        frameMask = N0.ones(len(self.frames), N0.Int32)

    if atomMask is None:
        atomMask = N0.ones(self.getRef().lenAtoms(), N0.Int32)

    if fit:
        self.fit(atomMask)

    ## the average is taken over all frames, not only the selected ones
    refxyz = N0.average(self.frames, 0)

    data = N0.compress(frameMask, self.frames, 0) - refxyz
    data = N0.compress(atomMask, data, 1)

    ## reduce to 2D array: one flat coordinate vector per frame
    data = N0.array([N0.ravel(frame) for frame in data])

    V, L, U = LA.svd(data)

    return U, V * L, N0.power(L, 2)
def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative
    fitting), the known structure (reference), and the associated
    model inside the pdb_list. Calculates the different RMSD values and
    stores them in model.info under the keys rmsd2ref_aa_wo_if,
    rmsd2ref_ca_wo_if, rmsd2ref_aa_if, rmsd2ref_ca_if,
    rmsd2ref_aa_outliers and rmsd2ref_ca_outliers.

    @param fitted_model_if: iteratively fitted model; must carry the
                            atom profile 'rms_outliers'
    @type  fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type  fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type  reference: PDBModel
    @param model: model that receives the info entries
    @type  model: PDBModel
    """
    ## first calculate rmsd for heavy atoms and CA without
    ## removing any residues from the model
    mask_CA = fitted_model_wo_if.maskCA()

    rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
    rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 )

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    ## 1 for every atom that is NOT an outlier of the iterative fit
    outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

    ## CA mask of the complete model; it has the same length as
    ## outliers_mask and is needed for the CA outlier fraction below
    ## (the mask of the compressed model is shorter and must not be used)
    mask_CA_all = fitted_model_if.maskCA()

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress( outliers_mask )
    reference = reference.compress( outliers_mask )

    mask_CA = fitted_model_if.maskCA()

    rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
    rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )

    model.info["rmsd2ref_aa_if"] = rmsd_aa_if
    model.info["rmsd2ref_ca_if"] = rmsd_ca_if

    ## fraction of all atoms that are outliers
    n_atoms = len( outliers_mask )
    model.info["rmsd2ref_aa_outliers"] = \
        1.*( n_atoms - N0.sum( outliers_mask ) ) / n_atoms

    ## fraction of CA atoms that are outliers
    n_ca = N0.sum( mask_CA_all )
    n_ca_kept = N0.sum( N0.compress( mask_CA_all, outliers_mask ) )
    model.info["rmsd2ref_ca_outliers"] = 1.*( n_ca - n_ca_kept ) / n_ca
def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative
    fitting), the known structure (reference), and the associated
    model inside the pdb_list. Calculates the different RMSD values and
    sets the corresponding entries in model.info (rmsd2ref_aa_wo_if,
    rmsd2ref_ca_wo_if, rmsd2ref_aa_if, rmsd2ref_ca_if,
    rmsd2ref_aa_outliers, rmsd2ref_ca_outliers).

    @param fitted_model_if: iteratively fitted model; must carry the
                            atom profile 'rms_outliers'
    @type  fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type  fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type  reference: PDBModel
    @param model: model that receives the info entries
    @type  model: PDBModel
    """
    ## first calculate rmsd for heavy atoms and CA without
    ## removing any residues from the model
    mask_CA = fitted_model_wo_if.maskCA()

    rmsd_aa = fitted_model_wo_if.rms(reference, fit=0)
    rmsd_ca = fitted_model_wo_if.rms(reference, mask=mask_CA, fit=1)

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    ## 1 for every atom that is NOT an outlier of the iterative fit
    outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

    ## keep the CA mask of the uncompressed model: only this one matches
    ## outliers_mask in length and can be used to count CA outliers
    mask_CA_all = fitted_model_if.maskCA()

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress(outliers_mask)
    reference = reference.compress(outliers_mask)

    mask_CA = fitted_model_if.maskCA()

    rmsd_aa_if = fitted_model_if.rms(reference, fit=0)
    rmsd_ca_if = fitted_model_if.rms(reference, mask=mask_CA, fit=1)

    model.info["rmsd2ref_aa_if"] = rmsd_aa_if
    model.info["rmsd2ref_ca_if"] = rmsd_ca_if

    ## fraction of all atoms that are outliers
    n_atoms = len(outliers_mask)
    model.info["rmsd2ref_aa_outliers"] = \
        1.*(n_atoms - N0.sum(outliers_mask)) / n_atoms

    ## fraction of CA atoms that are outliers
    n_ca = N0.sum(mask_CA_all)
    n_ca_kept = N0.sum(N0.compress(mask_CA_all, outliers_mask))
    model.info["rmsd2ref_ca_outliers"] = 1.*(n_ca - n_ca_kept) / n_ca
def getFluct_global(self, mask=None):
    """
    Get the fluctuation of each atom around its average position in the
    trajectory. The frames should be superimposed (fit() ) to a reference.

    The value is the average of the atom's distance from its mean
    position. Both N0.average calls are made without axis and rely on
    N0's default axis (presumably the frame axis -- confirm).

    @param mask: N x 1 list/Numpy array of 0|1, (N=atoms),
                 atoms to be considered.
    @type  mask: [1|0]

    @return: Numpy array ( N_unmasked x 1 ) of float.
    @rtype: array
    """
    frames = self.frames

    if mask is not None:
        frames = N0.compress(mask, frames, 1)

    ## mean position of each atom in all frames
    avg = N0.average(frames)

    ## distance of each atom from its mean position, per frame
    sqDeviation = N0.power(frames - avg, 2)
    dist = N0.sqrt(N0.sum(sqDeviation, 2))

    return N0.average(dist)
def test_Ramachandran(self):
    """Ramachandran test

    Builds a Ramachandran object from frames 0 and 11 of the test
    trajectory (protein atoms only) and checks the sum of all defined
    psi values against a stored reference number.
    """
    self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

    ## the atom masses serve as example profile
    self.traj.ref.atoms.set('mass', self.traj.ref.masses())

    frames = [self.traj[0], self.traj[11]]
    self.mdl = [m.compress(m.maskProtein()) for m in frames]

    self.rama = Ramachandran(self.mdl, name='test', profileName='mass',
                             verbose=self.local)

    self.psi = N0.array(self.rama.psi)

    if self.local:
        self.rama.show()

    ## undefined angles are stored as None and must not enter the sum
    defined = N0.logical_not(N0.equal(self.psi, None))
    r = N0.sum(N0.compress(defined, self.psi))

    self.assertAlmostEqual(r, -11717.909796797909, 2)
def addDensity(self, radius=6, minasa=None, profName='density'):
    """
    Count the number of heavy atoms within the given radius.
    Values are only collected for atoms with |minasa| accessible surface
    area; all other atoms get the default value -1.

    @param radius: in Angstrom (default: 6)
    @type  radius: float
    @param minasa: relative exposed surface - 0 to 100%
                   (default: None, all atoms)
    @type  minasa: float
    @param profName: name of the atom profile receiving the counts
                     (default: 'density')
    @type  profName: str
    """
    mHeavy = self.m.maskHeavy()
    xyz = N0.compress(mHeavy, self.m.getXyz(), 0)

    ## calculate the surface profile first, if it is needed but missing
    if minasa and self.m.profile('relAS', 0) == 0:
        self.addASA()

    if minasa:
        mSurf = self.m.profile2mask('relAS', minasa)
    else:
        mSurf = N0.ones(self.m.lenAtoms())

    ## loop over all surface atoms
    surf_pos = N0.nonzero(mSurf)
    sqRadius = radius**2
    contacts = []

    for i in surf_pos:
        ## squared distances avoid the square root
        sqDist = N0.sum((xyz - self.m.xyz[i])**2, 1)

        ## -1 presumably discounts the atom itself
        contacts.append(N0.sum(N0.less(sqDist, sqRadius)) - 1)

    self.m.atoms.set(profName, contacts, mSurf, default=-1,
                     comment='atom density radius %3.1fA' % radius,
                     version=T.dateString() + ' ' + self.version())
def go(self, model_list=None, reference=None):
    """
    Run benchmarking: fit every model to the reference, store the rmsd
    values in the models and write all result files.

    NOTE(review): both arguments are handed to T.load / PDBModel, so a
    file name seems to be accepted as well -- confirm.

    @param model_list: list of models
                       (default: None S{->} outFolder/L{F_PDBModels})
    @type  model_list: ModelList
    @param reference: reference model
                    (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
    @type  reference: PDBModel
    """
    model_list = model_list or self.outFolder + self.F_PDBModels
    reference = reference or self.outFolder + self.F_INPUT_REFERENCE

    pdb_list = T.load('%s' % model_list)
    reference = PDBModel(reference)

    # check with python 2.4
    iref, imodel = reference.compareAtoms(pdb_list[0])

    ## 1 for every model atom that also exists in the reference
    mask_casting = N0.zeros(len(pdb_list[0]))
    N0.put(mask_casting, imodel, 1)

    reference = reference.take(iref)

    ## atoms of residues with 1 to 1000 templates
    rmask = pdb_list[0].profile2mask("n_templates", 1, 1000)
    amask = pdb_list[0].res2atomMask(rmask)

    ## same selection, once in reference and once in model numbering
    mask_final_ref = N0.compress(mask_casting, amask)
    mask_final = mask_casting * amask

    reference = reference.compress(mask_final_ref)

    for i in range(len(pdb_list)):

        pdb_list[i], pdb_wo_if = self.output_fittedStructures(
            pdb_list[i], reference, i, mask_final)

        fitted_model_if = pdb_list[i].compress(mask_final)
        fitted_model_wo_if = pdb_wo_if.compress(mask_final)

        coord1 = reference.getXyz()
        coord2 = fitted_model_if.getXyz()

        aprofile = self.rmsd_res(coord1, coord2)

        self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                       reference, pdb_list[i])

        ## atoms outside of mask_final get the default value -1
        pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                              mask=mask_final, default=-1,
                              comment="rmsd to known reference structure")

    self.output_rmsd_aa(pdb_list)
    self.output_rmsd_ca(pdb_list)
    self.output_rmsd_res(pdb_list)

    self.write_PDBModels(pdb_list)
def _removeDuplicateChains(self, chainMask=None):
    """
    Get rid of identical chains. All chains are compared with each other
    via self._compareSequences; the first off-diagonal of the resulting
    matrix whose average reaches self.threshold marks the begin of the
    duplicates, and only the chains before that offset are kept.

    @param chainMask: chain mask for overriding the chain identity
                      checking; needs one entry per chain (default: None)
    @type  chainMask: [int]

    @return: number of chains removed
    @rtype: int
    """
    chainCount = len(self.chains)
    matrix = 1.0 * N0.zeros((chainCount, chainCount))
    chain_ids = []

    ## fill the upper triangle of the chain x chain identity matrix
    for i in range(0, chainCount):
        chain_ids.append(self.chains[i].chain_id)  # collect for log file

        for j in range(i, chainCount):
            # convert 3-letter-code res list into 1-letter-code String
            seq1 = singleAA(self.chains[i].sequence())
            seq2 = singleAA(self.chains[j].sequence())

            matrix[i, j] = self._compareSequences(seq1, seq2)

    ## report activity
    self.log.add("\n Chain ID's of compared chains: " + str(chain_ids))
    self.log.add(" Cross-Identity between chains:\n" + str(matrix))
    self.log.add(" Identity threshold used: " + str(self.threshold))

    if chainMask:
        ## a user-supplied chain mask overrides the automatic deletion
        if len(chainMask) == chainCount:
            self.chains = N0.compress(chainMask, self.chains)
            self.log.add(
                "NOTE: chain mask %s used for removing chains.\n" % chainMask)
        else:
            self.log.add("########## ERROR ###############")
            self.log.add("# Chain mask is only %i chains long"
                         % len(chainMask))
            self.log.add("# when a mask of length %i is needed"
                         % chainCount)
            self.log.add("# No cleaning will be performed.\n")
    else:
        ## look at diagonals in "identity matrix" (each chain against
        ## each); a diagonal of 1's marks the begin of the duplicates
        duplicate = len(self.chains)

        for offset in range(1, chainCount):
            diag = N0.diagonal(matrix, offset, 0, 1)
            avg = 1.0 * N0.sum(diag) / len(diag)

            if avg >= self.threshold:
                duplicate = offset
                break

        self.chains = self.chains[:duplicate]
        self.log.add(
            "NOTE: Identity matrix will be used for removing identical chains."
        )

    ## report how many chains have been removed
    self.log.add(str(chainCount - len(self.chains)) +
                 " chains have been removed.\n")

    return (chainCount - len(self.chains))
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing
    outliers. Superimposed array y would be
    C{ N0.dot(y, N0.transpose(r)) + t }.

    @param x: reference coordinates
    @type  x: array
    @param y: coordinates to be matched onto x
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    rmsd_old = 0
    stdv_old = 0

    n = 0

    ## 1 for every row that still takes part in the fit
    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## find transformation for best match of the remaining rows
        r, t = findTransformation(N0.compress(mask, x, 0),
                                  N0.compress(mask, y, 0))

        ## transform ALL coordinates
        xt = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rows that were kicked out earlier are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - xt, 2), 1)) * mask

        ## rmsd and stdv of the rows still considered
        d_considered = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_considered**2))
        stdv = MU.SD(d_considered)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - rmsd_old)
        d_stdv = abs(1 - stdv_old / stdv)

        converged = d_rmsd < eps_rmsd and d_stdv < eps_stdv

        if not converged:
            rmsd_old = rmsd
            stdv_old = stdv

        ## fraction of rows used for this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out non-matching rows
        mask = N0.logical_and(mask, N0.less(d, rmsd + z * stdv))
        outliers = N0.nonzero(N0.logical_not(mask))

        iter_trace.append([perc, round(rmsd, 3), outliers])

        n += 1

        ## the trace of the final round is recorded before leaving
        if converged or (n_iterations and n >= n_iterations):
            break

    return (r, t), iter_trace
doper.addSurfaceRacer(probe=1.4) surf_lig = lig.profile2mask('MS', 0.0001, 101) ## kick out non-surface rec = rec.compress(surf_rec) lig = lig.compress(surf_lig) com = Complex(rec, lig) ## get interface patch cont = com.atomContacts(cutoff=6.0) rec_if = N0.sum(cont, 1) lig_if = N0.sum(cont, 0) ## center distance c2c = N0.sqrt(N0.sum((rec.center() - lig.center())**2, 0)) print "Center2Center: ", c2c ## get patches and put them into Pymoler for display print "Patching" excl = N0.compress(N0.ones(len(rec_if)), rec_if) pm = test(rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if) pm.addPdb(rec.compress(rec_if), 'rec_interface') pm.addPdb(lig.compress(lig_if), 'lig_interface') pm.addPdb(com.model(), 'complex') ## show everything ## the patches are as movie in 'model' pm.show()
class ReduceCoordinates:
    """
    ReduceCoordinates
    =================

    Translate a PDBModel or frames from a trajectory to structure(s) with
    only one backbone and up to 2 side chain atoms per residue.
    The new atoms are the centers of mass of several atoms and carry the
    weight of the pooled atoms in an atom profile called 'mass'.

    Examples
    --------

    >>> ## create with reference PDBModel
    >>> reducer = ReduceCoordinates( m_ref )
    >>> ## creates reduced PDBModel from m_ref
    >>> m_red = reducer.reduceToModel()

    OR:

    >>> m_red_1 = reducer.reduceToModel( m1.getXyz() )  ## reduce many models
    >>> m_red_2 = reducer.reduceToModel( m2.getXyz() )  ## with identical atoms

    OR:

    >>> ## reduce a complete Trajectory
    >>> reducer = ReduceCoordinates( traj.ref )
    >>> red_ref= reducer.reduceToModel()
    >>> frames = reducer.reduceXyz( traj.frames )
    >>> traj_red = Trajectory( ref=red_ref )
    >>> traj_red.frames = frames
    """

    ## modify order of TYR/PHE ring atoms to move centers away from ring
    ## axis; this is done on a private copy of the table so that the
    ## shared MU.aaAtoms is not changed for every other user
    aaAtoms = dict(MU.aaAtoms)
    aaAtoms['TYR'] = [
        'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CE1', 'CD2', 'CE2', 'CZ',
        'OH', 'OXT'
    ]
    aaAtoms['PHE'] = [
        'N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CE1', 'CD2', 'CE2', 'CZ',
        'OXT'
    ]

    def __init__(self, model, maxPerCenter=4):
        """
        Prepare reduction of coordinates from a given model.

        @param model: reference model defining atom content and order
        @type  model: PDBModel
        @param maxPerCenter: max number of atoms per side chain center atom
                             (default: 4)
        @type  maxPerCenter: int
        """
        self.m = model
        self.__addMassProfile(self.m)

        ## sort atoms within residues into standard order
        def cmpAtoms(a1, a2):
            """
            Comparison function for bringing atoms into standard order
            within residues as defined by L{ aaAtoms }. Atoms that are
            not listed for the residue are compared by name instead.

            @param a1: atom
            @type  a1: dict
            @param a2: atom
            @type  a2: dict

            @return: result of cmp on the two atoms
            @rtype: [-1|0|1]
            """
            res = a1['residue_name']
            target = self.aaAtoms[res]
            try:
                return cmp(target.index(a1['name']),
                           target.index(a2['name']))
            except ValueError:
                ## unknown atom name: fall back to alphabetical order
                return cmp(a1['name'], a2['name'])

        self.a_indices = self.m.argsort(cmpAtoms)
        self.m_sorted = self.m.sort(self.a_indices)

        ## remove H from internal model and from list of atom positions
        maskH = self.m_sorted.remove(self.m_sorted.maskH())
        self.a_indices = N0.compress(maskH, self.a_indices)

        self.makeMap(maxPerCenter)
def makeMap(self, maxPerCenter=4):
    """
    Calculate mapping between complete and reduced atom list.
    The result is stored in two attributes::

      self.groups - list of lists of int, groups of atom indices into
                    the original model, one group per new center
      self.atoms  - DictList with one new center atom per group

    @param maxPerCenter: max number of atoms per side chain center atom
                         (default: 4)
    @type  maxPerCenter: int
    """
    m = self.m_sorted
    resIndex = m.resIndex()
    resModels = m.resModels()

    ## reset before the center atoms are created with nextAtom
    self.currentAtom = 0

    groups = []
    atoms = DictList()

    nResidues = len(resIndex)

    for i in range(nResidues):

        ## first and last atom of this residue in the sorted model
        first_atom = resIndex[i]

        if i < nResidues - 1:
            last_atom = resIndex[i + 1] - 1
        else:
            last_atom = len(self.a_indices) - 1

        a = m.atoms[first_atom]

        ## position of this residue's atoms in original PDBModel (unsorted)
        a_indices = self.a_indices[first_atom:last_atom + 1]

        if a['residue_name'] in ('GLY', 'ALA'):
            ## GLY and ALA are represented by a single center
            bb_a_indices = a_indices
            sc_groups = []
        else:
            ## separate backbone from side chain and split the latter
            bb_mask = resModels[i].maskBB()
            bb_a_indices = N0.compress(bb_mask, a_indices)
            sc_a_indices = N0.compress(N0.logical_not(bb_mask), a_indices)
            sc_groups = self.group(sc_a_indices, maxPerCenter)

        ## one backbone center ...
        groups.append(bb_a_indices)
        atoms += [self.nextAtom(a, 'BB')]

        ## ... followed by the side chain centers SC0, SC1, ..
        for k, g in enumerate(sc_groups):
            groups.append(g)
            atoms += [self.nextAtom(a, 'SC%i' % k)]

    self.groups = groups
    self.atoms = atoms
surf_lig = lig.profile2mask( 'MS', 0.0001, 101 ) ## kick out non-surface rec = rec.compress( surf_rec ) lig = lig.compress( surf_lig ) com = Complex( rec, lig ) ## get interface patch cont = com.atomContacts( cutoff=6.0 ) rec_if = N0.sum( cont, 1 ) lig_if = N0.sum( cont, 0 ) ## center distance c2c = N0.sqrt( N0.sum( (rec.center() - lig.center())**2, 0 ) ) print "Center2Center: ", c2c ## get patches and put them into Pymoler for display print "Patching" excl = N0.compress( N0.ones( len( rec_if ) ), rec_if ) pm = test( rec, c2c, nAtoms=len(N0.nonzero(rec_if)), exclude=rec_if ) pm.addPdb( rec.compress( rec_if ), 'rec_interface' ) pm.addPdb( lig.compress( lig_if ), 'lig_interface' ) pm.addPdb( com.model(), 'complex') ## show everything ## the patches are as movie in 'model' pm.show()
def fit(self, mask=None, ref=None, n_it=1, prof='rms', verbose=1, fit=1,
        **profInfos):
    """
    Superimpose all coordinate frames on reference coordinates. Put rms
    values in a profile. If n_it != 1, the fraction of atoms considered
    for the fit is put into a profile called |prof|_considered
    (i.e. by default 'rms_considered').

    @param mask: atom mask, atoms to consider default: [all]
    @type  mask: [1|0]
    @param ref: use as reference, default: None, average Structure
    @type  ref: PDBModel
    @param n_it: number of fit iterations, kicking out outliers on the way
                 1 -> classic single fit, 0 -> until convergence
                 (default: 1)
    @type  n_it: int
    @param prof: save rms per frame in profile of this name, ['rms']
    @type  prof: str
    @param verbose: print progress info to STDERR (default: 1)
    @type  verbose: 1|0
    @param fit: transform frames after match, otherwise just calc rms
                (default: 1)
    @type  fit: 1|0
    @param profInfos: additional key=value pairs for rms profile info []
    @type  profInfos: key=value
    """
    if ref is None:
        refxyz = N0.average(self.frames, 0)
    else:
        refxyz = ref.getXyz()

    if mask is None:
        mask = N0.ones(len(refxyz), N0.Int32)

    refxyz = N0.compress(mask, refxyz, 0)

    if verbose:
        T.errWrite("rmsd fitting...")

    rms = []           ## rms value of each frame
    non_outliers = []  ## fraction of atoms considered for rms and fit
    iterations = []    ## number of iterations performed on each frame

    for i in range(0, len(self.frames)):

        xyz = self.frames[i]

        ## the transformation is determined from the masked atoms only
        ## but applied to all atoms of the frame
        xyz_masked = N0.compress(mask, xyz, 0)

        if n_it == 1:
            ## classic single fit
            r, t = rmsFit.findTransformation(refxyz, xyz_masked)

            xyz_transformed = N0.dot(xyz, N0.transpose(r)) + t

            d = N0.sqrt(N0.sum(N0.power(
                N0.compress(mask, xyz_transformed, 0) - refxyz, 2), 1))

            rms.append(N0.sqrt(N0.average(d**2)))
        else:
            ## iterative fit; the last trace entry describes the result
            (r, t), rmsdList = rmsFit.match(refxyz, xyz_masked, n_it)

            iterations.append(len(rmsdList))
            non_outliers.append(rmsdList[-1][0])

            xyz_transformed = N0.dot(xyz, N0.transpose(r)) + t

            rms.append(rmsdList[-1][1])

        if fit:
            self.frames[i] = xyz_transformed.astype(N0.Float32)

        if verbose and i % 100 == 0:
            T.errWrite('#')

    self.setProfile(prof, rms, n_iterations=n_it, **profInfos)

    if non_outliers:
        self.setProfile(prof + '_considered', non_outliers,
                        n_iterations=n_it,
                        comment='fraction of atoms considered for iterative fit')

    if verbose:
        T.errWrite('done\n')
def go(self, model_list = None, reference = None):
    """
    Run benchmarking: fit every model to the reference, store the rmsd
    values in the models and write all result files.

    NOTE(review): both arguments are handed to T.load / PDBModel, so a
    file name seems to be accepted as well -- confirm.

    @param model_list: list of models
                       (default: None S{->} outFolder/L{F_PDBModels})
    @type  model_list: ModelList
    @param reference: reference model
                    (default: None S{->} outFolder/L{F_INPUT_REFERENCE})
    @type  reference: PDBModel
    """
    model_list = model_list or self.outFolder + self.F_PDBModels
    reference = reference or self.outFolder + self.F_INPUT_REFERENCE

    pdb_list = T.load('%s'%model_list)
    reference = PDBModel(reference)

    # check with python 2.4
    iref, imodel = reference.compareAtoms(pdb_list[0])

    ## 1 for every model atom that also exists in the reference
    mask_casting = N0.zeros(len(pdb_list[0]))
    N0.put(mask_casting, imodel, 1)

    reference = reference.take(iref)

    ## atoms of residues with 1 to 1000 templates
    rmask = pdb_list[0].profile2mask("n_templates", 1,1000)
    amask = pdb_list[0].res2atomMask(rmask)

    ## same selection, once in reference and once in model numbering
    mask_final_ref = N0.compress(mask_casting, amask)
    mask_final = mask_casting * amask

    reference = reference.compress(mask_final_ref)

    for i in range(len(pdb_list)):

        pdb_list[i], pdb_wo_if = self.output_fittedStructures(
            pdb_list[i], reference, i, mask_final)

        fitted_model_if = pdb_list[i].compress(mask_final)
        fitted_model_wo_if = pdb_wo_if.compress(mask_final)

        coord1 = reference.getXyz()
        coord2 = fitted_model_if.getXyz()

        aprofile = self.rmsd_res(coord1,coord2)

        self.calc_rmsd(fitted_model_if, fitted_model_wo_if,
                       reference, pdb_list[i])

        ## atoms outside of mask_final get the default value -1
        pdb_list[i].atoms.set('rmsd2ref_if', aprofile,
                              mask=mask_final, default = -1,
                              comment="rmsd to known reference structure")

    self.output_rmsd_aa(pdb_list)
    self.output_rmsd_ca(pdb_list)
    self.output_rmsd_res(pdb_list)

    self.write_PDBModels(pdb_list)