def removeAtoms( self, what ):
    """
    Delete the selected atoms from all frames of the trajectory and from
    the reference structure.

    @param what: selection of the atoms to delete, one of::
         - function( atom_dict ) -> 1 || 0    (1..remove)
         - list of int [4, 5, 6, 200, 201..], indices of atoms to remove
         - list of int [11111100001101011100..N_atoms], mask (1..remove)
         - int, remove atom with this index
    @type  what: any

    @return: N0.array(1 x N_atoms_old) of 0||1, the mask that was used to
             compress the atoms and xyz arrays (1..atom was kept). It can
             be re-used to apply the same change to another array that has
             the dimension of the old(!) xyz and atoms.
    @rtype: array
    """
    ## self.atomMask() flags the atoms to be dropped -> invert it
    selected = self.atomMask( what )
    mask = N0.logical_not( selected )

    ## keepAtoms() expects the indices of the survivors
    kept_indices = N0.nonzero( mask )
    self.keepAtoms( kept_indices )

    return mask
def __inverseIndices(self, model, i_atoms):
    """
    Complement of a list of atom indices.

    @param model: model
    @type  model: PDBModel
    @param i_atoms: atom indices
    @type  i_atoms: [int]

    @return: all atom indices of model that are NOT listed in i_atoms
    @rtype: [int]
    """
    ## flag the listed positions in an all-zero mask of len(model) ...
    listed = N0.zeros(len(model), N0.Int)
    N0.put(listed, i_atoms, 1)

    ## ... the positions that stay unflagged are the complement
    unlisted = N0.logical_not(listed)
    return N0.nonzero(unlisted)
def __center_model( self, model ):
    """
    Return a water-free clone of a PDBModel that is translated so that
    its center of mass lies at 0,0,0. All changes are applied to
    model.clone(), not to the model passed in.

    @param model: model to center
    @type  model: PDBModel

    @return: PDBModel (clone of model)
    @rtype: PDBModel
    """
    centered = model.clone()

    ## drop everything that maskH2O() flags as water
    not_water = N0.logical_not( centered.maskH2O() )
    centered.keep( N0.nonzero( not_water ) )

    ## the center of mass is taken AFTER the water was removed
    offset = centered.centerOfMass()
    centered.setXyz( centered.getXyz() - offset )

    return centered
def test_ReduceCoordinates(self):
    """ReduceCoordinates test"""
    f_pdb = T.testRoot() + '/com/1BGS.pdb'
    self.m = PDBModel(f_pdb)

    ## work on the structure without water
    no_water = N0.logical_not(self.m.maskH2O())
    self.m = self.m.compress(no_water)

    ## tag every atom with its running index
    self.m.atoms.set('test', range(len(self.m)))

    self.red = ReduceCoordinates(self.m, 4)
    self.mred = self.red.reduceToModel()

    if self.local:
        ## single parenthesized argument: same output under Python 2
        print('\nAtoms before reduction %i' % self.m.lenAtoms())
        print('Atoms After reduction %i' % self.mred.lenAtoms())

    n_reduced = self.mred.lenAtoms()
    self.assertEqual(n_reduced, 445)
def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative fitting),
    the known structure (reference), and the associated model inside the
    pdb_list. Calculates the different RMSD values and stores them in
    model.info under the keys::
        rmsd2ref_aa_wo_if    - rms( reference, fit=0 ), normal fit
        rmsd2ref_ca_wo_if    - rms( reference, mask=CA, fit=1 ), normal fit
        rmsd2ref_aa_if       - as above, after removing the outliers of
        rmsd2ref_ca_if         the iterative fit from model and reference
        rmsd2ref_aa_outliers - fraction of all atoms flagged as outlier
        rmsd2ref_ca_outliers - fraction of CA atoms flagged as outlier

    @param fitted_model_if: iteratively fitted model; must carry the
                            profile 'rms_outliers'
    @type  fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type  fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type  reference: PDBModel
    @param model: model whose info dictionary receives the results
    @type  model: PDBModel
    """
    ## first calculate rmsd for heavy atoms and CA without
    ## removing any residues from the model
    mask_CA = fitted_model_wo_if.maskCA()
    rmsd_aa = fitted_model_wo_if.rms(reference, fit=0)
    rmsd_ca = fitted_model_wo_if.rms(reference, mask=mask_CA, fit=1)

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    ## despite its name this mask is 1 for every atom that is NOT an
    ## outlier, i.e. for the atoms that are kept
    outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

    ## CA mask over ALL atoms. It must be taken before the model is
    ## compressed: outliers_mask is indexed by the uncompressed atoms and
    ## can only be combined with a mask of that same length.
    mask_CA_all = fitted_model_if.maskCA()

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress(outliers_mask)
    reference = reference.compress(outliers_mask)

    mask_CA = fitted_model_if.maskCA()
    rmsd_aa_if = fitted_model_if.rms(reference, fit=0)
    rmsd_ca_if = fitted_model_if.rms(reference, mask=mask_CA, fit=1)

    model.info["rmsd2ref_aa_if"] = rmsd_aa_if
    model.info["rmsd2ref_ca_if"] = rmsd_ca_if

    ## fraction of all atoms that were thrown out
    n_atoms = len(outliers_mask)
    n_kept = N0.sum(outliers_mask)
    model.info["rmsd2ref_aa_outliers"] = 1. * (n_atoms - n_kept) / n_atoms

    ## fraction of CA atoms that were thrown out. This used to combine the
    ## CA mask of the already compressed model with the full-length
    ## outliers_mask, so the two arrays were not aligned.
    n_ca = N0.sum(mask_CA_all)
    n_ca_kept = N0.sum(N0.compress(mask_CA_all, outliers_mask))
    model.info["rmsd2ref_ca_outliers"] = 1. * (n_ca - n_ca_kept) / n_ca
def calc_rmsd(self, fitted_model_if, fitted_model_wo_if, reference, model):
    """
    Takes the two fitted structures (with and without iterative fitting),
    the known structure (reference), and the associated model inside the
    pdb_list. Calculates the different RMSD values and stores them in
    model.info under the keys::
        rmsd2ref_aa_wo_if    - rms( reference, fit=0 ), normal fit
        rmsd2ref_ca_wo_if    - rms( reference, mask=CA, fit=1 ), normal fit
        rmsd2ref_aa_if       - as above, after removing the outliers of
        rmsd2ref_ca_if         the iterative fit from model and reference
        rmsd2ref_aa_outliers - fraction of all atoms flagged as outlier
        rmsd2ref_ca_outliers - fraction of CA atoms flagged as outlier

    @param fitted_model_if: iteratively fitted model; must carry the
                            profile 'rms_outliers'
    @type  fitted_model_if: PDBModel
    @param fitted_model_wo_if: normally fitted model
    @type  fitted_model_wo_if: PDBModel
    @param reference: reference model
    @type  reference: PDBModel
    @param model: model whose info dictionary receives the results
    @type  model: PDBModel
    """
    ## first calculate rmsd for heavy atoms and CA without
    ## removing any residues from the model
    mask_CA = fitted_model_wo_if.maskCA()
    rmsd_aa = fitted_model_wo_if.rms( reference, fit=0 )
    rmsd_ca = fitted_model_wo_if.rms( reference, mask=mask_CA, fit=1 )

    model.info["rmsd2ref_aa_wo_if"] = rmsd_aa
    model.info["rmsd2ref_ca_wo_if"] = rmsd_ca

    ## despite its name this mask is 1 for every atom that is NOT an
    ## outlier, i.e. for the atoms that are kept
    outliers_mask = N0.logical_not(fitted_model_if.profile("rms_outliers"))

    ## CA mask over ALL atoms. It must be taken before the model is
    ## compressed: outliers_mask is indexed by the uncompressed atoms and
    ## can only be combined with a mask of that same length.
    mask_CA_all = fitted_model_if.maskCA()

    ## Now remove the residues that were outliers in the iterative fit
    ## and calculate the rmsd again
    fitted_model_if = fitted_model_if.compress( outliers_mask )
    reference = reference.compress( outliers_mask )

    mask_CA = fitted_model_if.maskCA()
    rmsd_aa_if = fitted_model_if.rms( reference, fit=0 )
    rmsd_ca_if = fitted_model_if.rms( reference, mask=mask_CA, fit=1 )

    model.info["rmsd2ref_aa_if"] = rmsd_aa_if
    model.info["rmsd2ref_ca_if"] = rmsd_ca_if

    ## fraction of all atoms that were thrown out
    n_atoms = len( outliers_mask )
    n_kept = N0.sum( outliers_mask )
    model.info["rmsd2ref_aa_outliers"] = 1. * (n_atoms - n_kept) / n_atoms

    ## fraction of CA atoms that were thrown out. This used to combine the
    ## CA mask of the already compressed model with the full-length
    ## outliers_mask, so the two arrays were not aligned.
    n_ca = N0.sum( mask_CA_all )
    n_ca_kept = N0.sum( N0.compress( mask_CA_all, outliers_mask ) )
    model.info["rmsd2ref_ca_outliers"] = 1. * (n_ca - n_ca_kept) / n_ca
def __setAll_1D(self, a):
    """
    Replace content of this sparseArray with values from Numeric array or
    list of numbers -- only for 1-dimensional arrays.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    ## isinstance (rather than an exact type comparison) also converts
    ## subclasses of list, which otherwise fail below for lack of .shape
    if isinstance(a, list):
        a = N0.array(a, self.__typecode)

    if self.shape != a.shape:
        ## call form of raise: valid in Python 2 and Python 3
        raise SparseArrayError('dimensions not aligned')

    ## only positions that differ from the default value are stored
    non_default = N0.logical_not(N0.equal(a, self.__default))

    self.indices = N0.nonzero(non_default).tolist()
    self.values = N0.take(a, self.indices).tolist()
def test_outliers(self, traj=None):
    """EnsembleTraj.outliers/concat test"""
    ## append the trajectory to itself
    self.t2 = self.tr.concat(self.tr)

    ca_mask = self.tr.ref.maskCA()
    self.o = self.t2.outliers(z=1.2, mask=ca_mask, verbose=self.local)

    if self.local:
        ## single parenthesized argument: same output under Python 2
        print(self.o)

    ## keep only the members that were not reported as outlier
    keep_members = N0.logical_not(self.o)
    self.t = self.t2.compressMembers(keep_members)

    self.p2 = self.t.plotMemberProfiles('rms', xlabel='frame')

    if self.local or self.VERBOSITY > 2:
        self.p2.show()

    self.assertEqual(self.o, 10 * [False])
def __setAll_1D( self, a ):
    """
    Replace content of this sparseArray with values from Numeric array or
    list of numbers -- only for 1-dimensional arrays.

    @param a: array OR list
    @type  a: array OR [ number ]

    @raise SparseArrayError: if the shape of a differs from self.shape
    """
    ## isinstance (rather than an exact type comparison) also converts
    ## subclasses of list, which otherwise fail below for lack of .shape
    if isinstance( a, list ):
        a = N0.array( a, self.__typecode )

    if self.shape != a.shape:
        ## call form of raise: valid in Python 2 and Python 3
        raise SparseArrayError( 'dimensions not aligned' )

    ## only positions that differ from the default value are stored
    non_default = N0.logical_not( N0.equal( a, self.__default ) )

    self.indices = N0.nonzero( non_default ).tolist()
    self.values = N0.take( a, self.indices ).tolist()
def test_AmberParmMirror(self):
    """AmberParmBuilder.parmMirror test"""
    ref = self.ref

    ## only water is stripped -- protein and Na+ ion are kept
    no_water = N0.logical_not(ref.maskH2O())
    self.mdry = ref.compress(no_water)

    self.a = AmberParmBuilder(self.mdry,
                              verbose=self.local,
                              leap_out=self.leapout,
                              debug=self.DEBUG)

    ## write topology + coordinates, then convert them back into a PDB
    self.a.parmMirror(f_out=self.dryparm, f_out_crd=self.drycrd)
    self.a.parm2pdb(self.dryparm, self.drycrd, self.drypdb)

    self.m1 = PDBModel(self.drypdb)
    self.m2 = PDBModel(self.refdry)

    ## the regenerated coordinates must equal the stored reference
    same_xyz = N0.array(self.m1.xyz == self.m2.xyz)
    self.assert_(same_xyz.all())
def test_Ramachandran(self):
    """Ramachandran test"""
    self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

    ## the 'mass' profile is the one handed to Ramachandran below
    ref = self.traj.ref
    ref.atoms.set('mass', ref.masses())

    ## two frames of the trajectory, reduced to what maskProtein() selects
    frames = [self.traj[0], self.traj[11]]
    self.mdl = [frame.compress(frame.maskProtein()) for frame in frames]

    self.rama = Ramachandran(self.mdl, name='test', profileName='mass',
                             verbose=self.local)
    self.psi = N0.array(self.rama.psi)

    if self.local:
        self.rama.show()

    ## sum up all psi values that are not None
    defined = N0.logical_not(N0.equal(self.psi, None))
    r = N0.sum(N0.compress(defined, self.psi))

    self.assertAlmostEqual(r, -11717.909796797909, 2)
def test_Trajectory(self):
    """Trajectory test"""
    ## The trajectory is loaded from a pickle. A disabled variant of this
    ## test instead collected the '.PDB.GZ' files below
    ## T.testRoot() + '/lig_pc2_00/pdb/' and built
    ## Trajectory( pdbs[:3], pdbs[0], rmwat=0 ) from them.
    self.traj = T.load(T.testRoot() + '/lig_pcr_00/traj.dat')

    ## sort frames after frameNames, then sort atoms
    self.traj.sortFrames()
    self.traj.sortAtoms()

    ## remove waters
    no_water = N0.logical_not( self.traj.ref.maskH2O() )
    self.traj = self.traj.compressAtoms( no_water )

    ## get fluctuation on a residue level (result is not checked here)
    r1 = self.traj.getFluct_local( verbose=self.local )

    ## fit backbone of frames to reference structure
    bb_mask = self.traj.ref.maskBB()
    self.traj.fit( ref=self.traj.ref, mask=bb_mask, verbose=self.local )

    rms_sum = N0.sum( self.traj.profile('rms') )
    self.assertAlmostEqual( rms_sum, 58.101235746353879, 2 )
def match(x, y, n_iterations=1, z=2, eps_rmsd=0.5, eps_stdv=0.05):
    """
    Matches two arrays onto each other, while iteratively removing outliers.
    Superimposed array y would be C{ N0.dot(y, N0.transpose(r)) + t }.

    @param x: coordinates that y is matched onto (rows pair up with y)
    @type  x: array
    @param y: coordinates that are transformed
    @type  y: array
    @param n_iterations: number of calculations::
                           1 .. no iteration
                           0 .. until convergence
    @type  n_iterations: 1|0
    @param z: number of standard deviations for outlier definition
              (default: 2)
    @type  z: float
    @param eps_rmsd: tolerance in rmsd (default: 0.5)
    @type  eps_rmsd: float
    @param eps_stdv: tolerance in standard deviations (default: 0.05)
    @type  eps_stdv: float

    @return: (r,t), [ [percent_considered, rmsd_for_it, outliers] ]
    @rtype: (array, array), [float, float, int]
    """
    iter_trace = []

    prev_rmsd = 0
    prev_stdv = 0
    n_done = 0
    converged = 0

    ## 1 .. row takes part in the fit; everything is in at the start
    mask = N0.ones(len(y), N0.Int32)

    while True:

        ## find transformation for best match of the rows still considered
        x_in = N0.compress(mask, x, 0)
        y_in = N0.compress(mask, y, 0)
        r, t = findTransformation(x_in, y_in)

        ## transform ALL rows of y, also the ones currently masked out
        y_fitted = N0.dot(y, N0.transpose(r)) + t

        ## row distances; rows outside the mask are set to 0
        d = N0.sqrt(N0.sum(N0.power(x - y_fitted, 2), 1)) * mask

        ## rmsd and stdv over the rows inside the mask
        d_in = N0.compress(mask, d)
        rmsd = N0.sqrt(N0.average(d_in**2))
        stdv = MU.SD(d_in)

        ## check conditions for convergence
        d_rmsd = abs(rmsd - prev_rmsd)
        d_stdv = abs(1 - prev_stdv / stdv)

        if d_rmsd < eps_rmsd and d_stdv < eps_stdv:
            converged = 1
        else:
            prev_rmsd = rmsd
            prev_stdv = stdv

        ## fraction of rows that went into this round
        perc = round(float(N0.sum(mask)) / float(len(mask)), 2)

        ## throw out rows that are z * stdv or more above the rmsd
        within_limit = N0.less(d, rmsd + z * stdv)
        mask = N0.logical_and(mask, within_limit)
        outliers = N0.nonzero(N0.logical_not(mask))

        iter_trace.append([perc, round(rmsd, 3), outliers])

        n_done += 1

        ## stop on convergence or when the requested rounds are done
        if converged or (n_iterations and n_done >= n_iterations):
            break

    return (r, t), iter_trace
def addSurfaceRacer(self, probe=1.4, vdw_set=1, probe_suffix=0, mask=None):
    """
    Run SurfaceRacer on the model and always add three atom profiles::

      curvature - average curvature (or curvature_1.4 if probe_suffix=1)
      MS - molecular surface area   (or MS_1.4 if probe_suffix=1)
      AS - accessible surface area  (or AS_1.4 if probe_suffix=1)

    If the probe radius is 1.4 Angstrom, the Richards vdw radii set is
    used and the result contains 'relAS', two more profiles are added::

      relAS - Relative solvent accessible surface
      relMS - Relative molecular surface

    See {Biskit.SurfaceRacer}

    @param probe: probe radius
    @type  probe: float
    @param vdw_set: defines what vdw-set to use (1-Richards, 2-Chothia)
    @type  vdw_set: 1|2
    @param probe_suffix: append probe radius to profile names
    @type  probe_suffix: 1|0
    @param mask: optional atom mask to apply before calling surface racer
                 (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @raise ExeConfigError: if external application is missing
    """
    ## '' for probe_suffix=0, '_<probe>' for probe_suffix=1
    suffix = probe_suffix * ('_%3.1f' % probe)

    ## hydrogens + waters are not allowed during the surface calculation
    if mask is None:
        mask = self.m.maskHeavy() * N0.logical_not(self.m.maskSolvent())

    fs = SurfaceRacer(self.m, probe, vdw_set=vdw_set, mask=mask)
    fs_dic = fs.run()
    fs_info = fs_dic['surfaceRacerInfo']

    ## (profile name, key in the result dictionary, comment)
    profiles = [
        ('MS' + suffix, 'MS', 'Molecular Surface area in A'),
        ('AS' + suffix, 'AS', 'Accessible Surface area in A'),
        ('curvature' + suffix, 'curvature', 'Average curvature'),
    ]

    ## the relative surfaces never get the probe suffix
    if round(probe, 1) == 1.4 and vdw_set == 1 and 'relAS' in fs_dic:
        profiles += [
            ('relAS', 'relAS', 'Relative solvent accessible surf.'),
            ('relMS', 'relMS', 'Relative molecular surf.'),
        ]

    for name, key, comment in profiles:
        self.m.atoms.set(name, fs_dic[key], mask, 0,
                         comment=comment,
                         version=T.dateString() + ' ' + self.version(),
                         **fs_info)
def __init__( self, model, probe, vdw_set=1, mode=3, mask=None, **kw ):
    """
    SurfaceRacer creates three output files::
      result.txt - contains breakdown of surface areas and is written
                     to the directory where the program resides. This
                     file is discarded here.
      <file>.txt - contains the accessible, molecular surface areas
                     and average curvature information parsed here.
                     The filename is that of the input pdb file but
                     with a .txt extension.
      <file>_residue.txt - new in version 5.0 and not used by this
                     wrapper
      stdout     - some general information about the calculation.
                     Redirected to /dev/null

    @param model: model to analyze
    @type  model: PDBModel
    @param probe: probe radius, Angstrom
    @type  probe: float
    @param vdw_set: Van der Waals radii set (default: 1)::
                      1 - Richards (1977)
                      2 - Chothia  (1976)
    @type  vdw_set: 1|2
    @param mode: calculation mode (default: 3)::
                   1- Accessible surface area only
                   2- Accessible and molecular surface areas
                   3- Accessible, molecular surface areas and
                      average curvature
    @type  mode: 1|2|3
    @param mask: optional atom mask to apply before calling surface racer
                 (default: heavy atoms AND NOT solvent)
    @type  mask: [ bool ]

    @param kw: additional key=value parameters for Executor:
    @type  kw: key=value pairs
    ::
      debug    - 0|1, keep all temporary files (default: 0)
      verbose  - 0|1, print progress messages to log (log != STDOUT)
      node     - str, host for calculation (None->local) NOT TESTED
                      (default: None)
      nice     - int, nice level (default: 0)
      log      - Biskit.LogFile, program log (None->STOUT)
                      (default: None)
    """
    Executor.__init__( self, 'surfaceracer', template=self.inp, **kw )

    working_copy = model.clone()

    ## the default mask is derived from the model that was passed in
    if mask is None:
        mask = model.maskHeavy() * N0.logical_not( model.maskSolvent() )
    self.mask = mask

    ## only the masked atoms of the clone are kept in self.model
    self.model = working_copy.compress( self.mask )

    ## will be filled in by self.prepare() after the temp folder is ready
    self.f_pdb = None
    self.f_pdb_name = None
    self.f_out_name = None

    ## parameters that can be changed
    self.probe = probe
    self.vdw_set = vdw_set
    self.mode = mode

    ## random data dictionaries
    self.ranMS = SRT.ranMS
    self.ranAS = SRT.ranAS
    self.ranMS_Nter = SRT.ranMS_N
    self.ranAS_Nter = SRT.ranAS_N
    self.ranMS_Cter = SRT.ranMS_C
    self.ranAS_Cter = SRT.ranAS_C

    ## count failures
    self.i_failed = 0
def conservationScore(self, cons_type='cons_ent', ranNr=150, log=StdLog(), verbose=1):
    """
    Score of conserved residue pairs in the interaction surface.
    Optionally, normalized by random surface contacts.

    @param cons_type: precalculated conservation profile name,
                      see L{Biskit.PDBDope}.
    @type  cons_type: str
    @param ranNr: number of random matrices to use (default: 150);
                  0 normalizes by the sum of contacts instead
    @type  ranNr: int
    @param log: log file [STDOUT]
    @type  log: Biskit.LogFile
    @param verbose: give progress report [1]
    @type  verbose: bool | int

    @return: conservation score
    @rtype: float
    """
    def conservation(model, label):
        ## conservation profile of model; falls back to all-ones
        ## (i.e. no weighting) if the profile cannot be obtained
        try:
            return model.profile(cons_type, updateMissing=1)
        except Exception:
            ## deliberately broad: the concrete error class is not visible
            ## from here; KeyboardInterrupt / SystemExit now pass through
            if verbose:
                log.add('\n' + '*'*30 + '\nNO HHM PROFILE FOR ' + label +
                        '\n' + '*'*30 + '\n')
            return N0.ones(model.lenResidues())

    def surface_mask(model):
        ## existing 'surfMask' profile of model, calculated if missing
        try:
            surf = model.profile('surfMask')
        except Exception:
            surf = None

        ## explicit test: the truth value of an array is not a reliable
        ## "profile exists" check
        if surf is None or len(surf) == 0:
            PDBDope(model).addSurfaceMask()
            ## assumes addSurfaceMask() stores its result as 'surfMask'
            ## -- TODO confirm. The result used to be left unassigned,
            ## which ended in a NameError further down.
            surf = model.profile('surfMask')

        return surf

    recCons = conservation(self.rec(), 'RECEPTOR')
    ligCons = conservation(self.lig(), 'LIGAND')

    recSurf = surface_mask(self.rec())
    ligSurf = surface_mask(self.lig())

    surfMask = N0.ravel(N0.outerproduct(recSurf, ligSurf))

    ## pairs where both partners have a conservation value of 0 are
    ## treated as missing and removed from the contacts
    missing = N0.outerproduct(N0.equal(recCons, 0), N0.equal(ligCons, 0))
    cont = self.resContacts() * N0.logical_not(missing)

    consMat = N0.outerproduct(recCons, ligCons)
    score = cont * consMat

    if ranNr == 0:
        return N0.sum(N0.sum(score)) / N0.sum(N0.sum(cont))

    ## get a random score
    ## NOTE(review): this reads self.verbose / self.log, not the verbose
    ## and log parameters -- confirm which one is intended
    if self.verbose:
        self.log.write('.')

    ranMat = mathUtils.random2DArray(cont, ranNr, mask=surfMask)
    random_score = N0.sum(N0.sum(ranMat * consMat)) / (ranNr * 1.0)

    return N0.sum(N0.sum(score)) / random_score
def makeMap(self, maxPerCenter=4):
    """
    Calculate mapping between complete and reduced atom list.
    The result is stored in two attributes (nothing is returned)::
      self.groups - list of lists of int, groups of atom indices into
                    the original model
      self.atoms  - DictList with one new center atom per group

    @param maxPerCenter: max number of atoms per side chain center atom
                         (default: 4)
    @type  maxPerCenter: int
    """
    m = self.m_sorted
    resIndex = m.resIndex()
    resModels = m.resModels()

    self.currentAtom = 0

    groups = []
    atoms = DictList()

    i_last_res = len(resIndex) - 1

    for i_res, first_atom in enumerate(resIndex):

        ## a residue ends right before the next one starts
        if i_res < i_last_res:
            last_atom = resIndex[i_res + 1] - 1
        else:
            last_atom = len(self.a_indices) - 1

        ## first atom of the residue serves as template for the centers
        a = m.atoms[first_atom]

        ## position of this residue's atoms in original PDBModel (unsorted)
        a_indices = self.a_indices[first_atom:last_atom + 1]

        ## for each center create list of atom indices and a center atom
        if a['residue_name'] in ('GLY', 'ALA'):
            ## GLY and ALA get a single backbone center, no side chain
            bb_a_indices = a_indices
            sc_groups = []
        else:
            bb_mask = resModels[i_res].maskBB()
            bb_a_indices = N0.compress(bb_mask, a_indices)
            sc_a_indices = N0.compress(N0.logical_not(bb_mask), a_indices)
            sc_groups = self.group(sc_a_indices, maxPerCenter)

        groups.append(bb_a_indices)
        atoms += [self.nextAtom(a, 'BB')]

        ## side chain centers are numbered SC0, SC1, ... within a residue
        for i_sc, sc_group in enumerate(sc_groups):
            groups.append(sc_group)
            atoms += [self.nextAtom(a, 'SC%i' % i_sc)]

    self.groups = groups
    self.atoms = atoms