def __add3(self, n_members, excluded, trippleIndex):
    """
    Pick one more tripple of member indices from range( n_members ) and
    add it to the members that are already excluded for error estimation.
    Tripples are chosen to have minimal overlap. For 10 trajectories
    (*n_members*=10), the first 3 tripples will be (1,2,3), (4,5,6),
    (7,8,9).

    :param n_members: number of member trajectories
    :type  n_members: int
    :param excluded: excluded member trajectories
    :type  excluded: [ int ]
    :param trippleIndex: self.tripples() is called with trippleIndex + 1
                         and the last tripple it returns is the one added
    :type  trippleIndex: int

    :return: the indices of all excluded member trajectories
    :rtype: [ int ]
    """
    ## members that have not been excluded yet
    candidates = MU.difference(range(n_members), excluded)

    ## only the last of the returned tripples is of interest
    selection = self.tripples(candidates, trippleIndex + 1)
    chosen = list(selection[-1])

    return MU.union(excluded, chosen)
def __add3( self, n_members, excluded, trippleIndex ):
    """
    Extend the list of excluded member trajectories by one tripple of
    indices taken from range( n_members ); used for error estimation.
    Tripples are chosen to have minimal overlap. For 10 trajectories
    (*n_members*=10), the first 3 tripples will be (1,2,3), (4,5,6),
    (7,8,9).

    :param n_members: number of member trajectories
    :type  n_members: int
    :param excluded: excluded member trajectories
    :type  excluded: [ int ]
    :param trippleIndex: self.tripples() is called with trippleIndex + 1
                         and the last tripple it returns is the one added
    :type  trippleIndex: int

    :return: the indices of all excluded member trajectories
    :rtype: [ int ]
    """
    ## indices still available, i.e. not excluded so far
    not_excluded = MU.difference( range( n_members ), excluded )

    ## take the last tripple out of the requested ones
    candidates = self.tripples( not_excluded, trippleIndex + 1 )
    newest = candidates[-1]

    return MU.union( excluded, list( newest ) )
def parmSolvated( self, f_out, f_out_crd=None, f_out_pdb=None,
                  hetatm=0, norun=0, cap=0, capN=[], capC=[],
                  fmod=['frcmod.ionsjc_tip3p'], fprep=[],
                  box=10.0, center=True, **kw ):
    """
    Clean the model for Amber, optionally cap chain ends, and run leap
    to create topology and coordinate files.

    :param f_out: target file for parm (topology)
    :type  f_out: str
    :param f_out_crd: target file for crd (coordinates)
                      (default:|f_out_base|.crd)
    :type  f_out_crd: str
    :param f_out_pdb: target file for pdb (default:|f_out_base|_leap.pdb)
    :type  f_out_pdb: str
    :param hetatm: keep hetero atoms (default: 0)
    :type  hetatm: 1|0
    :param norun: passed on to the leap run; if true, no PDB is
                  generated from the topology afterwards (default: 0)
    :type  norun: 1|0
    :param cap: put ACE and NME capping residue on chain breaks
                (default: 0)
    :type  cap: 1|0
    :param capN: indices of chains that should get ACE cap (default: [])
    :type  capN: [int]
    :param capC: indices of chains that should get NME cap (default: [])
    :type  capC: [int]
    :param box: minimal distance of solute from box edge (default: 10.0)
    :type  box: float
    :param center: re-center coordinates (default: True)
    :type  center: bool
    :param fmod: list of files with amber parameter modifications
                 to be loaded into leap with loadAmberParams
                 (default:['frcmod.ionsjc_tip3p'] ... mod file needed for
                 default Amber ff10 ions -- topology saving will fail if
                 this one is missing)
    :type  fmod: [str]
    :param fprep: list of files with amber residue definitions
                  (to be loaded into leap with loadAmberPrep)
                  (default: [])
    :type  fprep: [str]
    :param kw: additional key=value pairs for leap input template
    :type  kw: key=value

    :raise IOError: if one of the file operations fails
    """
    f_out = t.absfile( f_out )
    f_out_crd = t.absfile( f_out_crd ) or t.stripSuffix( f_out ) + '.crd'
    f_out_pdb = t.absfile( f_out_pdb ) or \
        t.stripSuffix( f_out ) + '_leap.pdb'

    ## fmod and fprep are deliberately NOT converted to absolute file
    ## names any longer (bugfix 3434136)

    try:
        if self.verbose:
            self.log.add( '\nCleaning PDB file for Amber:' )

        m = self.leapModel( hetatm=hetatm, center=center )

        if cap:
            ## add the chains on both sides of every chain break;
            ## capN / capC are re-bound here, the (shared) default lists
            ## themselves are not assigned to
            end_broken = m.atom2chainIndices( m.chainBreaks() )
            capC = MU.union( capC, end_broken )
            capN = MU.union( capN, N0.array( end_broken ) + 1 )

        ## explicitly requested caps are applied even with cap=0
        for i in capN:
            if self.verbose:
                self.log.add( 'Adding ACE cap to chain %i' % i )
            m = self.capACE( m, i )

        for i in capC:
            if self.verbose:
                self.log.add( 'Adding NME cap to chain %i' % i )
            m = self.capNME( m, i )

        m.renumberResidues( addChainId=1 )  ## again, to accomodate capping

        ## read the template through a context manager so that the file
        ## handle is closed right away
        with open( self.leap_template ) as f_template:
            template = f_template.read()

        leap_mod = self.__fLines( 'm = loadAmberParams %s\n', fmod )
        leap_prep = self.__fLines( 'loadAmberPrep %s\n', fprep )

        ss = self.__ssBonds( m, cutoff=4. )
        self.__cys2cyx( m, ss )
        leap_ss = self.__fLines( self.ss_bond, ss )

        if self.verbose:
            self.log.add( 'Found %i disulfide bonds: %s'
                          % (len(ss), str(ss)) )
            self.log.add( 'writing cleaned PDB to %s' % self.leap_pdb )

        m.writePdb( self.leap_pdb, ter=3 )

        self.__runLeap( template, in_pdb=self.leap_pdb,
                        out_parm=f_out, out_crd=f_out_crd,
                        ss_bonds=leap_ss, fmod=leap_mod,
                        fprep=leap_prep, norun=norun,
                        box=box, **kw )

        if not norun:
            self.parm2pdb( f_out, f_out_crd, f_out_pdb )

        ## the intermediate leap input PDB is only kept on request
        if not self.keep_leap_pdb and not self.debug:
            t.tryRemove( self.leap_pdb )

    except IOError as why:
        ## callers get a plain IOError; the original error stays attached
        raise IOError( why ) from why
def concat(self, *profiles):
    """
    Concatenate all profiles in this with corresponding profiles in the
    given ProfileCollection(s). The union of the profile keys of both
    collections is processed. A profile that is missing from one of the
    two collections is replaced by the result of __clonedefault (called
    with the existing profile, the other collection's profLength() and
    the profile's 'default' info value). Profiles for which the
    concatenation raises an error are skipped with a warning::

        p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
        length of p0+p1+p2..

    :param profiles: profile(s) to concatenate
    :type  profiles: ProfileCollection(s)

    :return: concatenated profile(s); *self* if called without arguments
    :rtype: ProfileCollection / subclass
    """
    ## end recursion (no more arguments)
    if not profiles:
        return self

    other = profiles[0]  ## not called 'next' to keep the builtin usable
    r = self.__class__()

    ##!!! BIG FAT WARNING: empty profilecollection does not imply empty
    ## model. An empty PC w/o any profiles currently doesn't know which
    ## length it is supposed to have. If profLength == 0 for real, then
    ## the next PC's profiles don't need to be skipped. Otherwise, this
    ## creates too-short profiles if the PC parent model has non-zero
    ## length and simply doesn't have any profiles registered.
    ## (Two former special cases -- returning a clone of the non-empty
    ## collection if either side has no keys -- are disabled.)

    allkeys = M.union(list(self.profiles.keys()), list(other.keys()))

    for k in allkeys:
        p = self.profiles.get(k, None)
        pnext = other.profiles.get(k, None)

        ## NOTE(review): infos stay empty if the profile exists on both
        ## sides -- confirm that dropping self.infos[k] is intended
        infos = {}

        if p is None:
            default = other[k, 'default']
            p = self.__clonedefault(pnext, self.profLength(), default)
            infos = other.infos[k]

        if pnext is None:
            default = self[k, 'default']
            pnext = self.__clonedefault(p, other.profLength(), default)
            infos = self.infos[k]

        try:
            if isinstance(p, N.ndarray):
                ## an empty array is cast to the dtype of the first one
                if len(pnext) == 0:
                    pnext = pnext.astype(p.dtype)

                r.set(k, N.concatenate((p, pnext)), **infos)
            else:
                r.set(k, p + pnext, **infos)

        except Exception:
            ## 'Exception' rather than a bare except so that
            ## KeyboardInterrupt / SystemExit are not swallowed
            EHandler.warning("Profile %s skipped during concat." % k,
                             error=1)
            r.remove(k)

    return r.concat(*profiles[1:])
def capTerminals( self, auto=False, breaks=False, capN=[], capC=[],
                  checkgap=True):
    """
    Add NME and ACE capping residues to chain breaks or normal N- and
    C-terminals. Note: these capping residues contain hydrogen atoms.

    Chain indices for capN and capC arguments can be interpreted either
    with or without chain break detection enabled. For example, let's
    assume we have a two-chain protein with some missing residues (chain
    break) in the first chain:

    A:   MGSKVSK---FLNAGSK
    B:   FGHLAKSDAK

    Then:
      capTerminals( breaks=False, capN=[1], capC=[1])
    will add N-and C-terminal caps to chain B.
    However:
      capTerminals( breaks=True, capN=[1], capC=[1])
    will add N- and C-terminal caps to the second fragment of chain A.

    Note: this operation *replaces* the internal model.

    NOTE(review): with auto=True the given capN / capC are converted but
    then overwritten by the result of unresolvedTerminals() -- confirm
    whether they were meant to be merged instead.

    :param auto: put ACE and NME capping residue on chain breaks
                 and on suspected false N- and C-termini (default: False)
    :type  auto: bool
    :param breaks: switch on chain break detection before interpreting
                   capN and capC
    :type  breaks: bool
    :param capN: indices of chains that should get ACE cap (default: [])
    :type  capN: [int]
    :param capC: indices of chains that should get NME cap (default: [])
    :type  capC: [int]
    :param checkgap: passed on to capACE and capNME (default: True)
    :type  checkgap: bool

    :return: the capped model (also assigned to self.model)
    """
    model = self.model

    ## reference values for the sanity checks after each capping step
    n_chains = model.lenChains()
    breaks_before = model.chainBreaks()

    if auto:
        if not breaks:
            capN = self.convertChainIdsNter( model, capN )
            capC = self.convertChainIdsCter( model, capC )

        ## from here on, chain indices include chain breaks
        breaks = True
        capN, capC = self.unresolvedTerminals( model )

        broken_ends = model.atom2chainIndices( model.chainBreaks(),
                                               breaks=1 )

        capC = M.union( capC, broken_ends )
        capN = M.union( capN, N0.array( broken_ends ) + 1 )

    n_starts = model.chainIndex( breaks=breaks )
    capN = self.filterProteinChains( model, capN, n_starts )

    c_ends = model.chainEndIndex( breaks=breaks )
    capC = self.filterProteinChains( model, capC, c_ends )

    for chain in capN:
        model = self.capACE( model, chain, breaks=breaks,
                             checkgap=checkgap )

        ## capping must neither add chains nor chain breaks
        assert model.lenChains() == n_chains, '%i != %i' % \
               (model.lenChains(), n_chains)
        assert len(model.chainBreaks(force=True)) == len(breaks_before)
        assert model['serial_number'].dtype == N0.Int32, \
               'serial_number not int'

    for chain in capC:
        model = self.capNME( model, chain, breaks=breaks,
                             checkgap=checkgap )

        assert model.lenChains() == n_chains
        assert len(model.chainBreaks(force=True)) == len(breaks_before)

    self.model = model
    return self.model
def concat( self, *profiles ):
    """
    Concatenate all profiles in this with corresponding profiles in the
    given ProfileCollection(s). The union of the profile keys of both
    collections is processed. A profile that is missing from one of the
    two collections is replaced by the result of __clonedefault (called
    with the existing profile, the other collection's profLength() and
    the profile's 'default' info value). Profiles for which the
    concatenation raises an error are skipped with a warning::

        p0.concat( p1 [, p2, ..]) -> single ProfileCollection with the
        length of p0+p1+p2..

    :param profiles: profile(s) to concatenate
    :type  profiles: ProfileCollection(s)

    :return: concatenated profile(s); *self* if called without arguments
    :rtype: ProfileCollection / subclass
    """
    ## end recursion (no more arguments)
    if not profiles:
        return self

    other = profiles[0]  ## not called 'next' to keep the builtin usable
    r = self.__class__()

    ##!!! BIG FAT WARNING: empty profilecollection does not imply empty
    ## model. An empty PC w/o any profiles currently doesn't know which
    ## length it is supposed to have. If profLength == 0 for real, then
    ## the next PC's profiles don't need to be skipped. Otherwise, this
    ## creates too-short profiles if the PC parent model has non-zero
    ## length and simply doesn't have any profiles registered.
    ## (Two former special cases -- returning a clone of the non-empty
    ## collection if either side has no keys -- are disabled.)

    allkeys = M.union( list(self.profiles.keys()), list(other.keys()) )

    for k in allkeys:
        p = self.profiles.get( k, None )
        pnext = other.profiles.get( k, None )

        ## NOTE(review): infos stay empty if the profile exists on both
        ## sides -- confirm that dropping self.infos[k] is intended
        infos = {}

        if p is None:
            default = other[k,'default']
            p = self.__clonedefault( pnext, self.profLength(), default )
            infos = other.infos[k]

        if pnext is None:
            default = self[k,'default']
            pnext = self.__clonedefault( p, other.profLength(), default )
            infos = self.infos[k]

        try:
            if isinstance( p, N.ndarray ):
                ## an empty array is cast to the dtype of the first one
                if len( pnext ) == 0:
                    pnext = pnext.astype( p.dtype )

                r.set( k, N.concatenate( (p, pnext) ), **infos )
            else:
                r.set( k, p + pnext, **infos )

        except Exception:
            ## 'Exception' rather than a bare except so that
            ## KeyboardInterrupt / SystemExit are not swallowed
            EHandler.warning( "Profile %s skipped during concat." % k,
                              error=1 )
            r.remove( k )

    return r.concat( *profiles[1:] )
def update(self, model, source, skipRes=None, updateMissing=0, force=0,
           headPatterns=[]):
    """
    Update empty or missing fields of model from the source. The
    model will be connected to the source via model.source.
    Profiles that are derived from the source are labeled 'changed'=0.
    The same holds for coordinates (xyzChanged=0).
    However, existing profiles or coordinates or fields remain untouched.

    :param model: existing model
    :type  model: PDBModel
    :param source: source PDB file
    :type  source: str
    :param skipRes: list residue names that should not be parsed
    :type  skipRes: [ str ]
    :param updateMissing: ignored
    :type  updateMissing: 1|0
    :param force: parse the source even if needsUpdate( model ) is false
                  (default: 0)
    :type  force: 1|0
    :param headPatterns: [(putIntoKey, regex)] extract given REMARKS
    :type  headPatterns: [(str, str)]

    :raise PDBParserError: if something is wrong with the source file
    """
    try:
        ## atoms and/or coordinates need to be updated from PDB
        if force or self.needsUpdate(model):

            atoms, xyz, info = self.__collectAll(source, skipRes,
                                                 headPatterns)

            keys = M.union(list(atoms.keys()), list(self.DEFAULTS.keys()))

            for k in keys:
                a = model.atoms.get(k, default=0, update=False)

                ## 0 is the 'profile missing' marker passed as default
                ## above; the type check avoids comparing a whole
                ## profile with 0 (the code used to say `a is 0`)
                missing = isinstance(a, int) and a == 0

                if missing or a is None:
                    dflt = self.DEFAULTS.get(k, None)
                    model.atoms.set(k, atoms.get(k, dflt), changed=0)

            if model.xyz is None:
                model.xyz = xyz
                model.xyzChanged = 0

            ## reset the cached residue and chain index of the model
            model._resIndex = None
            model._chainIndex = None

            model.fileName = model.fileName or source

            model.pdbCode = model.pdbCode or info.get('pdb_code', None) \
                or self.idFromName(model.fileName)

            ## NOTE: construction of a biounit from info['BIOMT'] is
            ## currently disabled; a parsed 'BIOMT' record is simply
            ## copied into model.info along with everything else
            model.info.update(info)

    except Exception as error:
        ## 'Exception' instead of a bare except: KeyboardInterrupt and
        ## SystemExit are no longer turned into parser errors
        msg = self.__xplorAtomIndicesTest(source) or ' '
        raise PDBParserError('Cannot read ' + str(source) + ' as PDB\n'
                             'ERROR: ' + T.lastError() + msg) from error

    model.setSource(source)
def capTerminals(self, auto=False, breaks=False, capN=[], capC=[],
                 checkgap=True):
    """
    Add NME and ACE capping residues to chain breaks or normal N- and
    C-terminals. Note: these capping residues contain hydrogen atoms.

    Chain indices for capN and capC arguments can be interpreted either
    with or without chain break detection enabled. For example, let's
    assume we have a two-chain protein with some missing residues (chain
    break) in the first chain:

    A:   MGSKVSK---FLNAGSK
    B:   FGHLAKSDAK

    Then:
      capTerminals( breaks=False, capN=[1], capC=[1])
    will add N-and C-terminal caps to chain B.
    However:
      capTerminals( breaks=True, capN=[1], capC=[1])
    will add N- and C-terminal caps to the second fragment of chain A.

    Note: this operation *replaces* the internal model.

    NOTE(review): with auto=True the given capN / capC are converted but
    then overwritten by the result of unresolvedTerminals() -- confirm
    whether they were meant to be merged instead.

    :param auto: put ACE and NME capping residue on chain breaks
                 and on suspected false N- and C-termini (default: False)
    :type  auto: bool
    :param breaks: switch on chain break detection before interpreting
                   capN and capC
    :type  breaks: bool
    :param capN: indices of chains that should get ACE cap (default: [])
    :type  capN: [int]
    :param capC: indices of chains that should get NME cap (default: [])
    :type  capC: [int]
    :param checkgap: passed on to capACE and capNME (default: True)
    :type  checkgap: bool

    :return: the capped model (also assigned to self.model)
    """
    model = self.model

    ## reference values for the sanity checks after each capping step
    n_chains = model.lenChains()
    breaks_before = model.chainBreaks()

    if auto:
        if not breaks:
            capN = self.convertChainIdsNter(model, capN)
            capC = self.convertChainIdsCter(model, capC)

        ## from here on, chain indices include chain breaks
        breaks = True
        capN, capC = self.unresolvedTerminals(model)

        broken_ends = model.atom2chainIndices(model.chainBreaks(),
                                              breaks=1)

        capC = M.union(capC, broken_ends)
        capN = M.union(capN, N0.array(broken_ends) + 1)

    n_starts = model.chainIndex(breaks=breaks)
    capN = self.filterProteinChains(model, capN, n_starts)

    c_ends = model.chainEndIndex(breaks=breaks)
    capC = self.filterProteinChains(model, capC, c_ends)

    for chain in capN:
        model = self.capACE(model, chain, breaks=breaks,
                            checkgap=checkgap)

        ## capping must neither add chains nor chain breaks
        assert model.lenChains() == n_chains, '%i != %i' % \
            (model.lenChains(), n_chains)
        assert len(model.chainBreaks(force=True)) == len(breaks_before)
        assert model['serial_number'].dtype == N0.Int32, \
            'serial_number not int'

    for chain in capC:
        model = self.capNME(model, chain, breaks=breaks,
                            checkgap=checkgap)

        assert model.lenChains() == n_chains
        assert len(model.chainBreaks(force=True)) == len(breaks_before)

    self.model = model
    return self.model
def update( self, model, source, skipRes=None, updateMissing=0, force=0,
            headPatterns=[]):
    """
    Update empty or missing fields of model from the source. The
    model will be connected to the source via model.source.
    Profiles that are derived from the source are labeled 'changed'=0.
    The same holds for coordinates (xyzChanged=0).
    However, existing profiles or coordinates or fields remain untouched.

    :param model: existing model
    :type  model: PDBModel
    :param source: source PDB file
    :type  source: str
    :param skipRes: list residue names that should not be parsed
    :type  skipRes: [ str ]
    :param updateMissing: ignored
    :type  updateMissing: 1|0
    :param force: parse the source even if needsUpdate( model ) is false
                  (default: 0)
    :type  force: 1|0
    :param headPatterns: [(putIntoKey, regex)] extract given REMARKS
    :type  headPatterns: [(str, str)]

    :raise PDBParserError: if something is wrong with the source file
    """
    try:
        ## atoms and/or coordinates need to be updated from PDB
        if force or self.needsUpdate( model ):

            atoms, xyz, info = self.__collectAll( source, skipRes,
                                                  headPatterns )

            keys = M.union( list(atoms.keys()),
                            list(self.DEFAULTS.keys()) )

            for k in keys:
                a = model.atoms.get( k, default=0, update=False )

                ## 0 is the 'profile missing' marker passed as default
                ## above; the type check avoids comparing a whole
                ## profile with 0 (the code used to say `a is 0`)
                missing = isinstance( a, int ) and a == 0

                if missing or a is None:
                    dflt = self.DEFAULTS.get( k, None )
                    model.atoms.set( k, atoms.get(k, dflt), changed=0 )

            if model.xyz is None:
                model.xyz = xyz
                model.xyzChanged = 0

            ## reset the cached residue and chain index of the model
            model._resIndex = None
            model._chainIndex = None

            model.fileName = model.fileName or source

            model.pdbCode = model.pdbCode or info.get('pdb_code', None) \
                or self.idFromName( model.fileName )

            ## NOTE: construction of a biounit from info['BIOMT'] is
            ## currently disabled; a parsed 'BIOMT' record is simply
            ## copied into model.info along with everything else
            model.info.update( info )

    except Exception as error:
        ## 'Exception' instead of a bare except: KeyboardInterrupt and
        ## SystemExit are no longer turned into parser errors
        msg = self.__xplorAtomIndicesTest( source ) or ' '
        raise PDBParserError( 'Cannot read ' + str(source) + ' as PDB\n'
                              'ERROR: ' + T.lastError() + msg ) from error

    model.setSource( source )