def byName(self, rescode, topo=None):
    """
    Identify matching reference residue by residue name.

    Residue names are not guaranteed to be unique if several topology
    files have been read in (the default set of Amber topologies uses
    unique names though). The optional topo parameter restricts the
    lookup to a single topology. Note: residue 3-letter names are all
    UPPERCASE.

    :param rescode: three-letter name of residue to look up
    :type rescode: str
    :param topo: optional (file) name of topology (see also: `topokeys()`)
    :type topo: str
    :return: matching reference residue
    :rtype: AmberResidueType
    :raise: KeyError if the topology or residue name are not found
    """
    if topo:
        return self.topoindex[T.stripFilename(topo)][rescode]

    ## no topology given -- scan all registered topologies in order
    for residues in self.topoindex.values():
        if rescode in residues:
            return residues[rescode]

    raise KeyError('No residue type found for name ' + str(rescode))
def byName(self, rescode, topo=None):
    """
    Look up a reference residue type by its (UPPERCASE) 3-letter name.

    Residue names are not guaranteed to be unique if several topology
    files have been read in (the default set of Amber topologies uses
    unique names though). Pass topo to restrict the search to one
    topology file.

    :param rescode: three-letter name of residue to look up
    :type rescode: str
    :param topo: optional (file) name of topology (see also: `topokeys()`)
    :type topo: str
    :return: matching reference residue
    :rtype: AmberResidueType
    :raise: KeyError if the topology or residue name are not found
    """
    if topo:
        fbase = T.stripFilename(topo)
        return self.topoindex[fbase][rescode]

    ## try each registered topology in turn
    for registry in self.topoindex.values():
        try:
            return registry[rescode]
        except KeyError:
            continue

    raise KeyError('No residue type found for name ' + str(rescode))
def failed(self):
    """
    Callback invoked when a HEX job fails: print diagnostic details
    to stdout and hand the job back to the owner via failedHex().
    """
    print("FAILED: ", self.host, ' ', t.stripFilename(self.finp))
    print("\tJob details:")
    ## print the job attributes in a fixed order
    for label, value in (("\tCommand: ", self.cmd),
                         ("\tinput: ", self.finp),
                         ("\tHex log: ", self.log),
                         ("\tHex out: ", self.fout)):
        print(label, value)
    print()
    print("\t", t.lastError())

    self.owner.failedHex(self)
def idFromName(self, fname):
    """
    Extract PDB code from file name.

    :param fname: file name
    :type fname: str
    :return: first 4 letters of filename if available
    :rtype: str
    """
    stripped = T.stripFilename(fname)
    ## require at least 4 characters for a valid PDB code
    return stripped[:4] if len(stripped) > 3 else ''
def failed(self):
    """
    Report a HEX job that terminated with an error.

    Prints the job details and the last recorded error, then notifies
    the owning master via failedHex().
    """
    job_input = t.stripFilename(self.finp)
    print("FAILED: ", self.host, ' ', job_input)
    print("\tJob details:")
    print("\tCommand: ", self.cmd)
    print("\tinput: ", self.finp)
    print("\tHex log: ", self.log)
    print("\tHex out: ", self.fout)
    print()
    print("\t", t.lastError())
    self.owner.failedHex(self)
def idFromName(self, fname):
    """
    Derive a four-letter PDB code from a file name.

    :param fname: file name
    :type fname: str
    :return: first 4 letters of the stripped file name, '' if the
             name is too short
    :rtype: str
    """
    name = T.stripFilename(fname)
    ## guard clause: names of 3 characters or fewer carry no PDB code
    if len(name) <= 3:
        return ''
    return name[:4]
def run(self):
    """
    Run HEX job.

    Executes the prepared HEX command via subprocess (unless the output
    file already exists), waits for the output file to appear, renumbers
    the models in the HEX output and parses it into a ComplexList stored
    in self.result. Any exception is routed to self.failed().

    @raise DockerError: if HEX exits with a non-zero status
    """
    try:
        if not os.path.exists(self.fout):

            if self.verbose:
                print("Executing on ", self.host, ' with ',
                      t.stripFilename(self.finp))
                print("Command: ", self.cmd)

            cmd_lst = self.cmd.split()

            p = subprocess.Popen(cmd_lst,
                                 executable=cmd_lst[0],
                                 universal_newlines=True,
                                 stdout=subprocess.DEVNULL,  ## see flog
                                 )
            self.pid = p.pid

            output, error = p.communicate()
            self.status = p.returncode

            if self.status != 0:
                raise DockerError('Hex returned exit status %i' % self.status)

            ## give the (possibly remote) file system up to 25 s to
            ## expose the output file
            waited = 0
            while waited < 25 and not os.path.exists(self.fout):
                sleep(5)
                waited += 5

        ## replace model numbers in HEX output file
        self.__hackHexOut(self.nRec, self.nLig, self.fout)

        parser = HexParser(self.fout, self.owner.recDic, self.owner.ligDic)

        ## generate ComplexList from hex output
        self.result = parser.parseHex()

        self.done()

    except Exception:
        ## bugfix: narrowed from a bare 'except:' which also swallowed
        ## KeyboardInterrupt / SystemExit
        self.failed()
def run(self):
    """
    Execute the HEX docking job and collect its results.

    Launches the prepared command, checks the exit status, waits for
    the output file, renumbers models in the HEX output and parses it
    into a ComplexList. Errors are delegated to self.failed().

    @raise DockerError: if HEX exists with error
    """
    try:
        if not os.path.exists(self.fout):
            if self.verbose:
                print("Executing on ", self.host, ' with ',
                      t.stripFilename(self.finp))
                print("Command: ", self.cmd)

            args = self.cmd.split()

            proc = subprocess.Popen(args,
                                    executable=args[0],
                                    universal_newlines=True,
                                    stdout=subprocess.DEVNULL)  ## see flog
            self.pid = proc.pid

            output, error = proc.communicate()
            self.status = proc.returncode

            if self.status != 0:
                raise DockerError('Hex returned exit status %i' % self.status)

            ## wait (max. 25 s) until the output file shows up on disk
            waited = 0
            while waited < 25 and not os.path.exists(self.fout):
                sleep(5)
                waited += 5

        ## replace model numbers in HEX output file
        self.__hackHexOut(self.nRec, self.nLig, self.fout)

        ## generate ComplexList from hex output
        parser = HexParser(self.fout, self.owner.recDic, self.owner.ligDic)
        self.result = parser.parseHex()

        self.done()

    except:
        self.failed()
def addTopology(self, topofile, override=False):
    """
    Include an additional topology (off) library in the collection.

    :param topofile: file name of topology, either full path or simple
                     file name which will then be looked for in
                     biskit/data/amber/residues.
    :type topofile: str
    :param override: override topologies or residue entries with same
                     name (default False)
    :type override: bool
    :return: dictionary of all residue types parsed from topofile,
             indexed by three-letter residue name
    :rtype : {str : AmberResidueType}
    :raise: AmberResidueLibraryError if override==False and a topology
            or a residue with identical atom content have already been
            registered.
    """
    key = T.stripFilename(topofile)
    if key in self.topoindex and not override:
        raise AmberResidueLibraryError('duplicate topology ' + key)

    if self.verbose:
        self.log.add('parsing %s...' % topofile)

    residues = AmberPrepParser(topofile).residueDict()

    if self.verbose:
        self.log.add('Read %i residue definitions.\n' % len(residues))

    self.topoindex[key] = residues

    ## additionally index every residue by its atom content
    for name, rtype in residues.items():
        atomkey = rtype.atomkey(compress=False)
        if atomkey in self.aindex and not override:
            raise AmberResidueLibraryError(
                'duplicate residue entry: %s -> %s' %
                (name, self.aindex[atomkey].code))
        self.aindex[atomkey] = rtype

    return self.topoindex[key]
def addTopology(self, topofile, override=False):
    """
    Register one more topology (off) library with this collection.

    :param topofile: file name of topology, either full path or simple
                     file name which will then be looked for in
                     Biskit/data/amber/residues.
    :type topofile: str
    :param override: override topologies or residue entries with same
                     name (default False)
    :type override: bool
    :return: dictionary of all residue types parsed from topofile,
             indexed by three-letter residue name
    :rtype : {str : AmberResidueType}
    :raise: AmberResidueLibraryError if override==False and a topology
            or a residue with identical atom content have already been
            registered.
    """
    fbase = T.stripFilename(topofile)

    ## reject a second registration of the same topology file
    if not override and fbase in self.topoindex:
        raise AmberResidueLibraryError('duplicate topology ' + fbase)

    if self.verbose:
        self.log.add('parsing %s...' % topofile)
    resdic = AmberPrepParser(topofile).residueDict()
    if self.verbose:
        self.log.add('Read %i residue definitions.\n' % len(resdic))

    self.topoindex[fbase] = resdic

    ## build the atom-content index for all parsed residues
    for resname, restype in resdic.items():
        akey = restype.atomkey(compress=False)
        if not override and akey in self.aindex:
            raise AmberResidueLibraryError(
                'duplicate residue entry: %s -> %s' %
                (resname, self.aindex[akey].code))
        self.aindex[akey] = restype

    return self.topoindex[fbase]
def addMovie(self, pdb, modName=None):
    """
    Add one or several existing pdb files or Structure objects to one
    model. Several files will hence end up as a single movie (i.e. as
    frames of a model in PyMol).

    @param pdb: file name or a list of file names OR PDBModel or list
                of PDBModels
    @type  pdb: str or [str] OR PDBModel or [PDBModel]
    @param modName: model name shown in PyMol; if None, a name is
                    created from the source file name plus a serial
                    number
    @type  modName: str OR None

    @return: the modName of the added model
    @rtype: str
    """
    sources = pdb if type(pdb) is list else [pdb]

    ## dream up a nice model name
    if modName is None:
        if type(sources[0]) is str:
            base = T.stripFilename(sources[0])
            modName = self._getFreeModName(base, 0)
        else:
            modName = self._getFreeModName('models', 0)

    ## fetch (or create) the frame list for this model name
    frames = self.dic.setdefault(modName, [])

    ## create model object for each file and register it
    for source in sources:
        m = PymolModel(source, modName)
        frames.append(m)
        ## add load statement to Pymol script
        self.add('load ' + m.fname + ',' + modName)

    return modName
def test_TrajParsePDBs(self):
    """TrajParsePDBs test: parse a folder of PDB files into a Trajectory"""
    import os

    folder = T.testRoot('amber/md_pdbs/')

    ## collect all PDB files (case-insensitive suffix match);
    ## replaces a pointless bare try/except around the suffix test --
    ## string slicing cannot raise here
    pdbs = [folder + fn for fn in os.listdir(folder)
            if fn.upper().endswith('.PDB')]

    ref = pdbs[0]

    self.assertTrue(TrajParsePDBs.supports(pdbs))

    p = TrajParsePDBs(verbose=self.local, rmwat=True, analyzeEach=False)
    t = p.parse2new(pdbs, ref=ref)

    self.assertEqual(t.lenAtoms(), 876)
    self.assertEqual(len(t), 10)
    self.assertEqual(t.frameNames, [T.stripFilename(f) for f in pdbs])
def createHexInp(recPdb, recModel, ligPdb, ligModel, comPdb=None,
                 outFile=None, macDock=None, silent=0, sol=512):
    """
    Prepare a Hex macro file for the docking of the receptor(s)
    against ligand(s).

    @param recPdb: hex-formatted PDB file of the receptor
    @type  recPdb: str
    @param recModel: receptor model; surface distances are taken from it
    @type  recModel: PDBModel
    @param ligPdb: hex-formatted PDB file of the ligand
    @type  ligPdb: str
    @param ligModel: ligand model; surface distances are taken from it
    @type  ligModel: PDBModel
    @param comPdb: reference complex PDB file
    @type  comPdb: str
    @param outFile: base of file name for mac and out
    @type  outFile: str
    @param macDock: None -> hex decides (from the size of the molecule),
                    1 -> force macroDock, 0 -> force off (default: None)
    @type  macDock: None|1|0
    @param silent: don't print distances and macro warnings (default: 0)
    @type  silent: 0|1
    @param sol: number of solutions that Hex should save (default: 512)
    @type  sol: int

    @return: HEX macro file name, HEX out generated by the macro,
             macro docking status
    @rtype: str, str, boolean
    """
    ## files and names
    recCode = t.stripFilename(recPdb)[0:4]
    ligCode = t.stripFilename(ligPdb)[0:4]

    outFile = outFile or recCode + '-' + ligCode

    ## hex macro name
    macName = t.absfile(outFile + '_hex.mac')

    ## hex rotation matrix output name
    outName_all = t.absfile(outFile + '_hex.out')
    outName_clust = t.absfile(outFile + '_hex_cluster.out')

    ## add surface profiles if not there
    if 'relAS' not in recModel.atoms:
        #t.flushPrint('\nCalculating receptor surface profile')
        rec_asa = PDBDope(recModel)
        rec_asa.addSurfaceRacer()
    if 'relAS' not in ligModel.atoms:
        #t.flushPrint('\nCalculating ligand surface profile')
        lig_asa = PDBDope(ligModel)
        lig_asa.addSurfaceRacer()

    ## surface masks, > 95% exposed
    rec_surf_mask = N0.greater(recModel.profile('relAS'), 95)
    lig_surf_mask = N0.greater(ligModel.profile('relAS'), 95)

    ## maximum and minimum distance from centre of mass to any surface atom
    recMax, recMin = centerSurfDist(recModel, rec_surf_mask)
    ligMax, ligMin = centerSurfDist(ligModel, lig_surf_mask)

    ## approximate max and min center-to-centre distance
    maxDist = recMax + ligMax
    minDist = recMin + ligMin

    ## molecular separation and search range to be used in the docking
    molSep = (maxDist + minDist) / 2
    molRange = 2 * (maxDist - molSep)

    if not silent:
        print('Docking setup: %s\nRecMax: %.1f RecMin: %.1f\nLigMax: %.1f LigMin: %.1f\nMaxDist: %.1f MinDist: %.1f\nmolecular_separation: %.1f r12_range: %.1f\n' % (outFile, recMax, recMin, ligMax, ligMin, maxDist, minDist, molSep, molRange))

    if recMax > 30 and ligMax > 30 and not silent:
        print('\nWARNING! Both the receptor and ligand radius is ', end=' ')
        print('greater than 30A.\n')

    ## determine docking mode to use
    macroDocking = 0

    if macDock is None:
        if recMax > 35 and not silent:
            print('\nReceptor has a radius that exceeds 35A ', end=' ')
            print('-> Macro docking will be used')
            ## NOTE(review): macro docking is only activated when
            ## silent == 0 -- looks unintended; confirm before changing
            macroDocking = 1
    else:
        macroDocking = macDock

    #####################
    ## write macro file
    macOpen = open(macName, 'w')

    macOpen.write('# -- ' + macName + ' --\n')
    macOpen.write(' \n')
    macOpen.write('open_receptor ' + t.absfile(recPdb) + '\n')
    macOpen.write('open_ligand ' + t.absfile(ligPdb) + '\n')

    if comPdb and comPdb[-4:] == '.pdb':
        macOpen.write('open_complex ' + comPdb + '\n')

    macOpen.write('\n')

    head = """
# -------------- general settings ----------------
disc_cache 1                   # disc cache on (0 off)
docking_sort_mode 1            # Sort solutions by cluster (0 by energy)
docking_cluster_mode 1         # Display all clusters (0 display best)
docking_cluster_threshold 2.00
# docking_cluster_bumps number

# ------------ molecule orientation --------------
molecule_separation %(separation)i
commit_view """ % ({'separation': round(molSep)})

    macro = """
# -------------- macro docking -------------------
macro_min_coverage 25
macro_sphere_radius 15
macro_docking_separation 25
activate_macro_model"""

    tail = """
# -------------- docking setup -------------------
docking_search_mode 0          # full rotational search

receptor_range_angle 180       # 0, 15, 30, 45, 60, 75, 90, 180
docking_receptor_samples 720   # 362, 492, 642, 720, 980, 1280
ligand_range_angle 180
docking_ligand_samples 720
twist_range_angle 360          # 0, 15, 30, 60, 90, 180, 360
docking_alpha_samples 128      # 64, 128, 256
r12_step 0.500000              # 0.1, 0.2, 0.25, 0.5, 0.75, 1, 1.5, 2
r12_range %(range)i

docking_radial_filter 0        # Radial Envelope Filter - None

grid_size 0.600                # 0.4, 0.5, 0.6, 0.75, 1.0

# docking_electrostatics 0     # use only surface complimentarity
docking_electrostatics 1       # use electrostatic term for scoring clusters

docking_main_scan 16
# docking_main_search 26

max_docking_solutions %(nr_sol)i # number of solutions to save

# -------------- post-processing ----------------
docking_refine 0               # None
# docking_refine 1             # Backbone Bumps
# docking_refine 2             # MM energies
# docking_refine 3             # MM minimization

# ---------------- run docking ------------------
activate_docking
# save_docking %(output_clust)s
# save_range 1 512 ./ dock .pdb

# ------------ also save all solutions ----------
docking_sort_mode 0            # Sort solutions by energy (1 by cluster)
save_docking %(output_all)s""" \
        % ({'range': round(molRange), 'output_all': outName_all,
            'nr_sol': int(sol), 'output_clust': outName_clust})

    macOpen.writelines(head)

    ## macro docking will not work with multiple models, if both are added to
    ## the hex macro file - macrodocking will be skipped during the docking run
    if macroDocking:
        macOpen.writelines(macro)

    macOpen.writelines(tail)

    macOpen.close()

    return macName, outName_all, macroDocking
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.

    Creates a 'BIOMT' dictionary mapping each biomolecule number to a
    tuple of (chain ids the transformations apply to, list of 3 x 4
    rotation-translation matrices).

    :param pdbFile: open PDB file positioned just after firstLine
    :param firstLine: first (already consumed) REMARK 350 record,
                      as (record_type, content) tuple
    :return: ({'BIOMT': {int: ([str], [array])}},
              first line that is not a REMARK 350 record)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## robustness fix: pre-initialize accumulators so malformed headers
    ## (APPLY/BIOMT lines arriving before any BIOMOLECULE record) can no
    ## longer trigger a NameError below
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)

            #12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes
            # after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            if matrixLine % 3 == 0:
                ## three rows collected -> assemble the 3 x 4 matrix
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                rotation = []
                translation = []

        try:
            line = pdbFile.readLine()
        except ValueError as what:
            ## bugfix: previous message referenced 'i' and 'fname' which
            ## are undefined in this method (copy-paste from __collectAll)
            ## and would themselves have raised a NameError
            self.log.add('Warning: Error parsing REMARK 350 record')
            self.log.add('\tError: ' + str(what))
            continue

    # process last molecule group; guard against files without any
    # BIOMOLECULE record (moleculeNum would still be -1)
    if moleculeNum != -1:
        biomtDict[moleculeNum] = (targetChains, rtList)

    # return (indexed transformation dictionary , last line which isn't ours)
    return {'BIOMT': biomtDict}, line
def parse2new(self, source, ref=None, traj=None):
    """
    Create / Replace Trajectory from the source list of PDBModels or PDBs.

    Args:
        source (str): list of file names or PDBModel instances
        ref (str or PDBModel): reference structure instance or file
        traj (Biskit.md.Trajectory): existing instance to be updated

    Returns:
        Biskit.Trajectory: new Trajectory instance
    """
    r = traj
    if traj is None:
        import biskit.md
        r = biskit.md.Trajectory()

    r.setRef(B.PDBModel(ref or source[0]))
    n_frames = len(source)

    if self.rmwat:
        r.ref = r.ref.compress(N.logical_not(r.ref.maskSolvent()))

    r.resIndex = r.ref.resMap()
    refNames = r.ref.atomNames()  ## cache for atom checking

    if self.verbose:
        T.errWrite('reading %i pdbs...' % n_frames)

    r.frames = N.zeros((n_frames, r.ref.lenAtoms(), 3))  ## target coordinate array

    ## bugfix: placeholder names used '#%i07' which appended a literal
    ## "07" to the index ('#007', '#107', ...); '%07i' gives the intended
    ## zero-padded frame number
    r.frameNames = ['#%07i' % i for i in range(n_frames)]

    atomCast = None
    reportIntervall = 1 if n_frames < 100 else round(n_frames / 100)

    for i, f in enumerate(source):
        m = B.PDBModel(f)

        ## compare atom order & content of first frame to reference pdb
        if self.analyzeEach or i == 0:
            atomCast, castRef = m.compareAtoms(r.ref)

            if castRef != list(range(r.ref.lenAtoms())):
                ## we can remove/reorder atoms from each frame but not from ref
                raise P.TrajParserError("Reference PDB doesn't match %s."
                                        % m.fileName)

            if N.all(atomCast == list(range(len(m)))):
                atomCast = None  ## no casting necessary
            else:
                if self.verbose:
                    T.errWrite(' casting ')

        ## assert that frame fits reference
        if atomCast:
            m = m.take(atomCast)

        ## additional check on each 100st frame
        if i % reportIntervall == 0 and m.atomNames() != refNames:
            raise P.TrajParserError("%s doesn't match reference pdb."
                                    % m.fileName)

        r.frames[i] = m.xyz

        if type(f) is str:  ## save original file name
            r.frameNames[i] = T.stripFilename(f)

        if i % reportIntervall == 0 and self.verbose:
            T.errWrite('#')

    if self.verbose:
        T.errWrite('done\n')

    return r
def __collectAll(self, fname, skipRes=None, headPatterns=None):
    """
    Parse ATOM/HETATM lines from PDB. Collect coordinates plus
    dictionaries with the other pdb records of each atom.
    REMARK, HEADER, etc. lines are ignored.

    Some changes are made to the dictionary from PDBFile.readline()::
        - the 'position' entry (with the coordinates) is removed
        - leading and trailing spaces are removed from 'name' ..
        - .. but a 'name_original' entry keeps the old name with spaces
        - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
        - a 'after_ter' entry is added. Its value is 1, if atom is
          preceeded by a 'TER' line, otherwise 0
        - empty 'element' entries are filled with the first non-number
          letter from the atom 'name'

    :param fname: name of pdb file
    :type fname: str
    :param skipRes: list with residue names that should be skipped
    :type skipRes: list of str
    :param headPatterns: alternative header patterns replacing
                         self.RE_REMARKS (default: None)
    :type headPatterns: [(str, str)]
    :return: tuple of (1) dictionary of profiles, (2) xyz array N x 3
             and (3) dictionary of header/remark info
    :rtype: ( dict, array, dict )
    """
    xyz = []
    aProfs = {}
    info = {}
    in_header = True

    ## bugfix: mutable default argument '[]' replaced by None; the old
    ## default was never mutated but the idiom is unsafe
    headPatterns = headPatterns or self.RE_REMARKS
    patterns = [(key, re.compile(ex)) for key, ex in headPatterns]

    for k in B.PDBModel.PDB_KEYS:
        aProfs[k] = list()

    f = IO.PDBFile(fname)

    skipLine = False

    try:
        line, i = ('', ''), 0

        while line[0] != 'END' and line[0] != 'ENDMDL':
            i += 1

            if not skipLine:
                try:
                    line = f.readLine()
                except ValueError as what:
                    self.log.add('Warning: Error parsing line %i of %s'
                                 % (i, T.stripFilename(fname)))
                    self.log.add('\tError: ' + str(what))
                    continue
            else:
                skipLine = False

            ## header handling
            if in_header and line[0] == 'HEADER':
                info.update(self.__parseHeader(line))

            if in_header and line[0] == 'REMARK':
                if line[1].startswith(' 350'):
                    biomtDict, line = self.__parseBiomt(f, line)
                    info.update(biomtDict)
                    # we've hogged a line beyond REMARK 350 records in
                    # __parseBiomt(), now we need to process it here
                    skipLine = True
                    continue
                else:
                    info.update(self.__parseRemark(line, patterns))

            ## preserve position of TER records
            newChain = line[0] == 'TER'
            if newChain:
                line = f.readLine()

            if line[0] in ['ATOM', 'HETATM']:
                if in_header:
                    in_header = False  ## switch off HEADER parsing

                a = line[1]

                if skipRes and a['residue_name'] in skipRes:
                    continue

                a['name_original'] = a['name']
                a['name'] = a['name'].strip()

                a['type'] = line[0]
                a['after_ter'] = 1 if newChain else 0

                if a['element'] == '':
                    a['element'] = self.__firstLetter(a['name'])

                xyz.append(a['position'])
                del a['position']

                for k, v in a.items():
                    aProfs[k].append(v)

    except Exception:
        ## narrowed from a bare 'except:' which also swallowed
        ## SystemExit / KeyboardInterrupt
        raise PDBParserError("Error parsing file "+fname+": " \
                             + T.lastError())
    try:
        f.close()
    except Exception:
        pass

    if len(xyz) == 0:
        raise PDBParserError("Error parsing file "+fname+": "+
                             "Couldn't find any atoms.")

    return aProfs, N0.array(xyz, N0.Float32), info
def __parseBiomt(self, pdbFile, firstLine):
    """
    Extract BIOMT (biological unit) information from REMARK 350 lines.

    Creates a 'BIOMT' dictionary: biomolecule number -> tuple of
    (target chain ids, list of 3 x 4 rotation-translation matrices).

    :param pdbFile: open PDB file positioned just after firstLine
    :param firstLine: first (already consumed) REMARK 350 record,
                      as (record_type, content) tuple
    :return: ({'BIOMT': {int: ([str], [array])}},
              first line that is not a REMARK 350 record)
    """
    line = firstLine
    biomtDict = {}
    moleculeNum = -1

    ## robustness fix: initialize accumulators up front so APPLY/BIOMT
    ## lines appearing before any BIOMOLECULE record cannot raise a
    ## NameError further down
    targetChains = []
    rotation = []
    translation = []
    rtList = []
    matrixLine = 0

    while line[0] == 'REMARK' and line[1].startswith(' 350'):
        # 5 = len(' 350 ')
        biomtLine = line[1][5:].lstrip()

        if biomtLine.startswith('BIOMOLECULE:'):  # start a new molecule
            if moleculeNum != -1:
                # lets update the dictionary with what we've got
                biomtDict[moleculeNum] = (targetChains, rtList)

            #12 = len('BIOMOLECULE:')
            moleculeNum = int(biomtLine[12:].strip())
            targetChains = []
            rotation = []
            translation = []
            rtList = []
            matrixLine = 0

        if biomtLine.startswith('APPLY THE FOLLOWING TO CHAINS:'):
            # parse targeted chains, we assume this comes after BIOMOLECULE line
            # 30 = len('APPLY THE FOLLOWING TO CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[30:].split(','))

        if biomtLine.startswith('AND CHAINS:'):
            # 11 = len('AND CHAINS:')
            targetChains.extend(c.strip() for c in biomtLine[11:].split(','))

        if biomtLine.startswith('BIOMT'):
            # parse rotate-translate matri{x/ces}, we assume this comes
            # after BIOMOLECULE line
            matrixLine += 1
            # 6 = len('BIOMT#')
            rawCoords = biomtLine[6:].split()
            rotation.append([float(x) for x in rawCoords[1:4]])
            translation.append(float(rawCoords[4]))

            if matrixLine % 3 == 0:
                ## three rows collected -> assemble 3 x 4 matrix
                rotation = N0.array(rotation)
                translation = N0.transpose([translation])
                rotation = N0.concatenate((rotation, translation), axis=1)
                rtList.append(N0.array(rotation))
                rotation = []
                translation = []

        try:
            line = pdbFile.readLine()
        except ValueError as what:
            ## bugfix: the old message used 'i' and 'fname', both
            ## undefined in this method (copied from __collectAll), so
            ## the handler itself raised a NameError
            self.log.add('Warning: Error parsing REMARK 350 record')
            self.log.add('\tError: ' + str(what))
            continue

    # process last molecule group; skip when no BIOMOLECULE record was seen
    if moleculeNum != -1:
        biomtDict[moleculeNum] = (targetChains, rtList)

    # return (indexed transformation dictionary , last line which isn't ours)
    return {'BIOMT': biomtDict}, line
p.ylabel= 'RMSD [Å]' return p ######## ## MAIN ######## syntax() ## get and clean up options o = options() o['step'] = int( o['step'] ) o['i'] = T.absfile( o['i'] ) o['o'] = o.get('o', '%s/%s_rms.eps' % (osp.dirname(o['i']), T.stripFilename(o['i']))) o['show'] = 'show' in o T.flushPrint( "Loading..." ) t = T.load( o['i'] ) T.flushPrint( "done loading trajectory with %i frames." % len(t) ) if o['step'] != 1: t = t.thin( o['step'] ) T.flushPrint( "Fitting ...") calcRmsd( t ) T.flushPrint( "done." ) p = plot( t, o.get( 'title', T.stripFilename(o['i']) ) )
def __collectAll(self, fname, skipRes=None, headPatterns=None):
    """
    Parse ATOM/HETATM lines from PDB. Collect coordinates plus
    dictionaries with the other pdb records of each atom.
    REMARK, HEADER, etc. lines are ignored.

    Some changes are made to the dictionary from PDBFile.readline()::
        - the 'position' entry (with the coordinates) is removed
        - leading and trailing spaces are removed from 'name' ..
        - .. but a 'name_original' entry keeps the old name with spaces
        - a 'type' entry is added. Its value is 'ATOM' or 'HETATM'
        - a 'after_ter' entry is added. Its value is 1, if atom is
          preceeded by a 'TER' line, otherwise 0
        - empty 'element' entries are filled with the first non-number
          letter from the atom 'name'

    :param fname: name of pdb file
    :type fname: str
    :param skipRes: list with residue names that should be skipped
    :type skipRes: list of str
    :param headPatterns: alternative header patterns replacing
                         self.RE_REMARKS (default: None)
    :type headPatterns: [(str, str)]
    :return: tuple of (1) dictionary of profiles, (2) xyz array N x 3
             and (3) dictionary of header/remark info
    :rtype: ( dict, array, dict )
    """
    xyz = []
    aProfs = {}
    info = {}
    in_header = True

    ## bugfix: mutable default argument '[]' replaced by None (the old
    ## default was never mutated, but the idiom is unsafe)
    headPatterns = headPatterns or self.RE_REMARKS
    patterns = [(key, re.compile(ex)) for key, ex in headPatterns]

    for k in B.PDBModel.PDB_KEYS:
        aProfs[k] = list()

    f = IO.PDBFile(fname)

    skipLine = False

    try:
        line, i = ('', ''), 0

        while line[0] != 'END' and line[0] != 'ENDMDL':
            i += 1

            if not skipLine:
                try:
                    line = f.readLine()
                except ValueError as what:
                    self.log.add('Warning: Error parsing line %i of %s'
                                 % (i, T.stripFilename(fname)))
                    self.log.add('\tError: ' + str(what))
                    continue
            else:
                skipLine = False

            ## header handling
            if in_header and line[0] == 'HEADER':
                info.update(self.__parseHeader(line))

            if in_header and line[0] == 'REMARK':
                if line[1].startswith(' 350'):
                    biomtDict, line = self.__parseBiomt(f, line)
                    info.update(biomtDict)
                    # we've hogged a line beyond REMARK 350 records in
                    # __parseBiomt(), now we need to process it here
                    skipLine = True
                    continue
                else:
                    info.update(self.__parseRemark(line, patterns))

            ## preserve position of TER records
            newChain = line[0] == 'TER'
            if newChain:
                line = f.readLine()

            if line[0] in ['ATOM', 'HETATM']:
                if in_header:
                    in_header = False  ## switch off HEADER parsing

                a = line[1]

                if skipRes and a['residue_name'] in skipRes:
                    continue

                a['name_original'] = a['name']
                a['name'] = a['name'].strip()

                a['type'] = line[0]
                a['after_ter'] = 1 if newChain else 0

                if a['element'] == '':
                    a['element'] = self.__firstLetter(a['name'])

                xyz.append(a['position'])
                del a['position']

                for k, v in a.items():
                    aProfs[k].append(v)

    except Exception:
        ## narrowed from a bare 'except:' which also swallowed
        ## SystemExit / KeyboardInterrupt
        raise PDBParserError("Error parsing file "+fname+": " \
                             + T.lastError())
    try:
        f.close()
    except Exception:
        pass

    if len(xyz) == 0:
        raise PDBParserError("Error parsing file "+fname+": "+
                             "Couldn't find any atoms.")

    return aProfs, N0.array(xyz, N0.Float32), info