def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                   altloc_torf, format='PDB'):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    ATOM/HETATM records found in *lines* are parsed into per-atom arrays
    that are set on *atomgroup* when the first model ends.  Records of
    subsequent models only contribute coordinate sets.

    :arg atomgroup: atom group that receives coordinates and atomic data
    :arg lines: PDB/PQR lines
    :arg split: starting index for coordinate data lines
    :arg model: model index to parse, **None** parses all models
    :arg chain: chain identifiers to keep, **None** keeps all chains
    :arg subset: ``'ca'`` or ``'bb'`` to keep a subset of protein atoms,
        a false value keeps all atoms
    :arg altloc_torf: a string of alternate location identifiers to parse;
        any non-string value parses location ``'A'`` and hands remaining
        alternate location records to :func:`_evalAltlocs`
    :arg format: ``'PDB'`` for fixed-column records, any other value is
        parsed as whitespace separated PQR fields

    :raises PDBParseError: when requested *model* is not found or when
        coordinates of an atom cannot be parsed
    :raises ValueError: when *atomgroup* already has atoms and the number
        of parsed atoms does not match"""

    def _extended(array, extra):
        """Return *array* with *extra* zero-filled entries appended."""
        padding = np.zeros((extra,) + array.shape[1:], dtype=array.dtype)
        return np.concatenate((array, padding))

    format = format.upper()
    isPDB = format == 'PDB'

    if subset:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        only_subset = True
        protein_resnames = flags.AMINOACIDS
    else:
        only_subset = False
    only_chains = chain is not None
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        asize = len(lines) - split
    addcoords = atomgroup.numCoordsets() > 0
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        anisou = None
        siguij = None
    else:
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)

    asize = 2000  # increase array length by this much when needed

    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i + 1
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0} is not found'.format(model))

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True

    acount = 0
    coordsets = None
    # records with an alternate location that is not parsed directly,
    # keyed by location identifier, kept for _evalAltlocs
    altloc = defaultdict(list)
    i = start
    END = False
    while i < stop:
        line = lines[i]
        if not isPDB:
            fields = line.split()
            if len(fields) == 10:
                # 10 fields are taken to mean the chain identifier is absent
                fields.insert(4, '')
            elif len(fields) != 11:
                LOGGER.warn('wrong number of fields for PQR format at line %d'
                            % i)
                i += 1
                continue

        if isPDB:
            startswith = line[0:6].strip()
        else:
            startswith = fields[0]

        if startswith == 'ATOM' or startswith == 'HETATM':
            if isPDB:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
            else:
                atomname = fields[2]
                resname = fields[3]

            if only_subset:
                if not (atomname in subset and resname in protein_resnames):
                    i += 1
                    continue

            if isPDB:
                chid = line[21]
            else:
                chid = fields[4]

            if only_chains:
                if chid not in chain:
                    i += 1
                    continue

            if isPDB:
                alt = line[16]
                if alt not in which_altlocs:
                    altloc[alt].append((line, i))
                    i += 1
                    continue
            else:
                alt = ' '

            try:
                if isPDB:
                    coordinates[acount, 0] = line[30:38]
                    coordinates[acount, 1] = line[38:46]
                    coordinates[acount, 2] = line[46:54]
                else:
                    coordinates[acount, 0] = fields[6]
                    coordinates[acount, 1] = fields[7]
                    coordinates[acount, 2] = fields[8]
            except Exception:
                # either the text is not a number or this model has more
                # atoms than the coordinate array can hold
                if acount >= n_atoms > 0:
                    if nmodel == 0:
                        raise ValueError(format + ' file and AtomGroup ag '
                                         'must have same number of atoms')
                    LOGGER.warn('Discarding model {0}, which contains {1} more '
                                'atoms than first model does.'
                                .format(nmodel + 1, acount - n_atoms + 1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    if isPDB:
                        while lines[i][:6] != 'ENDMDL':
                            i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                        'line {0}'.format(i + 1))
            if onlycoords:
                # atomic data was set after the first model, later models
                # only provide coordinates
                acount += 1
                i += 1
                continue

            try:
                serials[acount] = int(line[6:11]) if isPDB else int(fields[1])
            except ValueError:
                try:
                    # serial numbers may be written in hexadecimal
                    serials[acount] = (int(line[6:11], 16) if isPDB
                                       else int(fields[1], 16))
                except ValueError:
                    LOGGER.warn('failed to parse serial number in line {0}'
                                .format(i))
                    serials[acount] = serials[acount - 1] + 1
            altlocs[acount] = alt
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            if isPDB:
                resnums[acount] = line[22:26]
                icodes[acount] = line[26]
            else:
                resnum = fields[5]
                if resnum[-1].isalpha():
                    icode = resnum[-1]
                else:
                    icode = ' '
                resnums[acount] = resnum
                icodes[acount] = icode

            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except Exception:
                    LOGGER.warn('failed to parse occupancy at line {0}'
                                .format(i))
                try:
                    bfactors[acount] = line[60:66]
                except Exception:
                    LOGGER.warn('failed to parse beta-factor at line {0}'
                                .format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
                try:
                    # the sign character follows the digit in the record
                    charges[acount] = int(line[79] + line[78])
                except Exception:
                    charges[acount] = 0
            else:
                try:
                    charges[acount] = fields[9]
                except Exception:
                    LOGGER.warn('failed to parse charge at line {0}'
                                .format(i))
                try:
                    radii[acount] = fields[10]
                except Exception:
                    LOGGER.warn('failed to parse radius at line {0}'
                                .format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = _extended(coordinates, asize)
                atomnames = _extended(atomnames, asize)
                resnames = _extended(resnames, asize)
                resnums = _extended(resnums, asize)
                chainids = _extended(chainids, asize)
                hetero = _extended(hetero, asize)
                termini = _extended(termini, asize)
                altlocs = _extended(altlocs, asize)
                icodes = _extended(icodes, asize)
                serials = _extended(serials, asize)
                # charges are written for both formats, so they must grow
                # for both; otherwise the next PDB atom indexes past the end
                charges = _extended(charges, asize)
                if isPDB:
                    bfactors = _extended(bfactors, asize)
                    occupancies = _extended(occupancies, asize)
                    segnames = _extended(segnames, asize)
                    elements = _extended(elements, asize)
                    if anisou is not None:
                        anisou = _extended(anisou, asize)
                    if siguij is not None:
                        siguij = _extended(siguij, asize)
                else:
                    radii = _extended(radii, asize)
        elif not onlycoords and startswith.strip() == 'TER':
            termini[acount - 1] = True
        elif startswith[:3] == 'END':
            # matches both END and ENDMDL records
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                i += 1
                break
            diff = stop - i - 1
            # fewer lines remain than one model needs, so stop afterwards
            END = diff < acount
            if coordsets is not None:
                END = END or nmodel >= coordsets.shape[0]
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0}, which contains '
                                '{1} fewer atoms than the first model '
                                'does.'.format(nmodel + 1, n_atoms - acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                     'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    coordinates.resize((acount, 3), refcheck=False)
                    if addcoords:
                        atomgroup.addCoordset(coordinates)
                    else:
                        atomgroup._setCoords(coordinates)
                else:
                    coordsets = np.zeros((int(diff // acount + 1), acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                atomnames.resize(acount, refcheck=False)
                resnames.resize(acount, refcheck=False)
                resnums.resize(acount, refcheck=False)
                chainids.resize(acount, refcheck=False)
                hetero.resize(acount, refcheck=False)
                termini.resize(acount, refcheck=False)
                altlocs.resize(acount, refcheck=False)
                icodes.resize(acount, refcheck=False)
                serials.resize(acount, refcheck=False)
                if not only_subset:
                    atomnames = np.char.strip(atomnames)
                    resnames = np.char.strip(resnames)
                atomgroup.setNames(atomnames)
                atomgroup.setResnames(resnames)
                atomgroup.setResnums(resnums)
                atomgroup.setChids(chainids)
                atomgroup.setFlags('hetatm', hetero)
                atomgroup.setFlags('pdbter', termini)
                atomgroup.setAltlocs(altlocs)
                atomgroup.setIcodes(np.char.strip(icodes))
                atomgroup.setSerials(serials)
                if isPDB:
                    bfactors.resize(acount, refcheck=False)
                    occupancies.resize(acount, refcheck=False)
                    segnames.resize(acount, refcheck=False)
                    elements.resize(acount, refcheck=False)
                    atomgroup.setBetas(bfactors)
                    atomgroup.setOccupancies(occupancies)
                    atomgroup.setSegnames(np.char.strip(segnames))
                    atomgroup.setElements(np.char.strip(elements))
                    from prody.utilities.misctools import getMasses
                    atomgroup.setMasses(getMasses(np.char.strip(elements)))
                    if anisou is not None:
                        anisou.resize((acount, 6), refcheck=False)
                        atomgroup.setAnisous(anisou / 10000)
                    if siguij is not None:
                        siguij.resize((acount, 6), refcheck=False)
                        atomgroup.setAnistds(siguij / 10000)
                else:
                    charges.resize(acount, refcheck=False)
                    radii.resize(acount, refcheck=False)
                    atomgroup.setCharges(charges)
                    atomgroup.setRadii(radii)

                nmodel += 1
                n_atoms = acount
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums,
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                # the record belongs to the atom parsed just before it;
                # six 7-column fields span columns 29-70 (slices 28:70)
                index = acount - 1
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                anisou[index, 2] = line[42:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse anisotropic temperature '
                            'factors at line {0}'.format(i))
        elif isPDB and startswith == 'SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                # same column layout as ANISOU records
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                siguij[index, 2] = line[42:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse standard deviations of '
                            'anisotropic temperature factors at line {0}'
                            .format(i))
        i += 1

    if onlycoords:
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        del coordinates
        coordsets.resize((nmodel, atomgroup.numAtoms(), 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordsets)
        else:
            atomgroup._setCoords(coordsets)
    elif not END:
        # this means last line was an ATOM line, so atomgroup is not decorated
        coordinates.resize((acount, 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordinates)
        else:
            atomgroup._setCoords(coordinates)
        atomnames.resize(acount, refcheck=False)
        resnames.resize(acount, refcheck=False)
        resnums.resize(acount, refcheck=False)
        chainids.resize(acount, refcheck=False)
        hetero.resize(acount, refcheck=False)
        termini.resize(acount, refcheck=False)
        altlocs.resize(acount, refcheck=False)
        icodes.resize(acount, refcheck=False)
        serials.resize(acount, refcheck=False)
        if not only_subset:
            atomnames = np.char.strip(atomnames)
            resnames = np.char.strip(resnames)
        atomgroup.setNames(atomnames)
        atomgroup.setResnames(resnames)
        atomgroup.setResnums(resnums)
        atomgroup.setChids(chainids)
        atomgroup.setFlags('hetatm', hetero)
        atomgroup.setFlags('pdbter', termini)
        atomgroup.setAltlocs(altlocs)
        atomgroup.setIcodes(np.char.strip(icodes))
        atomgroup.setSerials(serials)
        if isPDB:
            if anisou is not None:
                anisou.resize((acount, 6), refcheck=False)
                atomgroup.setAnisous(anisou / 10000)
            if siguij is not None:
                siguij.resize((acount, 6), refcheck=False)
                atomgroup.setAnistds(siguij / 10000)
            bfactors.resize(acount, refcheck=False)
            occupancies.resize(acount, refcheck=False)
            segnames.resize(acount, refcheck=False)
            elements.resize(acount, refcheck=False)
            atomgroup.setSegnames(np.char.strip(segnames))
            atomgroup.setElements(np.char.strip(elements))
            from prody.utilities.misctools import getMasses
            atomgroup.setMasses(getMasses(np.char.strip(elements)))
            atomgroup.setBetas(bfactors)
            atomgroup.setOccupancies(occupancies)
        else:
            charges.resize(acount, refcheck=False)
            radii.resize(acount, refcheck=False)
            atomgroup.setCharges(charges)
            atomgroup.setRadii(radii)

    if altloc and altloc_torf:
        _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)

    return atomgroup
def getMasses(self):
    """Return the mass looked up for the element of this object.

    The element obtained from :meth:`getElement` is passed to
    :func:`prody.utilities.misctools.getMasses` and its result is returned
    unchanged."""

    # imported under an alias so the helper is not confused with this method
    from prody.utilities.misctools import getMasses as _lookupMasses

    element = self.getElement()
    return _lookupMasses(element)
def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                   altloc_torf, format='PDB'):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    ATOM/HETATM records found in *lines* are parsed into per-atom arrays
    that are set on *atomgroup* when the first model ends.  Records of
    subsequent models only contribute coordinate sets.

    :arg atomgroup: atom group that receives coordinates and atomic data
    :arg lines: PDB/PQR lines
    :arg split: starting index for coordinate data lines
    :arg model: model index to parse, **None** parses all models
    :arg chain: chain identifiers to keep, **None** keeps all chains
    :arg subset: ``'ca'`` or ``'bb'`` to keep a subset of protein atoms,
        a false value keeps all atoms
    :arg altloc_torf: a string of alternate location identifiers to parse;
        any non-string value parses location ``'A'`` and hands remaining
        alternate location records to :func:`_evalAltlocs`
    :arg format: ``'PDB'`` for fixed-column records, any other value is
        parsed as whitespace separated PQR fields

    :raises PDBParseError: when requested *model* is not found or when
        coordinates of an atom cannot be parsed
    :raises ValueError: when *atomgroup* already has atoms and the number
        of parsed atoms does not match"""

    def _extended(array, extra):
        """Return *array* with *extra* zero-filled entries appended."""
        padding = np.zeros((extra,) + array.shape[1:], dtype=array.dtype)
        return np.concatenate((array, padding))

    format = format.upper()
    isPDB = format == 'PDB'

    if subset:
        if subset == 'ca':
            subset = set(('CA', ))
        elif subset in 'bb':
            subset = flags.BACKBONE
        only_subset = True
        protein_resnames = flags.AMINOACIDS
    else:
        only_subset = False
    only_chains = chain is not None
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        # most PDB files contain less than 99999 atoms
        asize = min(len(lines) - split, 99999)
    addcoords = atomgroup.numCoordsets() > 0
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        anisou = None
        siguij = None
    else:
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)

    asize = 2000  # increase array length by this much when needed

    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i + 1
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0} is not found'.format(model))

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'.format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True

    acount = 0
    coordsets = None
    # records with an alternate location that is not parsed directly,
    # keyed by location identifier, kept for _evalAltlocs
    altloc = defaultdict(list)
    i = start
    END = False
    while i < stop:
        line = lines[i]
        if not isPDB:
            fields = line.split()
            if len(fields) == 10:
                # 10 fields are taken to mean the chain identifier is absent
                fields.insert(4, '')
            elif len(fields) != 11:
                LOGGER.warn(
                    'wrong number of fields for PQR format at line %d' % i)
                i += 1
                continue

        if isPDB:
            startswith = line[0:6].strip()
        else:
            startswith = fields[0]

        if startswith == 'ATOM' or startswith == 'HETATM':
            if isPDB:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
            else:
                atomname = fields[2]
                resname = fields[3]

            if only_subset:
                if not (atomname in subset and resname in protein_resnames):
                    i += 1
                    continue

            if isPDB:
                chid = line[21]
            else:
                chid = fields[4]

            if only_chains:
                if chid not in chain:
                    i += 1
                    continue

            if isPDB:
                alt = line[16]
                if alt not in which_altlocs:
                    altloc[alt].append((line, i))
                    i += 1
                    continue
            else:
                alt = ' '

            try:
                if isPDB:
                    coordinates[acount, 0] = line[30:38]
                    coordinates[acount, 1] = line[38:46]
                    coordinates[acount, 2] = line[46:54]
                else:
                    coordinates[acount, 0] = fields[6]
                    coordinates[acount, 1] = fields[7]
                    coordinates[acount, 2] = fields[8]
            except Exception:
                # either the text is not a number or this model has more
                # atoms than the coordinate array can hold
                if acount >= n_atoms > 0:
                    if nmodel == 0:
                        raise ValueError(format + ' file and AtomGroup ag '
                                         'must have same number of atoms')
                    LOGGER.warn(
                        'Discarding model {0}, which contains {1} more '
                        'atoms than first model does.'.format(
                            nmodel + 1, acount - n_atoms + 1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    if isPDB:
                        while lines[i][:6] != 'ENDMDL':
                            i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                        'line {0}'.format(i + 1))
            if onlycoords:
                # atomic data was set after the first model, later models
                # only provide coordinates
                acount += 1
                i += 1
                continue

            try:
                serials[acount] = int(line[6:11]) if isPDB else int(fields[1])
            except ValueError:
                try:
                    # serial numbers may be written in hexadecimal
                    serials[acount] = int(line[6:11], 16) if isPDB else int(
                        fields[1], 16)
                except ValueError:
                    LOGGER.warn(
                        'failed to parse serial number in line {0}'.format(i))
                    serials[acount] = serials[acount - 1] + 1
            altlocs[acount] = alt
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            if isPDB:
                resnums[acount] = line[22:26]
                icodes[acount] = line[26]
            else:
                resnum = fields[5]
                if resnum[-1].isalpha():
                    icode = resnum[-1]
                else:
                    icode = ' '
                resnums[acount] = resnum
                icodes[acount] = icode

            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except Exception:
                    LOGGER.warn(
                        'failed to parse occupancy at line {0}'.format(i))
                try:
                    bfactors[acount] = line[60:66]
                except Exception:
                    LOGGER.warn(
                        'failed to parse beta-factor at line {0}'.format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
                try:
                    # the sign character follows the digit in the record
                    charges[acount] = int(line[79] + line[78])
                except Exception:
                    charges[acount] = 0
            else:
                try:
                    charges[acount] = fields[9]
                except Exception:
                    LOGGER.warn('failed to parse charge at line {0}'.format(i))
                try:
                    radii[acount] = fields[10]
                except Exception:
                    LOGGER.warn('failed to parse radius at line {0}'.format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = _extended(coordinates, asize)
                atomnames = _extended(atomnames, asize)
                resnames = _extended(resnames, asize)
                resnums = _extended(resnums, asize)
                chainids = _extended(chainids, asize)
                hetero = _extended(hetero, asize)
                termini = _extended(termini, asize)
                altlocs = _extended(altlocs, asize)
                icodes = _extended(icodes, asize)
                serials = _extended(serials, asize)
                # charges are written for both formats, so they must grow
                # for both; with the initial size capped at 99999 a larger
                # PDB file would otherwise index past the end of the array
                charges = _extended(charges, asize)
                if isPDB:
                    bfactors = _extended(bfactors, asize)
                    occupancies = _extended(occupancies, asize)
                    segnames = _extended(segnames, asize)
                    elements = _extended(elements, asize)
                    if anisou is not None:
                        anisou = _extended(anisou, asize)
                    if siguij is not None:
                        siguij = _extended(siguij, asize)
                else:
                    radii = _extended(radii, asize)
        elif not onlycoords and startswith.strip() == 'TER':
            termini[acount - 1] = True
        elif startswith[:3] == 'END':
            # matches both END and ENDMDL records
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                i += 1
                break
            diff = stop - i - 1
            # fewer lines remain than one model needs, so stop afterwards
            END = diff < acount
            if coordsets is not None:
                END = END or nmodel >= coordsets.shape[0]
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0}, which contains '
                                '{1} fewer atoms than the first model '
                                'does.'.format(nmodel + 1, n_atoms - acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                     'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    coordinates.resize((acount, 3), refcheck=False)
                    if addcoords:
                        atomgroup.addCoordset(coordinates)
                    else:
                        atomgroup._setCoords(coordinates)
                else:
                    coordsets = np.zeros((int(diff // acount + 1), acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                atomnames.resize(acount, refcheck=False)
                resnames.resize(acount, refcheck=False)
                resnums.resize(acount, refcheck=False)
                chainids.resize(acount, refcheck=False)
                hetero.resize(acount, refcheck=False)
                termini.resize(acount, refcheck=False)
                altlocs.resize(acount, refcheck=False)
                icodes.resize(acount, refcheck=False)
                serials.resize(acount, refcheck=False)
                if not only_subset:
                    atomnames = np.char.strip(atomnames)
                    resnames = np.char.strip(resnames)
                atomgroup.setNames(atomnames)
                atomgroup.setResnames(resnames)
                atomgroup.setResnums(resnums)
                atomgroup.setChids(chainids)
                atomgroup.setFlags('hetatm', hetero)
                atomgroup.setFlags('pdbter', termini)
                atomgroup.setAltlocs(altlocs)
                atomgroup.setIcodes(np.char.strip(icodes))
                atomgroup.setSerials(serials)
                if isPDB:
                    bfactors.resize(acount, refcheck=False)
                    occupancies.resize(acount, refcheck=False)
                    segnames.resize(acount, refcheck=False)
                    elements.resize(acount, refcheck=False)
                    atomgroup.setBetas(bfactors)
                    atomgroup.setOccupancies(occupancies)
                    atomgroup.setSegnames(np.char.strip(segnames))
                    atomgroup.setElements(np.char.strip(elements))
                    from prody.utilities.misctools import getMasses
                    atomgroup.setMasses(getMasses(np.char.strip(elements)))
                    if anisou is not None:
                        anisou.resize((acount, 6), refcheck=False)
                        atomgroup.setAnisous(anisou / 10000)
                    if siguij is not None:
                        siguij.resize((acount, 6), refcheck=False)
                        atomgroup.setAnistds(siguij / 10000)
                else:
                    charges.resize(acount, refcheck=False)
                    radii.resize(acount, refcheck=False)
                    atomgroup.setCharges(charges)
                    atomgroup.setRadii(radii)

                nmodel += 1
                n_atoms = acount
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums,
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                # the record belongs to the atom parsed just before it;
                # six 7-column fields span columns 29-70 (slices 28:70)
                index = acount - 1
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                anisou[index, 2] = line[42:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse anisotropic temperature '
                            'factors at line {0}'.format(i))
        elif isPDB and startswith == 'SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                # same column layout as ANISOU records
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                siguij[index, 2] = line[42:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn(
                    'failed to parse standard deviations of '
                    'anisotropic temperature factors at line {0}'.format(i))
        i += 1

    if onlycoords:
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        del coordinates
        coordsets.resize((nmodel, atomgroup.numAtoms(), 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordsets)
        else:
            atomgroup._setCoords(coordsets)
    elif not END:
        # this means last line was an ATOM line, so atomgroup is not decorated
        coordinates.resize((acount, 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordinates)
        else:
            atomgroup._setCoords(coordinates)
        atomnames.resize(acount, refcheck=False)
        resnames.resize(acount, refcheck=False)
        resnums.resize(acount, refcheck=False)
        chainids.resize(acount, refcheck=False)
        hetero.resize(acount, refcheck=False)
        termini.resize(acount, refcheck=False)
        altlocs.resize(acount, refcheck=False)
        icodes.resize(acount, refcheck=False)
        serials.resize(acount, refcheck=False)
        if not only_subset:
            atomnames = np.char.strip(atomnames)
            resnames = np.char.strip(resnames)
        atomgroup.setNames(atomnames)
        atomgroup.setResnames(resnames)
        atomgroup.setResnums(resnums)
        atomgroup.setChids(chainids)
        atomgroup.setFlags('hetatm', hetero)
        atomgroup.setFlags('pdbter', termini)
        atomgroup.setAltlocs(altlocs)
        atomgroup.setIcodes(np.char.strip(icodes))
        atomgroup.setSerials(serials)
        if isPDB:
            if anisou is not None:
                anisou.resize((acount, 6), refcheck=False)
                atomgroup.setAnisous(anisou / 10000)
            if siguij is not None:
                siguij.resize((acount, 6), refcheck=False)
                atomgroup.setAnistds(siguij / 10000)
            bfactors.resize(acount, refcheck=False)
            occupancies.resize(acount, refcheck=False)
            segnames.resize(acount, refcheck=False)
            elements.resize(acount, refcheck=False)
            atomgroup.setSegnames(np.char.strip(segnames))
            atomgroup.setElements(np.char.strip(elements))
            from prody.utilities.misctools import getMasses
            atomgroup.setMasses(getMasses(np.char.strip(elements)))
            atomgroup.setBetas(bfactors)
            atomgroup.setOccupancies(occupancies)
        else:
            charges.resize(acount, refcheck=False)
            radii.resize(acount, refcheck=False)
            atomgroup.setCharges(charges)
            atomgroup.setRadii(radii)

    if altloc and altloc_torf:
        _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)

    return atomgroup
def _parseMMCIFLines(atomgroup, lines, model, chain, subset,
                     altloc_torf, header):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    The ``_atom_site`` block of *lines* is located, its column order is read
    from the ``_atom_site.*`` item names, and ATOM/HETATM rows are parsed
    into per-atom arrays that are set on *atomgroup*.  Atomic data comes from
    the first parsed model, remaining models are added as coordinate sets.

    :arg atomgroup: atom group that receives coordinates and atomic data
    :arg lines: mmCIF lines
    :arg model: model number to parse, **None** parses all models
    :arg chain: chain identifiers to keep, either a sequence or a comma
        separated string, **None** keeps all chains
    :arg subset: ``'ca'`` or ``'bb'`` to keep a subset of protein atoms,
        **None** keeps all atoms
    :arg altloc_torf: a string of alternate location identifiers to parse;
        any non-string value parses location ``'A'``
    :arg header: when true, lines outside the atom block are parsed with
        :func:`parseSTARLines` and returned along with the atom group

    :returns: *atomgroup*, or a tuple of *atomgroup* and parsed header when
        *header* is true

    :raises mmCIFParseError: when no ATOM/HETATM rows are found or when the
        requested *model* is not present
    """

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA', ))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # locate the atom block, recording the column index of each item and
    # the model number of every atom row
    models = []
    fields = {}
    fieldCounter = -1
    start = None
    stop = None
    for i, line in enumerate(lines):
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith(('ATOM', 'HETATM')):
            if start is None:
                start = i
            models.append(line.split()[fields['pdbx_PDB_model_num']])
        elif start is not None:
            stop = i
            break
    if start is None:
        raise mmCIFParseError('no ATOM or HETATM records are found')
    if stop is None:
        # the atom block runs to the end of the file
        stop = len(lines)
    asize = len(models)

    # count runs of equal model numbers; the first row always opens a model
    nModels = 0
    previous = None
    for model_id in models:
        if model_id != previous:
            nModels += 1
            previous = model_id

    if model is not None and model != 1:
        if str(model) not in models:
            raise mmCIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'.format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
        altloc_torf = False
    else:
        which_altlocs = '.A'
        altloc_torf = True

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    if isinstance(chain, str):
        chain = chain.split(',')

    acount = 0
    # models[k] is the model number of the k-th row of the atom block, so
    # rows are paired with it directly instead of being looked up by the
    # number of atoms kept so far
    for model_id, line in zip(models, lines[start:stop]):
        if model is not None:
            model_num = int(model_id)
            if model_num < model:
                continue
            elif model_num > model:
                break

        tokens = line.split()
        startswith = tokens[fields['group_PDB']]

        atomname = tokens[fields['auth_atom_id']]
        resname = tokens[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[fields['auth_asym_id']]
        if chain is not None:
            if chID not in chain:
                continue

        segID = tokens[fields['label_asym_id']]

        alt = tokens[fields['label_alt_id']]
        if alt not in which_altlocs:
            continue

        coordinates[acount] = [tokens[fields['Cartn_x']],
                               tokens[fields['Cartn_y']],
                               tokens[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[fields['auth_seq_id']]
        chainids[acount] = chID
        segnames[acount] = segID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # a change of chain identifier marks the previous atom as a terminus
        # NOTE(review): for the first atom this compares against the last,
        # still empty, array entry and flags that entry -- confirm intent
        if chainids[acount] != chainids[acount - 1]:
            termini[acount - 1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[fields['pdbx_PDB_ins_code']]

        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = tokens[fields['id']]
        elements[acount] = tokens[fields['type_symbol']]
        bfactors[acount] = tokens[fields['B_iso_or_equiv']]
        occupancies[acount] = tokens[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    modelSize = acount // nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setSegnames(segnames[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])

    atomgroup.setElements(elements[:modelSize])
    from prody.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n * modelSize:(n + 1) * modelSize])

    if header:
        # everything outside the atom block and its item names is header
        header = parseSTARLines(lines[:start - fieldCounter - 2] +
                                lines[stop:], shlex=True)
        return atomgroup, header

    return atomgroup
def _parseMMCIFLines(atomgroup, lines, model, chain, subset, altloc_torf):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    Column positions are taken from the ``_atom_site.<name>`` declarations
    that precede the atom records.  The atom block is the first contiguous
    run of lines that start with ``ATOM `` or ``HETATM``.  After the atomic
    data are set, anisotropic temperature factors are read from the
    ``_atom_site_anisotrop`` section when it is present.

    :arg atomgroup: atom group that receives the parsed data

    :arg lines: mmCIF lines

    :arg model: model number to parse, compared with the
        ``pdbx_PDB_model_num`` column, or **None** to parse every model
        (the first one sets the atomic data, the others are appended as
        additional coordinate sets)

    :arg chain: chain identifiers to keep, either a comma separated string
        or a container of identifiers, **None** keeps all chains

    :arg subset: ``'ca'`` keeps atoms named ``CA``, a value contained in
        ``'bb'`` keeps atom names in ``flags.BACKBONE``; in both cases the
        residue name must be in ``flags.AMINOACIDS``, **None** keeps all

    :arg altloc_torf: string of alternate location indicators to keep in
        addition to ``.``, ``'all'`` keeps every indicator; for any
        non-string value atoms with indicator ``.`` or ``A`` are kept

    :returns: *atomgroup*; it is returned untouched when *lines* contain
        no atom records

    :raises mmCIFParseError: when *model* is neither **None** nor 1 and
        no atom record carries that model number
    """

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # split a comma separated chain string once instead of once per atom
    if isinstance(chain, str):
        chain = chain.split(',')

    # Locate the atom block, record the column order of the _atom_site
    # items, and collect the model number of every atom record.
    fields = OrderedDict()
    fieldCounter = -1
    models = []
    nModels = 0
    start = 0
    stop = 0
    foundAtomBlock = False
    for i, line in enumerate(lines):
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith(('ATOM ', 'HETATM')):
            if not foundAtomBlock:
                foundAtomBlock = True
                start = i
            models.append(line.split()[fields['pdbx_PDB_model_num']])
            if len(models) == 1 or models[-1] != models[-2]:
                nModels += 1
        elif foundAtomBlock:
            stop = i
            break
    else:
        # the atom block runs to the very last line of the file
        if foundAtomBlock:
            stop = len(lines)

    asize = len(models)
    if asize == 0:
        # no atom records, nothing to add to the atom group
        return atomgroup

    if model is not None and model != 1:
        if str(model) not in models:
            raise mmCIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    if isinstance(altloc_torf, str):
        if altloc_torf == 'all':
            which_altlocs = 'all'
        elif altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
    else:
        which_altlocs = '.A'

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0
    for position, line in enumerate(lines[start:stop]):
        # models[] is aligned with the atom records, so it has to be read
        # with the position of the record, not with the number of atoms
        # accepted so far (the two differ as soon as a filter rejects one)
        if model is not None:
            line_model = int(models[position])
            if line_model < model:
                continue
            if line_model > model:
                break

        tokens = line.split()
        startswith = tokens[fields['group_PDB']]

        atomname = tokens[fields['auth_atom_id']]
        if atomname.startswith('"') and atomname.endswith('"'):
            atomname = atomname[1:-1]
        resname = tokens[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[fields['auth_asym_id']]
        if chain is not None and chID not in chain:
            continue

        segID = tokens[fields['label_asym_id']]

        alt = tokens[fields['label_alt_id']]
        if which_altlocs != 'all' and alt not in which_altlocs:
            continue

        if alt == '.':
            alt = ' '

        coordinates[acount] = [tokens[fields['Cartn_x']],
                               tokens[fields['Cartn_y']],
                               tokens[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[fields['auth_seq_id']]
        chainids[acount] = chID
        segnames[acount] = segID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # flag the last atom of the previous chain; for the first accepted
        # atom index -1 wraps around to the last slot of the arrays
        if chainids[acount] != chainids[acount-1]:
            termini[acount-1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[fields['pdbx_PDB_ins_code']]

        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = tokens[fields['id']]
        elements[acount] = tokens[fields['type_symbol']]
        bfactors[acount] = tokens[fields['B_iso_or_equiv']]
        occupancies[acount] = tokens[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    # every model is assumed to hold the same number of accepted atoms
    modelSize = acount//nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])
        atomgroup.setNames(atomnames[:modelSize])
        atomgroup.setResnames(resnames[:modelSize])
        atomgroup.setResnums(resnums[:modelSize])
        atomgroup.setSegnames(segnames[:modelSize])
        atomgroup.setChids(chainids[:modelSize])
        atomgroup.setFlags('hetatm', hetero[:modelSize])
        atomgroup.setFlags('pdbter', termini[:modelSize])
        atomgroup.setAltlocs(altlocs[:modelSize])
        atomgroup.setIcodes(icodes[:modelSize])
        atomgroup.setSerials(serials[:modelSize])

        atomgroup.setElements(elements[:modelSize])
        from prody.utilities.misctools import getMasses
        atomgroup.setMasses(getMasses(elements[:modelSize]))
        atomgroup.setBetas(bfactors[:modelSize])
        atomgroup.setOccupancies(occupancies[:modelSize])

    anisou = None
    siguij = None
    try:
        data = parseSTARSection(lines, "_atom_site_anisotrop")
        data[0]  # check if data has anything in it
    except IndexError:
        LOGGER.warn("No anisotropic B factors found")
    else:
        # one row per atom stored in the atom group
        anisou = np.zeros((modelSize, 6),
                          dtype=ATOMIC_FIELDS['anisou'].dtype)

        if "_atom_site_anisotrop.U[1][1]_esd" in data[0].keys():
            siguij = np.zeros((modelSize, 6),
                              dtype=ATOMIC_FIELDS['siguij'].dtype)

        group_serials = atomgroup.getSerials()
        for entry in data:
            try:
                index = np.where(group_serials == int(
                    entry["_atom_site_anisotrop.id"]))[0][0]
            except (IndexError, KeyError, TypeError, ValueError):
                # entry refers to an atom that was not parsed
                continue

            anisou[index, 0] = entry['_atom_site_anisotrop.U[1][1]']
            anisou[index, 1] = entry['_atom_site_anisotrop.U[2][2]']
            anisou[index, 2] = entry['_atom_site_anisotrop.U[3][3]']
            anisou[index, 3] = entry['_atom_site_anisotrop.U[1][2]']
            anisou[index, 4] = entry['_atom_site_anisotrop.U[1][3]']
            anisou[index, 5] = entry['_atom_site_anisotrop.U[2][3]']

            if siguij is not None:
                siguij[index, 0] = entry['_atom_site_anisotrop.U[1][1]_esd']
                siguij[index, 1] = entry['_atom_site_anisotrop.U[2][2]_esd']
                siguij[index, 2] = entry['_atom_site_anisotrop.U[3][3]_esd']
                siguij[index, 3] = entry['_atom_site_anisotrop.U[1][2]_esd']
                siguij[index, 4] = entry['_atom_site_anisotrop.U[1][3]_esd']
                siguij[index, 5] = entry['_atom_site_anisotrop.U[2][3]_esd']

        atomgroup.setAnisous(anisou)  # no division needed anymore
        atomgroup.setAnistds(siguij)  # no division needed anymore

    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    return atomgroup
def _parseCIFLines(atomgroup, lines, model, chain, subset, altloc_torf):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    Column positions are taken from the ``_atom_site.<name>`` declarations
    that precede the atom records.  The atom block is the first contiguous
    run of lines that start with ``ATOM`` or ``HETATM``.

    :arg atomgroup: atom group that receives the parsed data

    :arg lines: CIF lines

    :arg model: model number to parse, compared with the
        ``pdbx_PDB_model_num`` column, or **None** to parse every model
        (the first one sets the atomic data, the others are appended as
        additional coordinate sets)

    :arg chain: container of chain identifiers to keep, tested with
        ``in`` (so a plain string matches by substring), **None** keeps
        all chains

    :arg subset: ``'ca'`` keeps atoms named ``CA``, a value contained in
        ``'bb'`` keeps atom names in ``flags.BACKBONE``; in both cases the
        residue name must be in ``flags.AMINOACIDS``, **None** keeps all

    :arg altloc_torf: string of alternate location indicators to keep in
        addition to ``.``; for any non-string value atoms with indicator
        ``.`` or ``A`` are kept

    :returns: *atomgroup*; it is returned untouched when *lines* contain
        no atom records

    :raises CIFParseError: when *model* is neither **None** nor 1 and no
        atom record carries that model number
    """

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # Locate the atom block, record the column order of the _atom_site
    # items, and collect the model number of every atom record.
    fields = {}
    fieldCounter = -1
    models = []
    nModels = 0
    start = 0
    stop = 0
    foundAtomBlock = False
    for i, line in enumerate(lines):
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith(('ATOM', 'HETATM')):
            if not foundAtomBlock:
                foundAtomBlock = True
                start = i
            models.append(line.split()[fields['pdbx_PDB_model_num']])
            # the first record always opens a model; comparing it with
            # models[-1] (itself) would leave the first model uncounted
            if len(models) == 1 or models[-1] != models[-2]:
                nModels += 1
        elif foundAtomBlock:
            stop = i
            break
    else:
        # the atom block runs to the very last line of the file
        if foundAtomBlock:
            stop = len(lines)

    asize = len(models)
    if asize == 0:
        # no atom records, nothing to add to the atom group
        return atomgroup

    if model is not None and model != 1:
        if str(model) not in models:
            raise CIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
    else:
        which_altlocs = '.A'

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0
    for position, line in enumerate(lines[start:stop]):
        # models[] is aligned with the atom records, so it has to be read
        # with the position of the record, not with the number of atoms
        # accepted so far (the two differ as soon as a filter rejects one)
        if model is not None:
            line_model = int(models[position])
            if line_model < model:
                continue
            if line_model > model:
                break

        tokens = line.split()
        startswith = tokens[fields['group_PDB']]

        atomname = tokens[fields['auth_atom_id']]
        resname = tokens[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[fields['auth_asym_id']]
        if chain is not None and chID not in chain:
            continue

        alt = tokens[fields['label_alt_id']]
        if alt not in which_altlocs:
            continue

        coordinates[acount] = [tokens[fields['Cartn_x']],
                               tokens[fields['Cartn_y']],
                               tokens[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[fields['auth_seq_id']]
        chainids[acount] = chID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # flag the last atom of the previous chain; for the first accepted
        # atom index -1 wraps around to the last slot of the arrays
        if chainids[acount] != chainids[acount-1]:
            termini[acount-1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[fields['pdbx_PDB_ins_code']]

        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = tokens[fields['id']]
        elements[acount] = tokens[fields['type_symbol']]
        bfactors[acount] = tokens[fields['B_iso_or_equiv']]
        occupancies[acount] = tokens[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    # every model is assumed to hold the same number of accepted atoms
    modelSize = acount//nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])
        atomgroup.setNames(atomnames[:modelSize])
        atomgroup.setResnames(resnames[:modelSize])
        atomgroup.setResnums(resnums[:modelSize])
        atomgroup.setChids(chainids[:modelSize])
        atomgroup.setFlags('hetatm', hetero[:modelSize])
        atomgroup.setFlags('pdbter', termini[:modelSize])
        atomgroup.setAltlocs(altlocs[:modelSize])
        atomgroup.setIcodes(icodes[:modelSize])
        atomgroup.setSerials(serials[:modelSize])

        atomgroup.setElements(elements[:modelSize])
        from prody.utilities.misctools import getMasses
        atomgroup.setMasses(getMasses(elements[:modelSize]))
        atomgroup.setBetas(bfactors[:modelSize])
        atomgroup.setOccupancies(occupancies[:modelSize])

    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    return atomgroup
def _parseCIFLines(atomgroup, lines, model, chain, subset, altloc_torf):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    Atom records are read by fixed column position, so the ``_atom_site``
    items are assumed to be in the standard order (26 whitespace separated
    columns ending with ``auth_comp_id``, ``auth_asym_id``,
    ``auth_atom_id`` and ``pdbx_PDB_model_num``).  The atom block is the
    first contiguous run of lines that start with ``ATOM `` or ``HETATM``.

    :arg atomgroup: atom group that receives the parsed data

    :arg lines: CIF lines

    :arg model: model number to parse, compared with the
        ``pdbx_PDB_model_num`` column, or **None** to parse every model
        (the first one sets the atomic data, the others are appended as
        additional coordinate sets)

    :arg chain: container of chain identifiers to keep, tested with
        ``in`` (so a plain string matches by substring), **None** keeps
        all chains

    :arg subset: ``'ca'`` keeps atoms named ``CA``, a value contained in
        ``'bb'`` keeps atom names in ``flags.BACKBONE``; in both cases the
        residue name must be in ``flags.AMINOACIDS``, **None** keeps all

    :arg altloc_torf: string of alternate location indicators to keep in
        addition to ``.``; for any non-string value atoms with indicator
        ``.`` or ``A`` are kept

    :returns: *atomgroup*; it is returned untouched when *lines* contain
        no atom records

    :raises CIFParseError: when *model* is neither **None** nor 1 and no
        atom record carries that model number
    """

    # positions of the items in a whitespace split atom record
    GROUP_PDB = 0       # group_PDB
    SERIAL = 1          # id
    ELEMENT = 2         # type_symbol
    ALTLOC = 4          # label_alt_id in standard pos
    ICODE = 9           # pdbx_PDB_ins_code
    OCCUPANCY = 13
    BFACTOR = 14
    RESNUM = 21         # auth_seq_id
    MODEL_NUM = 25      # pdbx_PDB_model_num
    RESNAME = -4        # auth_comp_id in standard pos
    CHID = -3           # auth_asym_id in standard pos
    ATOMNAME = -2       # auth_atom_id in standard pos

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # Locate the atom block and collect the model number of every record.
    models = []
    nModels = 0
    start = 0
    stop = 0
    foundAtomBlock = False
    for i, line in enumerate(lines):
        # prefix test instead of comparing a six character slice with the
        # five character 'ATOM ' literal, which could never be equal
        if line.startswith(('ATOM ', 'HETATM')):
            if not foundAtomBlock:
                foundAtomBlock = True
                start = i
            models.append(line.split()[MODEL_NUM])
            # the first record always opens a model; comparing it with
            # models[-1] (itself) would leave the first model uncounted
            if len(models) == 1 or models[-1] != models[-2]:
                nModels += 1
        elif foundAtomBlock:
            stop = i
            break
    else:
        # the atom block runs to the very last line of the file
        if foundAtomBlock:
            stop = len(lines)

    asize = len(models)
    if asize == 0:
        # no atom records, nothing to add to the atom group
        return atomgroup

    if model is not None and model != 1:
        if str(model) not in models:
            raise CIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
    else:
        which_altlocs = '.A'

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0
    for position, line in enumerate(lines[start:stop]):
        # models[] is aligned with the atom records, so it has to be read
        # with the position of the record, not with the number of atoms
        # accepted so far (the two differ as soon as a filter rejects one)
        if model is not None:
            line_model = int(models[position])
            if line_model < model:
                continue
            if line_model > model:
                break

        tokens = line.split()
        startswith = tokens[GROUP_PDB]

        atomname = tokens[ATOMNAME]
        resname = tokens[RESNAME]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[CHID]
        if chain is not None and chID not in chain:
            continue

        alt = tokens[ALTLOC]
        if alt not in which_altlocs:
            continue

        coordinates[acount] = tokens[10:13]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[RESNUM]
        chainids[acount] = chID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # flag the last atom of the previous chain; for the first accepted
        # atom index -1 wraps around to the last slot of the arrays
        if chainids[acount] != chainids[acount-1]:
            termini[acount-1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[ICODE]

        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = tokens[SERIAL]
        elements[acount] = tokens[ELEMENT]
        bfactors[acount] = tokens[BFACTOR]
        occupancies[acount] = tokens[OCCUPANCY]

        acount += 1

    if model is not None:
        nModels = 1

    # every model is assumed to hold the same number of accepted atoms
    modelSize = acount//nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])
        atomgroup.setNames(atomnames[:modelSize])
        atomgroup.setResnames(resnames[:modelSize])
        atomgroup.setResnums(resnums[:modelSize])
        atomgroup.setChids(chainids[:modelSize])
        atomgroup.setFlags('hetatm', hetero[:modelSize])
        atomgroup.setFlags('pdbter', termini[:modelSize])
        atomgroup.setAltlocs(altlocs[:modelSize])
        atomgroup.setIcodes(icodes[:modelSize])
        atomgroup.setSerials(serials[:modelSize])

        atomgroup.setElements(elements[:modelSize])
        from prody.utilities.misctools import getMasses
        atomgroup.setMasses(getMasses(elements[:modelSize]))
        atomgroup.setBetas(bfactors[:modelSize])
        atomgroup.setOccupancies(occupancies[:modelSize])

    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    return atomgroup