def buildFromChain(self, chain):
    """Build from a :class:`.Chain`.

    For every residue of *chain* whose residue name belongs to the
    ``'protein'`` keyword set, a :class:`SimpleResidue` is appended to
    ``self._list`` and registered in ``self._dict`` under the key
    ``(resnum, icode)``, and its one-letter code (``'X'`` when the residue
    name is missing from ``AAMAP``) is appended to ``self._seq``.  When
    ``self._gaps`` is true, ``NONE_A`` is inserted once for each residue
    number skipped between consecutive protein residues.  Finally
    ``self._title`` is set from the chain identifier and the title of the
    atom group that the chain belongs to.

    A chain without residues adds nothing to the sequence and only sets
    the title.

    :arg chain: chain to build the sequence from
    :type chain: :class:`.Chain`"""

    assert isinstance(chain, Chain), 'chain must be a Chain instance'
    gaps = self._gaps
    # Only the first residue is needed to seed the gap counter, so avoid
    # materializing all residues; ``None`` marks a chain with no residues
    # instead of failing with an IndexError.
    first = next(iter(chain.iterResidues()), None)
    if first is None:
        # NOTE(review): assumes iterating *chain* below yields the same
        # residues as iterResidues(), i.e. nothing in this case.
        temp = None
    else:
        temp = first.getResnum() - 1
    protein_resnames = set(getKeywordResnames('protein'))
    for res in chain:
        resname = res.getResname()
        if resname not in protein_resnames:
            continue
        resid = res.getResnum()
        incod = res.getIcode()
        aa = AAMAP.get(resname, 'X')
        simpres = SimpleResidue(resid, aa, incod, res)
        if gaps:
            # number of residue numbers skipped since the previous residue
            diff = resid - temp - 1
            if diff > 0:
                self._seq += NONE_A * diff
            temp = resid
        self._seq += aa
        self._list.append(simpres)
        self._dict[(resid, incod)] = simpres
    self._title = 'Chain {0:s} from {1:s}'.format(
        chain.getChid(), chain.getAtomGroup().getTitle())
def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                   altloc_torf, format='PDB'):
    """Return an AtomGroup. See also :func:`.parsePDBStream()`.

    Atomic data and coordinates parsed from *lines* are stored in
    *atomgroup*, which is also the object that is returned.

    :arg atomgroup: atom group to populate.  When it already contains
        atoms, the parsed model(s) must have the same number of atoms.
        When it already contains coordinate sets, parsed coordinates are
        appended with ``addCoordset`` instead of being set.
    :arg lines: PDB/PQR lines
    :arg split: starting index for coordinate data lines
    :arg model: model to parse; **None** parses all models, an integer
        stops parsing at the end of that model (for values other than 1
        the matching ``MODEL`` record is located first)
    :arg chain: container of chain identifiers to keep (tested with
        ``in`` against the single chain identifier character of each
        record), or **None** to keep all chains
    :arg subset: **None**, or one of ``'calpha'``/``'ca'`` and
        ``'backbone'``/``'bb'`` (case insensitive) to keep only those
        atoms of protein residues
    :arg altloc_torf: when a string, the alternate location indicators to
        parse (records with a blank indicator are always parsed); any
        other value parses location ``'A'`` and hands the remaining
        alternate location records to :func:`_evalAltlocs`
    :arg format: ``'PDB'`` (case insensitive); any other value makes
        columns 55-69 be read as charge and radius instead of occupancy
        and temperature factor, and disables segment, element, ANISOU and
        SIGUIJ parsing

    :raises PDBParseError: when the requested model is not found or the
        coordinates of an atom record cannot be parsed
    :raises ValueError: when the number of parsed atoms does not match the
        number of atoms already present in *atomgroup*"""

    format = format.upper()
    isPDB = format == 'PDB'

    if subset is not None:
        subset = subset.lower()
        if subset in ('calpha', 'ca'):
            subset = set(('CA',))
        elif subset in ('backbone', 'bb'):
            subset = set(getBackboneAtomNames())
        only_subset = True
        protein_resnames = set(getKeywordResnames('protein'))
    else:
        only_subset = False
    only_chains = chain is not None
    # becomes True once the first model is stored; afterwards only
    # coordinates are read from atom records
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        # most PDB files contain less than 99999 atoms
        asize = min(len(lines) - split, 99999)
    addcoords = atomgroup.numCoordsets() > 0
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=ATOMIC_FIELDS['hetero'].dtype)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        # allocated lazily, when the first ANISOU/SIGUIJ record is met
        anisou = None
        siguij = None
    else:
        charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)

    asize = 2000  # increase array length by this much when needed

    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i + 1
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0:d} is not found'.format(model))
    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0:s}.'
                        .format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True

    acount = 0
    # records of alternate locations that are not parsed directly, keyed
    # by their indicator, stored as (line, line index) pairs
    altloc = defaultdict(list)
    i = start
    END = False
    while i < stop:
        line = lines[i]
        startswith = line[0:6]

        # record names fill the first six columns, hence the padded tag
        if startswith == 'ATOM  ' or startswith == 'HETATM':
            if only_subset:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
                if not (atomname in subset and resname in protein_resnames):
                    i += 1
                    continue
            else:
                atomname = line[12:16]
                resname = line[17:21]

            chid = line[21]
            if only_chains:
                if chid not in chain:
                    i += 1
                    continue
            alt = line[16]
            if alt not in which_altlocs:
                altloc[alt].append((line, i))
                i += 1
                continue
            try:
                coordinates[acount, 0] = line[30:38]
                coordinates[acount, 1] = line[38:46]
                coordinates[acount, 2] = line[46:54]
            except Exception:
                # either the text is not a number, or this model has more
                # atoms than the coordinate array can hold
                if acount >= n_atoms > 0:
                    if nmodel == 0:
                        raise ValueError(format + ' file and AtomGroup ag '
                                         'must have same number of atoms')
                    LOGGER.warn('Discarding model {0:d}, which contains more '
                                'atoms than first model does.'
                                .format(nmodel+1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    # skip the remainder of the oversized model
                    while lines[i][:6] != 'ENDMDL':
                        i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                        'line {0:d}.'.format(i+1))
            if onlycoords:
                acount += 1
                i += 1
                continue

            serials[acount] = line[6:11]
            altlocs[acount] = alt
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            resnums[acount] = line[22:26]
            icodes[acount] = line[26]
            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except Exception:
                    LOGGER.warn('failed to parse occupancy at line {0:d}'
                                .format(i))
                try:
                    bfactors[acount] = line[60:66]
                except Exception:
                    LOGGER.warn('failed to parse beta-factor at line {0:d}'
                                .format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
            else:
                try:
                    charges[acount] = line[54:62]
                except Exception:
                    LOGGER.warn('failed to parse charge at line {0:d}'
                                .format(i))
                try:
                    radii[acount] = line[62:69]
                except Exception:
                    LOGGER.warn('failed to parse radius at line {0:d}'
                                .format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = np.concatenate(
                    (coordinates, np.zeros((asize, 3), float)))
                atomnames = np.concatenate((atomnames,
                    np.zeros(asize, ATOMIC_FIELDS['name'].dtype)))
                resnames = np.concatenate((resnames,
                    np.zeros(asize, ATOMIC_FIELDS['resname'].dtype)))
                resnums = np.concatenate((resnums,
                    np.zeros(asize, ATOMIC_FIELDS['resnum'].dtype)))
                chainids = np.concatenate((chainids,
                    np.zeros(asize, ATOMIC_FIELDS['chain'].dtype)))
                hetero = np.concatenate((hetero,
                    np.zeros(asize, ATOMIC_FIELDS['hetero'].dtype)))
                altlocs = np.concatenate((altlocs,
                    np.zeros(asize, ATOMIC_FIELDS['altloc'].dtype)))
                icodes = np.concatenate((icodes,
                    np.zeros(asize, ATOMIC_FIELDS['icode'].dtype)))
                serials = np.concatenate((serials,
                    np.zeros(asize, ATOMIC_FIELDS['serial'].dtype)))
                if isPDB:
                    bfactors = np.concatenate((bfactors,
                        np.zeros(asize, ATOMIC_FIELDS['beta'].dtype)))
                    occupancies = np.concatenate((occupancies,
                        np.zeros(asize, ATOMIC_FIELDS['occupancy'].dtype)))
                    segnames = np.concatenate((segnames,
                        np.zeros(asize, ATOMIC_FIELDS['segment'].dtype)))
                    elements = np.concatenate((elements,
                        np.zeros(asize, ATOMIC_FIELDS['element'].dtype)))
                    if anisou is not None:
                        anisou = np.concatenate((anisou,
                            np.zeros((asize, 6),
                                     ATOMIC_FIELDS['anisou'].dtype)))
                    if siguij is not None:
                        siguij = np.concatenate((siguij,
                            np.zeros((asize, 6),
                                     ATOMIC_FIELDS['siguij'].dtype)))
                else:
                    charges = np.concatenate((charges,
                        np.zeros(asize, ATOMIC_FIELDS['charge'].dtype)))
                    radii = np.concatenate((radii,
                        np.zeros(asize, ATOMIC_FIELDS['radius'].dtype)))
        elif startswith == 'ENDMDL' or startswith[:3] == 'END':
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                # a single model was requested and it has just ended
                i += 1
                break
            # fewer lines remain than one model needs: no more models
            diff = stop - i - 1
            if diff < acount:
                END = True
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0:d}, which contains '
                                '{1:d} fewer atoms than the first model '
                                'does.'.format(nmodel+1, n_atoms-acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                     'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    if addcoords:
                        atomgroup.addCoordset(coordinates[:acount])
                    else:
                        atomgroup._setCoords(coordinates[:acount])
                else:
                    # upper bound on the number of models left in the file;
                    # floor division keeps the array shape an integer
                    coordsets = np.zeros((diff // acount + 1, acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                if not only_subset:
                    # names were already stripped when a subset is parsed
                    atomnames = np.char.strip(atomnames[:acount])
                    resnames = np.char.strip(resnames[:acount])
                atomgroup.setNames(atomnames[:acount])
                atomgroup.setResnames(resnames[:acount])
                atomgroup.setResnums(resnums[:acount])
                atomgroup.setChids(chainids[:acount])
                atomgroup.setHeteros(hetero[:acount])
                atomgroup.setAltlocs(altlocs[:acount])
                atomgroup.setIcodes(np.char.strip(icodes[:acount]))
                atomgroup.setSerials(serials[:acount])
                if isPDB:
                    atomgroup.setBetas(bfactors[:acount])
                    atomgroup.setOccupancies(occupancies[:acount])
                    atomgroup.setSegnames(np.char.strip(segnames[:acount]))
                    atomgroup.setElements(np.char.strip(elements[:acount]))
                    if anisou is not None:
                        atomgroup.setAnisous(anisou[:acount] / 10000)
                    if siguij is not None:
                        atomgroup.setAnistds(siguij[:acount] / 10000)
                else:
                    atomgroup.setCharges(charges[:acount])
                    atomgroup.setRadii(radii[:acount])

                nmodel += 1
                n_atoms = acount
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums,
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                # the record refers to the atom parsed just before it
                index = acount - 1
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                # U(3,3) occupies columns 43-49, i.e. slice 42:49
                anisou[index, 2] = line[42:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse anisotropic temperature '
                            'factors at line {0:d}'.format(i))
        elif isPDB and startswith == 'SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                # same column layout as the ANISOU record
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                siguij[index, 2] = line[42:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse standard deviations of '
                            'anisotropic temperature factors at line {0:d}'
                            .format(i))
        elif startswith == 'SIGATM':
            pass
        i += 1
    if onlycoords:
        # the last model may not be terminated by an ENDMDL/END record
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        if nmodel == coordsets.shape[0]:
            if addcoords:
                atomgroup.addCoordset(coordsets)
            else:
                atomgroup._setCoords(coordsets)
        else:
            # drop the slots reserved for models that were not found
            if addcoords:
                atomgroup.addCoordset(coordsets[:nmodel])
            else:
                atomgroup._setCoords(coordsets[:nmodel])
    elif not END:
        # this means last line was an ATOM line, so atomgroup is not decorated
        if addcoords:
            atomgroup.addCoordset(coordinates[:acount])
        else:
            atomgroup._setCoords(coordinates[:acount])
        if not only_subset:
            atomnames = np.char.strip(atomnames[:acount])
            resnames = np.char.strip(resnames[:acount])
        atomgroup.setNames(atomnames[:acount])
        atomgroup.setResnames(resnames[:acount])
        atomgroup.setResnums(resnums[:acount])
        atomgroup.setChids(chainids[:acount])
        atomgroup.setHeteros(hetero[:acount])
        atomgroup.setAltlocs(altlocs[:acount])
        atomgroup.setIcodes(np.char.strip(icodes[:acount]))
        atomgroup.setSerials(serials[:acount])
        if isPDB:
            if anisou is not None:
                atomgroup.setAnisous(anisou[:acount] / 10000)
            if siguij is not None:
                atomgroup.setAnistds(siguij[:acount] / 10000)
            atomgroup.setSegnames(np.char.strip(segnames[:acount]))
            atomgroup.setElements(np.char.strip(elements[:acount]))
            atomgroup.setBetas(bfactors[:acount])
            atomgroup.setOccupancies(occupancies[:acount])
        else:
            atomgroup.setCharges(charges[:acount])
            atomgroup.setRadii(radii[:acount])
        if altloc and altloc_torf:
            _evalAltlocs(atomgroup, altloc, chainids, resnums,
                         resnames, atomnames)

    return atomgroup