Ejemplo n.º 1
0
 def buildFromChain(self, chain):
     """Build from a :class:`.Chain`."""
     
     assert isinstance(chain, Chain), 'chain must be a Chain instance'
     gaps = self._gaps
     residues = list(chain.iterResidues())
     temp = residues[0].getResnum()-1
     protein_resnames = set(getKeywordResnames('protein'))
     for res in chain:
         if not res.getResname() in protein_resnames:
             continue
         resid = res.getResnum()
         incod = res.getIcode()
         aa = AAMAP.get(res.getResname(), 'X')
         simpres = SimpleResidue(resid, aa, incod, res)
         if gaps:
             diff = resid - temp - 1
             if diff > 0:
                 self._seq += NONE_A * diff
             temp = resid
         self._seq += aa
         self._list.append(simpres)
         self._dict[(resid, incod)] = simpres
     self._title = 'Chain {0:s} from {1:s}'.format(chain.getChid(),
                                          chain.getAtomGroup().getTitle())
Ejemplo n.º 2
0
def _parsePDBLines(atomgroup, lines, split, model, chain, subset, 
                   altloc_torf, format='PDB'):
    """Return an AtomGroup. See also :func:`.parsePDBStream()`.
    
    :arg lines: PDB/PQR lines 
    :arg split: starting index for coordinate data lines"""
    
    format = format.upper()
    if format == 'PDB':
        isPDB = True
    else:
        isPDB = False
    
    if subset is not None:
        subset = subset.lower()
        if subset in ('calpha', 'ca'):
            subset = set(('CA',))
        elif subset in ('backbone', 'bb'):
            subset = set(getBackboneAtomNames())
        only_subset = True
        protein_resnames = set(getKeywordResnames('protein'))
    else:
        only_subset = False
    if chain is None:
        only_chains = False
    else:
        only_chains = True
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        # most PDB files contain less than 99999 atoms
        asize = min(len(lines) - split, 99999)
    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=ATOMIC_FIELDS['hetero'].dtype)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        secondary = None
        anisou = None
        siguij = None
    else:
        charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)
        
    asize = 2000 # increase array length by this much when needed 
        
    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i+1 
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0:d} is not found'.format(model))
    if isinstance(altloc_torf, str): 
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0:s}.'
                        .format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True
        
    acount = 0
    altloc = defaultdict(list)
    i = start
    END = False
    while i < stop:
        line = lines[i]
        startswith = line[0:6]
        
        if startswith == 'ATOM  ' or startswith == 'HETATM':
            if only_subset:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
                if not (atomname in subset and resname in protein_resnames): 
                    i += 1
                    continue
            else:
                atomname = line[12:16]
                resname = line[17:21]
                
            chid = line[21]
            if only_chains:
                if not chid in chain:
                    i += 1
                    continue
            alt = line[16]
            if alt not in which_altlocs:
                altloc[alt].append((line, i))
                i += 1
                continue
            try:
                coordinates[acount, 0] = line[30:38]
                coordinates[acount, 1] = line[38:46]
                coordinates[acount, 2] = line[46:54]
            except:
                if acount >= n_atoms > 0:
                    if nmodel ==0:
                        raise ValueError(format + 'file and AtomGroup ag must '
                                         'have same number of atoms')
                    LOGGER.warn('Discarding model {0:d}, which contains more '
                            'atoms than first model does.'.format(nmodel+1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    while lines[i][:6] != 'ENDMDL':
                        i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                         'line {0:d}.'.format(i+1))
            if onlycoords:
                acount += 1
                i += 1
                continue
            
            serials[acount] = line[6:11]
            altlocs[acount] = alt 
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            resnums[acount] = line[22:26]#.split()[0])
            icodes[acount] = line[26]
            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except:
                    LOGGER.warn('failed to parse occupancy at line {0:d}'
                                .format(i))
                try:
                    bfactors[acount] = line[60:66]
                except:
                    LOGGER.warn('failed to parse beta-factor at line {0:d}'
                                .format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
            else:
                try:
                    charges[acount] = line[54:62]
                except:
                    LOGGER.warn('failed to parse charge at line {0:d}'
                                .format(i))
                try:
                    radii[acount] = line[62:69]
                except:
                    LOGGER.warn('failed to parse radius at line {0:d}'
                                .format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = np.concatenate(
                    (coordinates, np.zeros((asize, 3), float)))
                atomnames = np.concatenate((atomnames,
                    np.zeros(asize, ATOMIC_FIELDS['name'].dtype)))
                resnames = np.concatenate((resnames,
                    np.zeros(asize, ATOMIC_FIELDS['resname'].dtype)))
                resnums = np.concatenate((resnums,
                    np.zeros(asize, ATOMIC_FIELDS['resnum'].dtype)))
                chainids = np.concatenate((chainids,
                    np.zeros(asize, ATOMIC_FIELDS['chain'].dtype)))
                hetero = np.concatenate((hetero,
                    np.zeros(asize, ATOMIC_FIELDS['hetero'].dtype)))
                altlocs = np.concatenate((altlocs,
                    np.zeros(asize, ATOMIC_FIELDS['altloc'].dtype)))
                icodes = np.concatenate((icodes,
                    np.zeros(asize, ATOMIC_FIELDS['icode'].dtype)))
                serials = np.concatenate((serials,
                    np.zeros(asize, ATOMIC_FIELDS['serial'].dtype)))
                if isPDB:
                    bfactors = np.concatenate((bfactors,
                        np.zeros(asize, ATOMIC_FIELDS['beta'].dtype)))
                    occupancies = np.concatenate((occupancies,
                        np.zeros(asize, ATOMIC_FIELDS['occupancy'].dtype)))
                    segnames = np.concatenate((segnames,
                        np.zeros(asize, ATOMIC_FIELDS['segment'].dtype)))
                    elements = np.concatenate((elements,
                        np.zeros(asize, ATOMIC_FIELDS['element'].dtype)))
                    if anisou is not None:
                        anisou = np.concatenate((anisou, np.zeros((asize, 6), 
                            ATOMIC_FIELDS['anisou'].dtype)))
                    if siguij is not None:
                        siguij = np.concatenate((siguij, np.zeros((asize, 6), 
                            ATOMIC_FIELDS['siguij'].dtype)))
                else:
                    charges = np.concatenate((charges,
                        np.zeros(asize, ATOMIC_FIELDS['charge'].dtype)))
                    radii = np.concatenate((radii,
                        np.zeros(asize, ATOMIC_FIELDS['radius'].dtype)))
        #elif startswith == 'END   ' or startswith == 'CONECT':
        #    i += 1
        #    break
        elif startswith == 'ENDMDL' or startswith[:3] == 'END':
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                i += 1
                break
            diff = stop - i - 1
            if diff < acount:
                END = True
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0:d}, which contains '
                                '{1:d} fewer atoms than the first model '
                                'does.'.format(nmodel+1, n_atoms-acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                    'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    if addcoords:
                        atomgroup.addCoordset(coordinates[:acount])
                    else:
                        atomgroup._setCoords(coordinates[:acount])
                else:
                    coordsets = np.zeros((diff/acount+1, acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                if not only_subset:
                    atomnames = np.char.strip(atomnames[:acount])
                    resnames = np.char.strip(resnames[:acount])
                atomgroup.setNames(atomnames[:acount])
                atomgroup.setResnames(resnames[:acount])
                atomgroup.setResnums(resnums[:acount])
                atomgroup.setChids(chainids[:acount])
                atomgroup.setHeteros(hetero[:acount])
                atomgroup.setAltlocs(altlocs[:acount])
                atomgroup.setIcodes(np.char.strip(icodes[:acount]))
                atomgroup.setSerials(serials[:acount])
                if isPDB:
                    atomgroup.setBetas(bfactors[:acount])
                    atomgroup.setOccupancies(occupancies[:acount])
                    atomgroup.setSegnames(np.char.strip(segnames[:acount]))
                    atomgroup.setElements(np.char.strip(elements[:acount]))
                    if anisou is not None:
                        atomgroup.setAnisous(anisou[:acount] / 10000)
                    if siguij is not None:
                        atomgroup.setAnistds(siguij[:acount] / 10000)
                else:
                    atomgroup.setCharges(charges[:acount])
                    atomgroup.setRadii(radii[:acount])
                    
                nmodel += 1
                n_atoms = acount 
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums, 
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = True
                anisou = np.zeros((alength, 6), 
                    dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                index = acount - 1
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                anisou[index, 2] = line[43:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except:
                LOGGER.warn('failed to parse anisotropic temperature '
                    'factors at line {0:d}'.format(i))
        elif isPDB and startswith =='SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6), 
                    dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                siguij[index, 2] = line[43:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except:
                LOGGER.warn('failed to parse standard deviations of '
                    'anisotropic temperature factors at line {0:d}'.format(i))
        elif startswith =='SIGATM':
            pass
        i += 1
    if onlycoords:
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        if nmodel == coordsets.shape[0]:
            if addcoords:
                atomgroup.addCoordset(coordsets)
            else:
                atomgroup._setCoords(coordsets)
        else:
            if addcoords:
                atomgroup.addCoordset(coordsets[:nmodel])
            else:
                atomgroup._setCoords(coordsets[:nmodel])
    elif not END:
        # this means last line wast an ATOM line, so atomgroup is not decorated
        if addcoords:
            atomgroup.addCoordset(coordinates[:acount])
        else:
            atomgroup._setCoords(coordinates[:acount])
        if not only_subset:
            atomnames = np.char.strip(atomnames[:acount])
            resnames = np.char.strip(resnames[:acount])
        atomgroup.setNames(atomnames[:acount])
        atomgroup.setResnames(resnames[:acount])
        atomgroup.setResnums(resnums[:acount])
        atomgroup.setChids(chainids[:acount])
        atomgroup.setHeteros(hetero[:acount])
        atomgroup.setAltlocs(altlocs[:acount])
        atomgroup.setIcodes(np.char.strip(icodes[:acount]))
        atomgroup.setSerials(serials[:acount])
        if isPDB:
            if anisou is not None:
                atomgroup.setAnisous(anisou[:acount] / 10000)
            if siguij is not None:
                atomgroup.setAnistds(siguij[:acount] / 10000)
            atomgroup.setSegnames(np.char.strip(segnames[:acount]))
            atomgroup.setElements(np.char.strip(elements[:acount]))
            atomgroup.setBetas(bfactors[:acount])
            atomgroup.setOccupancies(occupancies[:acount])
        else:
            atomgroup.setCharges(charges[:acount])
            atomgroup.setRadii(radii[:acount])
            

    if altloc and altloc_torf:
        _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)
                
    return atomgroup