Ejemplo n.º 1
0
def _parsePDBLines(atomgroup, lines, split, model, chain, subset,
                   altloc_torf, format='PDB'):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    ATOM/HETATM records in *lines* are parsed into per-atom arrays which are
    then set on *atomgroup*.  When several models are present and *model* is
    **None**, the first model defines the atoms and each following model only
    contributes a coordinate set.

    :arg atomgroup: object that receives the parsed data; when it already
        holds atoms (``numAtoms() > 0``) the parsed model must contain the
        same number of atoms, and when it already holds coordinate sets the
        parsed coordinates are appended with ``addCoordset``
    :arg lines: PDB/PQR lines
    :arg split: starting index for coordinate data lines
    :arg model: when not **None**, parsing stops at the first END/ENDMDL
        record that follows at least one parsed atom; for PDB input and a
        value other than ``1`` parsing starts after that ``MODEL`` record
    :arg chain: when not **None**, only atoms whose chain identifier is
        ``in chain`` are kept
    :arg subset: ``'ca'`` keeps only ``CA`` atoms, a value matched by
        ``subset in 'bb'`` keeps ``flags.BACKBONE`` atoms; in both cases
        only residues named in ``flags.AMINOACIDS`` are kept
    :arg altloc_torf: a string lists the alternate location identifiers to
        parse directly (blank is always included); any other value parses
        blank and ``'A'`` and hands the remaining alternate locations to
        ``_evalAltlocs``
    :arg format: ``'PDB'`` (case insensitive) for fixed-column parsing, any
        other value is parsed as whitespace separated PQR fields

    :raises PDBParseError: requested model is not found, or coordinates of
        an atom record cannot be parsed
    :raises ValueError: number of parsed atoms does not match the number of
        atoms already in *atomgroup*"""

    format = format.upper()
    if format == 'PDB':
        isPDB = True
    else:
        isPDB = False

    if subset:
        if subset == 'ca':
            subset = set(('CA',))
        # NOTE: this is a substring test, so 'b' is accepted as well as 'bb'
        elif subset in 'bb':
            subset = flags.BACKBONE
        only_subset = True
        protein_resnames = flags.AMINOACIDS
    else:
        only_subset = False
    if chain is None:
        only_chains = False
    else:
        only_chains = True
    # becomes True once the first model is stored; later models then only
    # fill coordinate sets
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        # one slot per remaining line is an upper bound on the atom count
        asize = len(lines) - split
    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        # allocated lazily, when the first ANISOU/SIGUIJ record is met
        anisou = None
        siguij = None
    else:
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)

    asize = 2000 # increase array length by this much when needed

    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i+1
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0} is not found'.format(model))
    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        # from here on the name is reused as a flag: False means that the
        # collected alternate locations are not passed to _evalAltlocs
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True

    acount = 0  # number of atoms parsed in the current model
    coordsets = None
    altloc = defaultdict(list)  # altloc identifier -> [(line, index), ...]
    i = start
    END = False
    while i < stop:
        line = lines[i]
        if not isPDB:
            fields = line.split()
            if len(fields) == 10:
                # chain identifier is missing, insert an empty one
                fields.insert(4, '')
            elif len(fields) != 11:
                LOGGER.warn('wrong number of fields for PQR format at line %d'%i)
                i += 1
                continue

        if isPDB:
            startswith = line[0:6].strip()
        else:
            startswith = fields[0]

        if startswith == 'ATOM' or startswith == 'HETATM':
            if isPDB:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
            else:
                atomname= fields[2]
                resname = fields[3]

            if only_subset:
                if not (atomname in subset and resname in protein_resnames):
                    i += 1
                    continue

            if isPDB:
                chid = line[21]
            else:
                chid = fields[4]

            if only_chains:
                if not chid in chain:
                    i += 1
                    continue

            if isPDB:
                alt = line[16]
                if alt not in which_altlocs:
                    # keep the line for _evalAltlocs and skip it for now
                    altloc[alt].append((line, i))
                    i += 1
                    continue
            else:
                alt = ' '
            try:
                if isPDB:
                    coordinates[acount, 0] = line[30:38]
                    coordinates[acount, 1] = line[38:46]
                    coordinates[acount, 2] = line[46:54]
                else:
                    coordinates[acount, 0] = fields[6]
                    coordinates[acount, 1] = fields[7]
                    coordinates[acount, 2] = fields[8]
            except Exception:
                # the failure is either an index past the end of the
                # coordinate array (too many atoms) or unparsable text
                if acount >= n_atoms > 0:
                    if nmodel == 0:
                        raise ValueError(format + ' file and AtomGroup ag must '
                                         'have same number of atoms')
                    LOGGER.warn('Discarding model {0}, which contains {1} more '
                                'atoms than first model does.'
                                .format(nmodel+1,acount-n_atoms+1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    if isPDB:
                        # skip the rest of the discarded model
                        while lines[i][:6] != 'ENDMDL':
                            i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                         'line {0}'.format(i+1))
            if onlycoords:
                acount += 1
                i += 1
                continue

            try:
                serials[acount] = int(line[6:11]) if isPDB else int(fields[1])
            except ValueError:
                try:
                    # retry, reading the serial number as hexadecimal
                    serials[acount] = int(line[6:11], 16) if isPDB else int(fields[1], 16)
                except ValueError:
                    LOGGER.warn('failed to parse serial number in line {0}'
                                .format(i))
                    serials[acount] = serials[acount-1]+1
            altlocs[acount] = alt
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            if isPDB:
                resnums[acount] = line[22:26]
                icodes[acount] = line[26]
            else:
                resnum = fields[5]
                if resnum[-1].isalpha():
                    icode = resnum[-1]
                else:
                    icode = ' '
                resnums[acount] = resnum
                icodes[acount] = icode

            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except Exception:
                    LOGGER.warn('failed to parse occupancy at line {0}'
                                .format(i))
                try:
                    bfactors[acount] = line[60:66]
                except Exception:
                    LOGGER.warn('failed to parse beta-factor at line {0}'
                                .format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
                try:
                    # columns hold digit then sign, swap them before int()
                    charges[acount] = int(line[79] + line[78])
                except Exception:
                    charges[acount] = 0
            else:
                try:
                    charges[acount] = fields[9]
                except Exception:
                    LOGGER.warn('failed to parse charge at line {0}'
                                .format(i))
                try:
                    radii[acount] = fields[10]
                except Exception:
                    LOGGER.warn('failed to parse radius at line {0}'
                                .format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = np.concatenate(
                    (coordinates, np.zeros((asize, 3), float)))
                atomnames = np.concatenate((atomnames,
                    np.zeros(asize, ATOMIC_FIELDS['name'].dtype)))
                resnames = np.concatenate((resnames,
                    np.zeros(asize, ATOMIC_FIELDS['resname'].dtype)))
                resnums = np.concatenate((resnums,
                    np.zeros(asize, ATOMIC_FIELDS['resnum'].dtype)))
                chainids = np.concatenate((chainids,
                    np.zeros(asize, ATOMIC_FIELDS['chain'].dtype)))
                hetero = np.concatenate((hetero, np.zeros(asize, bool)))
                termini = np.concatenate((termini, np.zeros(asize, bool)))
                altlocs = np.concatenate((altlocs,
                    np.zeros(asize, ATOMIC_FIELDS['altloc'].dtype)))
                icodes = np.concatenate((icodes,
                    np.zeros(asize, ATOMIC_FIELDS['icode'].dtype)))
                serials = np.concatenate((serials,
                    np.zeros(asize, ATOMIC_FIELDS['serial'].dtype)))
                if isPDB:
                    bfactors = np.concatenate((bfactors,
                        np.zeros(asize, ATOMIC_FIELDS['beta'].dtype)))
                    occupancies = np.concatenate((occupancies,
                        np.zeros(asize, ATOMIC_FIELDS['occupancy'].dtype)))
                    segnames = np.concatenate((segnames,
                        np.zeros(asize, ATOMIC_FIELDS['segment'].dtype)))
                    elements = np.concatenate((elements,
                        np.zeros(asize, ATOMIC_FIELDS['element'].dtype)))
                    if anisou is not None:
                        anisou = np.concatenate((anisou, np.zeros((asize, 6),
                            ATOMIC_FIELDS['anisou'].dtype)))
                    if siguij is not None:
                        siguij = np.concatenate((siguij, np.zeros((asize, 6),
                            ATOMIC_FIELDS['siguij'].dtype)))
                else:
                    charges = np.concatenate((charges,
                        np.zeros(asize, ATOMIC_FIELDS['charge'].dtype)))
                    radii = np.concatenate((radii,
                        np.zeros(asize, ATOMIC_FIELDS['radius'].dtype)))
        #elif startswith == 'END   ' or startswith == 'CONECT':
        #    i += 1
        #    break
        elif not onlycoords and (startswith == 'TER   ' or
            startswith.strip() == 'TER'):
            # with no atom parsed yet index -1 would flag the last array slot
            if acount > 0:
                termini[acount - 1] = True
        elif startswith == 'ENDMDL' or startswith[:3] == 'END':
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                # a single model was requested, it is complete now
                i += 1
                break
            # fewer lines left than atoms per model: no further model can fit
            diff = stop - i - 1
            END = diff < acount
            if coordsets is not None:
                END = END or nmodel >= coordsets.shape[0]
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0}, which contains '
                                '{1} fewer atoms than the first model '
                                'does.'.format(nmodel+1, n_atoms-acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                    'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    coordinates.resize((acount, 3), refcheck=False)
                    if addcoords:
                        atomgroup.addCoordset(coordinates)
                    else:
                        atomgroup._setCoords(coordinates)
                else:
                    # remaining lines divided by atoms per model, plus the
                    # model just parsed, bounds the number of coordinate sets
                    coordsets = np.zeros((int(diff//acount+1), acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                # trim the arrays to the number of atoms actually parsed
                atomnames.resize(acount, refcheck=False)
                resnames.resize(acount, refcheck=False)
                resnums.resize(acount, refcheck=False)
                chainids.resize(acount, refcheck=False)
                hetero.resize(acount, refcheck=False)
                termini.resize(acount, refcheck=False)
                altlocs.resize(acount, refcheck=False)
                icodes.resize(acount, refcheck=False)
                serials.resize(acount, refcheck=False)
                if not only_subset:
                    atomnames = np.char.strip(atomnames)
                    resnames = np.char.strip(resnames)
                atomgroup.setNames(atomnames)
                atomgroup.setResnames(resnames)
                atomgroup.setResnums(resnums)
                atomgroup.setChids(chainids)
                atomgroup.setFlags('hetatm', hetero)
                atomgroup.setFlags('pdbter', termini)
                atomgroup.setAltlocs(altlocs)
                atomgroup.setIcodes(np.char.strip(icodes))
                atomgroup.setSerials(serials)
                if isPDB:
                    bfactors.resize(acount, refcheck=False)
                    occupancies.resize(acount, refcheck=False)
                    segnames.resize(acount, refcheck=False)
                    elements.resize(acount, refcheck=False)
                    atomgroup.setBetas(bfactors)
                    atomgroup.setOccupancies(occupancies)
                    atomgroup.setSegnames(np.char.strip(segnames))
                    atomgroup.setElements(np.char.strip(elements))
                    from prody.utilities.misctools import getMasses
                    atomgroup.setMasses(getMasses(np.char.strip(elements)))
                    if anisou is not None:
                        anisou.resize((acount, 6), refcheck=False)
                        atomgroup.setAnisous(anisou / 10000)
                    if siguij is not None:
                        siguij.resize((acount, 6), refcheck=False)
                        atomgroup.setAnistds(siguij / 10000)
                else:
                    charges.resize(acount, refcheck=False)
                    radii.resize(acount, refcheck=False)
                    atomgroup.setCharges(charges)
                    atomgroup.setRadii(radii)

                nmodel += 1
                n_atoms = acount
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums,
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = np.zeros((alength, 6),
                    dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                # values belong to the most recently parsed atom
                index = acount - 1
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                # U(3,3) occupies columns 43-49, i.e. slice 42:49
                anisou[index, 2] = line[42:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse anisotropic temperature '
                    'factors at line {0}'.format(i))
        elif isPDB and startswith =='SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6),
                    dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                # values belong to the most recently parsed atom
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                # same column layout as ANISOU: third field is slice 42:49
                siguij[index, 2] = line[42:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except Exception:
                LOGGER.warn('failed to parse standard deviations of '
                    'anisotropic temperature factors at line {0}'.format(i))
        elif startswith =='SIGATM':
            pass
        i += 1
    if onlycoords:
        # store the last model when it was not closed by an END record
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        del coordinates
        # drop the coordinate set slots that were never filled
        coordsets.resize((nmodel, atomgroup.numAtoms(), 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordsets)
        else:
            atomgroup._setCoords(coordsets)
    elif not END:
        # this means last line was an ATOM line, so atomgroup is not decorated
        coordinates.resize((acount, 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordinates)
        else:
            atomgroup._setCoords(coordinates)
        atomnames.resize(acount, refcheck=False)
        resnames.resize(acount, refcheck=False)
        resnums.resize(acount, refcheck=False)
        chainids.resize(acount, refcheck=False)
        hetero.resize(acount, refcheck=False)
        termini.resize(acount, refcheck=False)
        altlocs.resize(acount, refcheck=False)
        icodes.resize(acount, refcheck=False)
        serials.resize(acount, refcheck=False)
        if not only_subset:
            atomnames = np.char.strip(atomnames)
            resnames = np.char.strip(resnames)
        atomgroup.setNames(atomnames)
        atomgroup.setResnames(resnames)
        atomgroup.setResnums(resnums)
        atomgroup.setChids(chainids)
        atomgroup.setFlags('hetatm', hetero)
        atomgroup.setFlags('pdbter', termini)
        atomgroup.setAltlocs(altlocs)
        atomgroup.setIcodes(np.char.strip(icodes))
        atomgroup.setSerials(serials)
        if isPDB:
            if anisou is not None:
                anisou.resize((acount, 6), refcheck=False)
                atomgroup.setAnisous(anisou / 10000)
            if siguij is not None:
                siguij.resize((acount, 6), refcheck=False)
                atomgroup.setAnistds(siguij / 10000)
            bfactors.resize(acount, refcheck=False)
            occupancies.resize(acount, refcheck=False)
            segnames.resize(acount, refcheck=False)
            elements.resize(acount, refcheck=False)
            atomgroup.setSegnames(np.char.strip(segnames))
            atomgroup.setElements(np.char.strip(elements))
            from prody.utilities.misctools import getMasses
            atomgroup.setMasses(getMasses(np.char.strip(elements)))
            atomgroup.setBetas(bfactors)
            atomgroup.setOccupancies(occupancies)
        else:
            charges.resize(acount, refcheck=False)
            radii.resize(acount, refcheck=False)
            atomgroup.setCharges(charges)
            atomgroup.setRadii(radii)

    if altloc and altloc_torf:
        _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)

    return atomgroup
Ejemplo n.º 2
0
    def getMasses(self):
        """Return the result of the ``getMasses`` utility for this object.

        The value of :meth:`getElement` is handed to
        ``prody.utilities.misctools.getMasses`` and whatever that helper
        returns is passed back unchanged."""
        # imported locally under an alias, so that the helper is not confused
        # with this method of the same name
        from prody.utilities.misctools import getMasses as _lookup_masses

        element_data = self.getElement()
        return _lookup_masses(element_data)
Ejemplo n.º 3
0
def _parsePDBLines(atomgroup,
                   lines,
                   split,
                   model,
                   chain,
                   subset,
                   altloc_torf,
                   format='PDB'):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    :arg lines: PDB/PQR lines
    :arg split: starting index for coordinate data lines"""

    format = format.upper()
    if format == 'PDB':
        isPDB = True
    else:
        isPDB = False

    if subset:
        if subset == 'ca':
            subset = set(('CA', ))
        elif subset in 'bb':
            subset = flags.BACKBONE
        only_subset = True
        protein_resnames = flags.AMINOACIDS
    else:
        only_subset = False
    if chain is None:
        only_chains = False
    else:
        only_chains = True
    onlycoords = False
    n_atoms = atomgroup.numAtoms()
    if n_atoms > 0:
        asize = n_atoms
    else:
        # most PDB files contain less than 99999 atoms
        asize = min(len(lines) - split, 99999)
    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True
    alength = asize
    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    charges = np.zeros(asize, dtype=ATOMIC_FIELDS['charge'].dtype)
    if isPDB:
        segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
        elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
        bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
        occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)
        anisou = None
        siguij = None
    else:
        radii = np.zeros(asize, dtype=ATOMIC_FIELDS['radius'].dtype)

    asize = 2000  # increase array length by this much when needed

    start = split
    stop = len(lines)
    nmodel = 0
    # if a specific model is requested, skip lines until that one
    if isPDB and model is not None and model != 1:
        for i in range(split, len(lines)):
            if lines[i][:5] == 'MODEL':
                nmodel += 1
                if model == nmodel:
                    start = i + 1
                    stop = len(lines)
                    break
        if nmodel != model:
            raise PDBParseError('model {0} is not found'.format(model))
    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'.format(altloc_torf))
            which_altlocs = ' ' + ''.join(altloc_torf.split())
        else:
            which_altlocs = ' A'
        altloc_torf = False
    else:
        which_altlocs = ' A'
        altloc_torf = True

    acount = 0
    coordsets = None
    altloc = defaultdict(list)
    i = start
    END = False
    while i < stop:
        line = lines[i]
        if not isPDB:
            fields = line.split()
            if len(fields) == 10:
                fields.insert(4, '')
            elif len(fields) != 11:
                LOGGER.warn(
                    'wrong number of fields for PQR format at line %d' % i)
                i += 1
                continue

        if isPDB:
            startswith = line[0:6].strip()
        else:
            startswith = fields[0]

        if startswith == 'ATOM' or startswith == 'HETATM':
            if isPDB:
                atomname = line[12:16].strip()
                resname = line[17:21].strip()
            else:
                atomname = fields[2]
                resname = fields[3]

            if only_subset:
                if not (atomname in subset and resname in protein_resnames):
                    i += 1
                    continue

            if isPDB:
                chid = line[21]
            else:
                chid = fields[4]

            if only_chains:
                if not chid in chain:
                    i += 1
                    continue

            if isPDB:
                alt = line[16]
                if alt not in which_altlocs:
                    altloc[alt].append((line, i))
                    i += 1
                    continue
            else:
                alt = ' '
            try:
                if isPDB:
                    coordinates[acount, 0] = line[30:38]
                    coordinates[acount, 1] = line[38:46]
                    coordinates[acount, 2] = line[46:54]
                else:
                    coordinates[acount, 0] = fields[6]
                    coordinates[acount, 1] = fields[7]
                    coordinates[acount, 2] = fields[8]
            except:
                if acount >= n_atoms > 0:
                    if nmodel == 0:
                        raise ValueError(format + 'file and AtomGroup ag must '
                                         'have same number of atoms')
                    LOGGER.warn(
                        'Discarding model {0}, which contains {1} more '
                        'atoms than first model does.'.format(
                            nmodel + 1, acount - n_atoms + 1))
                    acount = 0
                    nmodel += 1
                    coordinates = np.zeros((n_atoms, 3), dtype=float)
                    if isPDB:
                        while lines[i][:6] != 'ENDMDL':
                            i += 1
                else:
                    raise PDBParseError('invalid or missing coordinate(s) at '
                                        'line {0}'.format(i + 1))
            if onlycoords:
                acount += 1
                i += 1
                continue

            try:
                serials[acount] = int(line[6:11]) if isPDB else int(fields[1])
            except ValueError:
                try:
                    serials[acount] = int(line[6:11], 16) if isPDB else int(
                        fields[1], 16)
                except ValueError:
                    LOGGER.warn(
                        'failed to parse serial number in line {0}'.format(i))
                    serials[acount] = serials[acount - 1] + 1
            altlocs[acount] = alt
            atomnames[acount] = atomname
            resnames[acount] = resname
            chainids[acount] = chid
            if isPDB:
                resnums[acount] = line[22:26]
                icodes[acount] = line[26]
            else:
                resnum = fields[5]
                if resnum[-1].isalpha():
                    icode = resnum[-1]
                else:
                    icode = ' '
                resnums[acount] = resnum
                icodes[acount] = icode

            if isPDB:
                try:
                    occupancies[acount] = line[54:60]
                except:
                    LOGGER.warn(
                        'failed to parse occupancy at line {0}'.format(i))
                try:
                    bfactors[acount] = line[60:66]
                except:
                    LOGGER.warn(
                        'failed to parse beta-factor at line {0}'.format(i))
                hetero[acount] = startswith[0] == 'H'
                segnames[acount] = line[72:76]
                elements[acount] = line[76:78]
                try:
                    charges[acount] = int(line[79] + line[78])
                except:
                    charges[acount] = 0
            else:
                try:
                    charges[acount] = fields[9]
                except:
                    LOGGER.warn('failed to parse charge at line {0}'.format(i))
                try:
                    radii[acount] = fields[10]
                except:
                    LOGGER.warn('failed to parse radius at line {0}'.format(i))
            acount += 1
            if n_atoms == 0 and acount >= alength:
                # if arrays are short extend them with zeros
                alength += asize
                coordinates = np.concatenate(
                    (coordinates, np.zeros((asize, 3), float)))
                atomnames = np.concatenate(
                    (atomnames, np.zeros(asize, ATOMIC_FIELDS['name'].dtype)))
                resnames = np.concatenate(
                    (resnames, np.zeros(asize,
                                        ATOMIC_FIELDS['resname'].dtype)))
                resnums = np.concatenate(
                    (resnums, np.zeros(asize, ATOMIC_FIELDS['resnum'].dtype)))
                chainids = np.concatenate(
                    (chainids, np.zeros(asize, ATOMIC_FIELDS['chain'].dtype)))
                hetero = np.concatenate((hetero, np.zeros(asize, bool)))
                termini = np.concatenate((termini, np.zeros(asize, bool)))
                altlocs = np.concatenate(
                    (altlocs, np.zeros(asize, ATOMIC_FIELDS['altloc'].dtype)))
                icodes = np.concatenate(
                    (icodes, np.zeros(asize, ATOMIC_FIELDS['icode'].dtype)))
                serials = np.concatenate(
                    (serials, np.zeros(asize, ATOMIC_FIELDS['serial'].dtype)))
                if isPDB:
                    bfactors = np.concatenate(
                        (bfactors, np.zeros(asize,
                                            ATOMIC_FIELDS['beta'].dtype)))
                    occupancies = np.concatenate(
                        (occupancies,
                         np.zeros(asize, ATOMIC_FIELDS['occupancy'].dtype)))
                    segnames = np.concatenate(
                        (segnames,
                         np.zeros(asize, ATOMIC_FIELDS['segment'].dtype)))
                    elements = np.concatenate(
                        (elements,
                         np.zeros(asize, ATOMIC_FIELDS['element'].dtype)))
                    if anisou is not None:
                        anisou = np.concatenate(
                            (anisou,
                             np.zeros((asize, 6),
                                      ATOMIC_FIELDS['anisou'].dtype)))
                    if siguij is not None:
                        siguij = np.concatenate(
                            (siguij,
                             np.zeros((asize, 6),
                                      ATOMIC_FIELDS['siguij'].dtype)))
                else:
                    charges = np.concatenate(
                        (charges, np.zeros(asize,
                                           ATOMIC_FIELDS['charge'].dtype)))
                    radii = np.concatenate(
                        (radii, np.zeros(asize,
                                         ATOMIC_FIELDS['radius'].dtype)))
        #elif startswith == 'END   ' or startswith == 'CONECT':
        #    i += 1
        #    break
        elif not onlycoords and (startswith == 'TER   '
                                 or startswith.strip() == 'TER'):
            termini[acount - 1] = True
        elif startswith == 'ENDMDL' or startswith[:3] == 'END':
            if acount == 0:
                # If there is no atom record between ENDMDL & END skip to next
                i += 1
                continue
            if model is not None:
                i += 1
                break
            diff = stop - i - 1
            END = diff < acount
            if coordsets is not None:
                END = END or nmodel >= coordsets.shape[0]
            if onlycoords:
                if acount < n_atoms:
                    LOGGER.warn('Discarding model {0}, which contains '
                                '{1} fewer atoms than the first model '
                                'does.'.format(nmodel + 1, n_atoms - acount))
                else:
                    coordsets[nmodel] = coordinates
                    nmodel += 1
                acount = 0
                if not END:
                    coordinates = coordsets[nmodel]
            else:
                if acount != n_atoms > 0:
                    raise ValueError('PDB file and AtomGroup ag must have '
                                     'same number of atoms')
                # this is where to decide if more coordsets should be expected
                if END:
                    coordinates.resize((acount, 3), refcheck=False)
                    if addcoords:
                        atomgroup.addCoordset(coordinates)
                    else:
                        atomgroup._setCoords(coordinates)
                else:
                    coordsets = np.zeros((int(diff // acount + 1), acount, 3))
                    coordsets[0] = coordinates[:acount]
                    onlycoords = True
                atomnames.resize(acount, refcheck=False)
                resnames.resize(acount, refcheck=False)
                resnums.resize(acount, refcheck=False)
                chainids.resize(acount, refcheck=False)
                hetero.resize(acount, refcheck=False)
                termini.resize(acount, refcheck=False)
                altlocs.resize(acount, refcheck=False)
                icodes.resize(acount, refcheck=False)
                serials.resize(acount, refcheck=False)
                if not only_subset:
                    atomnames = np.char.strip(atomnames)
                    resnames = np.char.strip(resnames)
                atomgroup.setNames(atomnames)
                atomgroup.setResnames(resnames)
                atomgroup.setResnums(resnums)
                atomgroup.setChids(chainids)
                atomgroup.setFlags('hetatm', hetero)
                atomgroup.setFlags('pdbter', termini)
                atomgroup.setAltlocs(altlocs)
                atomgroup.setIcodes(np.char.strip(icodes))
                atomgroup.setSerials(serials)
                if isPDB:
                    bfactors.resize(acount, refcheck=False)
                    occupancies.resize(acount, refcheck=False)
                    segnames.resize(acount, refcheck=False)
                    elements.resize(acount, refcheck=False)
                    atomgroup.setBetas(bfactors)
                    atomgroup.setOccupancies(occupancies)
                    atomgroup.setSegnames(np.char.strip(segnames))
                    atomgroup.setElements(np.char.strip(elements))
                    from prody.utilities.misctools import getMasses
                    atomgroup.setMasses(getMasses(np.char.strip(elements)))
                    if anisou is not None:
                        anisou.resize((acount, 6), refcheck=False)
                        atomgroup.setAnisous(anisou / 10000)
                    if siguij is not None:
                        siguij.resize((acount, 6), refcheck=False)
                        atomgroup.setAnistds(siguij / 10000)
                else:
                    charges.resize(acount, refcheck=False)
                    radii.resize(acount, refcheck=False)
                    atomgroup.setCharges(charges)
                    atomgroup.setRadii(radii)

                nmodel += 1
                n_atoms = acount
                acount = 0
                coordinates = np.zeros((n_atoms, 3), dtype=float)
                if altloc and altloc_torf:
                    _evalAltlocs(atomgroup, altloc, chainids, resnums,
                                 resnames, atomnames)
                    altloc = defaultdict(list)
                if END:
                    break
        elif isPDB and startswith == 'ANISOU':
            if anisou is None:
                anisou = True
                anisou = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['anisou'].dtype)
            try:
                index = acount - 1
                anisou[index, 0] = line[28:35]
                anisou[index, 1] = line[35:42]
                anisou[index, 2] = line[43:49]
                anisou[index, 3] = line[49:56]
                anisou[index, 4] = line[56:63]
                anisou[index, 5] = line[63:70]
            except:
                LOGGER.warn('failed to parse anisotropic temperature '
                            'factors at line {0}'.format(i))
        elif isPDB and startswith == 'SIGUIJ':
            if siguij is None:
                siguij = np.zeros((alength, 6),
                                  dtype=ATOMIC_FIELDS['siguij'].dtype)
            try:
                index = acount - 1
                siguij[index, 0] = line[28:35]
                siguij[index, 1] = line[35:42]
                siguij[index, 2] = line[43:49]
                siguij[index, 3] = line[49:56]
                siguij[index, 4] = line[56:63]
                siguij[index, 5] = line[63:70]
            except:
                LOGGER.warn(
                    'failed to parse standard deviations of '
                    'anisotropic temperature factors at line {0}'.format(i))
        elif startswith == 'SIGATM':
            pass
        i += 1
    if onlycoords:
        if acount == atomgroup.numAtoms():
            coordsets[nmodel] = coordinates
            nmodel += 1
        del coordinates
        coordsets.resize((nmodel, atomgroup.numAtoms(), 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordsets)
        else:
            atomgroup._setCoords(coordsets)
    elif not END:
        # this means last line was an ATOM line, so atomgroup is not decorated
        coordinates.resize((acount, 3), refcheck=False)
        if addcoords:
            atomgroup.addCoordset(coordinates)
        else:
            atomgroup._setCoords(coordinates)
        atomnames.resize(acount, refcheck=False)
        resnames.resize(acount, refcheck=False)
        resnums.resize(acount, refcheck=False)
        chainids.resize(acount, refcheck=False)
        hetero.resize(acount, refcheck=False)
        termini.resize(acount, refcheck=False)
        altlocs.resize(acount, refcheck=False)
        icodes.resize(acount, refcheck=False)
        serials.resize(acount, refcheck=False)
        if not only_subset:
            atomnames = np.char.strip(atomnames)
            resnames = np.char.strip(resnames)
        atomgroup.setNames(atomnames)
        atomgroup.setResnames(resnames)
        atomgroup.setResnums(resnums)
        atomgroup.setChids(chainids)
        atomgroup.setFlags('hetatm', hetero)
        atomgroup.setFlags('pdbter', termini)
        atomgroup.setAltlocs(altlocs)
        atomgroup.setIcodes(np.char.strip(icodes))
        atomgroup.setSerials(serials)
        if isPDB:
            if anisou is not None:
                anisou.resize((acount, 6), refcheck=False)
                atomgroup.setAnisous(anisou / 10000)
            if siguij is not None:
                siguij.resize((acount, 6), refcheck=False)
                atomgroup.setAnistds(siguij / 10000)
            bfactors.resize(acount, refcheck=False)
            occupancies.resize(acount, refcheck=False)
            segnames.resize(acount, refcheck=False)
            elements.resize(acount, refcheck=False)
            atomgroup.setSegnames(np.char.strip(segnames))
            atomgroup.setElements(np.char.strip(elements))
            from prody.utilities.misctools import getMasses
            atomgroup.setMasses(getMasses(np.char.strip(elements)))
            atomgroup.setBetas(bfactors)
            atomgroup.setOccupancies(occupancies)
        else:
            charges.resize(acount, refcheck=False)
            radii.resize(acount, refcheck=False)
            atomgroup.setCharges(charges)
            atomgroup.setRadii(radii)

    if altloc and altloc_torf:
        _evalAltlocs(atomgroup, altloc, chainids, resnums, resnames, atomnames)

    return atomgroup
Ejemplo n.º 4
0
def _parseMMCIFLines(atomgroup, lines, model, chain, subset, altloc_torf,
                     header):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    The first contiguous run of ``ATOM``/``HETATM`` lines is treated as the
    atom block; ``_atom_site.`` lines seen before it define the column
    order.  Atoms that pass the filters are stored on *atomgroup*, and when
    several models are kept every model after the first is appended as an
    additional coordinate set.

    :arg atomgroup: atom group that receives the parsed data
    :arg lines: mmCIF lines
    :arg model: model number to keep, compared numerically with the
        ``pdbx_PDB_model_num`` column, or **None** to keep every model
    :arg chain: chain identifiers to keep, matched against ``auth_asym_id``;
        a string is split on commas, **None** keeps all chains
    :arg subset: ``'ca'`` keeps atoms named ``CA`` and ``'bb'`` keeps atom
        names in ``flags.BACKBONE``; both also require the residue name to
        be in ``flags.AMINOACIDS``.  **None** disables this filter
    :arg altloc_torf: when a string other than ``'A'``, its non-blank
        characters (plus ``'.'``) are the accepted ``label_alt_id`` values;
        any other value accepts ``'.'`` and ``'A'``
    :arg header: when true, the lines outside the atom block are passed to
        :func:`parseSTARLines` and ``(atomgroup, header)`` is returned

    :raises mmCIFParseError: when no atom lines are found, or when *model*
        (other than 1) does not occur in the atom block"""

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA', ))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # Split a comma separated chain string once instead of re-testing it
    # for every atom line.
    if isinstance(chain, str):
        chain = chain.split(',')

    models = []        # model number (as text) of every atom line, in order
    nModels = 0        # number of distinct consecutive model numbers
    fields = {}        # _atom_site column name -> column index
    fieldCounter = -1
    start = None       # index of the first atom line
    stop = None        # index of the first line after the atom block
    for i, line in enumerate(lines):
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith(('ATOM', 'HETATM')):
            if start is None:
                start = i
            line_model = line.split()[fields['pdbx_PDB_model_num']]
            # The first atom always opens a model; afterwards a model starts
            # whenever the model number differs from the previous atom's.
            if not models or line_model != models[-1]:
                nModels += 1
            models.append(line_model)
        elif start is not None:
            stop = i
            break

    if start is None:
        raise mmCIFParseError('no ATOM or HETATM records were found')
    if stop is None:
        # the atom block runs up to the very last line
        stop = len(lines)
    asize = len(models)

    # start/stop intentionally keep spanning the whole atom block (the
    # header slicing below relies on that); the requested model is picked
    # line by line in the main loop.
    if model is not None and model != 1 and str(model) not in models:
        raise mmCIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    which_altlocs = '.A'
    if isinstance(altloc_torf, str) and altloc_torf.strip() != 'A':
        LOGGER.info('Parsing alternate locations {0}.'.format(altloc_torf))
        which_altlocs = '.' + ''.join(altloc_torf.split())

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0  # number of atoms accepted so far (next free array slot)
    for line_model, line in zip(models, lines[start:stop]):
        tokens = line.split()  # tokenize once per line
        startswith = tokens[fields['group_PDB']]

        atomname = tokens[fields['auth_atom_id']]
        resname = tokens[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[fields['auth_asym_id']]
        if chain is not None and chID not in chain:
            continue

        segID = tokens[fields['label_asym_id']]

        alt = tokens[fields['label_alt_id']]
        if alt not in which_altlocs:
            continue

        if model is not None:
            # Use the model number of this very line; the count of accepted
            # atoms is not a valid index once any line has been skipped.
            line_model = int(line_model)
            if line_model < model:
                continue
            if line_model > model:
                break

        coordinates[acount] = [
            tokens[fields['Cartn_x']],
            tokens[fields['Cartn_y']],
            tokens[fields['Cartn_z']]
        ]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[fields['auth_seq_id']]
        chainids[acount] = chID
        segnames[acount] = segID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # A chain change flags the previous atom as a chain terminus.
        # NOTE: for the first accepted atom, index -1 addresses the last
        # array slot, so that slot is flagged when it is still unset.
        if chainids[acount] != chainids[acount - 1]:
            termini[acount - 1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[fields['pdbx_PDB_ins_code']]

        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = tokens[fields['id']]
        elements[acount] = tokens[fields['type_symbol']]
        bfactors[acount] = tokens[fields['B_iso_or_equiv']]
        occupancies[acount] = tokens[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    # accepted atoms are stored model after model, so one model is 1/nModels
    modelSize = acount // nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setSegnames(segnames[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])

    atomgroup.setElements(elements[:modelSize])
    from prody.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    # every further model contributes one more coordinate set
    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n * modelSize:(n + 1) * modelSize])

    if header:
        # everything except the atom block (and the lines directly
        # preceding its column definitions) is handed to the STAR parser
        header = parseSTARLines(lines[:start - fieldCounter - 2] +
                                lines[stop:],
                                shlex=True)
        return atomgroup, header

    return atomgroup
Ejemplo n.º 5
0
def _parseMMCIFLines(atomgroup, lines, model, chain, subset,
                     altloc_torf):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    The first contiguous run of ``ATOM ``/``HETATM`` lines is treated as the
    atom block; ``_atom_site.`` lines seen before it define the column
    order.  Atoms that pass the filters are stored on *atomgroup*, and when
    several models are kept every model after the first is appended as an
    additional coordinate set.  Anisotropic temperature factors are read
    from the ``_atom_site_anisotrop`` section when it has entries.

    :arg atomgroup: atom group that receives the parsed data
    :arg lines: mmCIF lines
    :arg model: model number to keep, compared numerically with the
        ``pdbx_PDB_model_num`` column, or **None** to keep every model
    :arg chain: chain identifiers to keep, matched against ``auth_asym_id``;
        a string is split on commas, **None** keeps all chains
    :arg subset: ``'ca'`` keeps atoms named ``CA`` and ``'bb'`` keeps atom
        names in ``flags.BACKBONE``; both also require the residue name to
        be in ``flags.AMINOACIDS``.  **None** disables this filter
    :arg altloc_torf: ``'all'`` accepts every ``label_alt_id``; another
        string other than ``'A'`` accepts its non-blank characters plus
        ``'.'``; any other value accepts ``'.'`` and ``'A'``

    :raises mmCIFParseError: when no atom lines are found, or when *model*
        (other than 1) does not occur in the atom block"""

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # Split a comma separated chain string once instead of re-testing it
    # for every atom line.
    if isinstance(chain, str):
        chain = chain.split(',')

    models = []             # model number (as text) of every atom line
    nModels = 0             # number of distinct consecutive model numbers
    fields = OrderedDict()  # _atom_site column name -> column index
    fieldCounter = -1
    start = None            # index of the first atom line
    stop = None             # index of the first line after the atom block
    for i, line in enumerate(lines):
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith(('ATOM ', 'HETATM')):
            if start is None:
                start = i
            line_model = line.split()[fields['pdbx_PDB_model_num']]
            if not models or line_model != models[-1]:
                nModels += 1
            models.append(line_model)
        elif start is not None:
            stop = i
            break

    if start is None:
        raise mmCIFParseError('no ATOM or HETATM records were found')
    if stop is None:
        # the atom block runs up to the very last line
        stop = len(lines)
    asize = len(models)

    # The requested model is picked line by line in the main loop, so only
    # its presence has to be verified here.
    if model is not None and model != 1 and str(model) not in models:
        raise mmCIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    which_altlocs = '.A'
    if isinstance(altloc_torf, str):
        if altloc_torf == 'all':
            which_altlocs = 'all'
        elif altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    segnames = np.zeros(asize, dtype=ATOMIC_FIELDS['segment'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0  # number of atoms accepted so far (next free array slot)
    for line_model, line in zip(models, lines[start:stop]):
        tokens = line.split()  # tokenize once per line
        startswith = tokens[fields['group_PDB']]

        atomname = tokens[fields['auth_atom_id']]
        # drop the double quotes wrapped around a quoted atom name
        if atomname.startswith('"') and atomname.endswith('"'):
            atomname = atomname[1:-1]
        resname = tokens[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[fields['auth_asym_id']]
        if chain is not None and chID not in chain:
            continue

        segID = tokens[fields['label_asym_id']]

        alt = tokens[fields['label_alt_id']]
        if alt not in which_altlocs and which_altlocs != 'all':
            continue

        # '.' in the alt id column is stored as a blank altloc
        if alt == '.':
            alt = ' '

        if model is not None:
            # Use the model number of this very line; the count of accepted
            # atoms is not a valid index once any line has been skipped.
            line_model = int(line_model)
            if line_model < model:
                continue
            if line_model > model:
                break

        coordinates[acount] = [tokens[fields['Cartn_x']],
                               tokens[fields['Cartn_y']],
                               tokens[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[fields['auth_seq_id']]
        chainids[acount] = chID
        segnames[acount] = segID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # A chain change flags the previous atom as a chain terminus.
        # NOTE: for the first accepted atom, index -1 addresses the last
        # array slot, so that slot is flagged when it is still unset.
        if chainids[acount] != chainids[acount - 1]:
            termini[acount - 1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[fields['pdbx_PDB_ins_code']]

        if icodes[acount] == '?':
            icodes[acount] = ''

        serials[acount] = tokens[fields['id']]
        elements[acount] = tokens[fields['type_symbol']]
        bfactors[acount] = tokens[fields['B_iso_or_equiv']]
        occupancies[acount] = tokens[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    # accepted atoms are stored model after model, so one model is 1/nModels
    modelSize = acount // nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setSegnames(segnames[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])

    atomgroup.setElements(elements[:modelSize])
    from prody.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    anisou = None
    siguij = None
    try:
        data = parseSTARSection(lines, "_atom_site_anisotrop")
        first_entry = data[0]  # raises IndexError when the section is empty
    except IndexError:
        LOGGER.warn("No anisotropic B factors found")
    else:
        # Rows are addressed by position within the atom group's serials,
        # so the arrays hold one row per atom of a single model.
        # NOTE(review): assumes the setters expect one row per atom of the
        # group -- confirm against AtomGroup.
        anisou = np.zeros((modelSize, 6),
                          dtype=ATOMIC_FIELDS['anisou'].dtype)

        if "_atom_site_anisotrop.U[1][1]_esd" in first_entry.keys():
            siguij = np.zeros((modelSize, 6),
                              dtype=ATOMIC_FIELDS['siguij'].dtype)

        # column order used for both the anisou and the siguij arrays
        components = ('U[1][1]', 'U[2][2]', 'U[3][3]',
                      'U[1][2]', 'U[1][3]', 'U[2][3]')
        group_serials = atomgroup.getSerials()

        for entry in data:
            try:
                index = np.where(group_serials == int(
                    entry["_atom_site_anisotrop.id"]))[0][0]
            except (IndexError, KeyError, TypeError, ValueError):
                # entry does not refer to an atom that was kept
                continue

            for column, component in enumerate(components):
                key = '_atom_site_anisotrop.' + component
                anisou[index, column] = entry[key]
                if siguij is not None:
                    siguij[index, column] = entry[key + '_esd']

        atomgroup.setAnisous(anisou)  # no division needed anymore
        atomgroup.setAnistds(siguij)  # no division needed anymore

    # every further model contributes one more coordinate set
    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    return atomgroup
Ejemplo n.º 6
0
 def getMasses(self):
     """Return the mass obtained by passing this object's element, as
     given by :meth:`getElement`, to
     :func:`prody.utilities.misctools.getMasses`."""
     from prody.utilities.misctools import getMasses as lookup_masses

     element = self.getElement()
     return lookup_masses(element)
Ejemplo n.º 7
0
def _parseCIFLines(atomgroup, lines, model, chain, subset,
                   altloc_torf):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    The first contiguous run of ``ATOM``/``HETATM`` lines is treated as the
    atom block; ``_atom_site.`` lines seen before it define the column
    order.  Atoms that pass the filters are stored on *atomgroup*, and when
    several models are kept every model after the first is appended as an
    additional coordinate set.

    :arg atomgroup: atom group that receives the parsed data
    :arg lines: CIF lines
    :arg model: model number to keep, compared numerically with the
        ``pdbx_PDB_model_num`` column, or **None** to keep every model
    :arg chain: chain identifiers to keep; an atom is kept when its
        ``auth_asym_id`` is ``in`` *chain*, **None** keeps all chains
    :arg subset: ``'ca'`` keeps atoms named ``CA`` and ``'bb'`` keeps atom
        names in ``flags.BACKBONE``; both also require the residue name to
        be in ``flags.AMINOACIDS``.  **None** disables this filter
    :arg altloc_torf: when a string other than ``'A'``, its non-blank
        characters (plus ``'.'``) are the accepted ``label_alt_id`` values;
        any other value accepts ``'.'`` and ``'A'``

    :raises CIFParseError: when no atom lines are found, or when *model*
        (other than 1) does not occur in the atom block"""

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    models = []        # model number (as text) of every atom line, in order
    nModels = 0        # number of distinct consecutive model numbers
    fields = {}        # _atom_site column name -> column index
    fieldCounter = -1
    start = None       # index of the first atom line
    stop = None        # index of the first line after the atom block
    for i, line in enumerate(lines):
        if line[:11] == '_atom_site.':
            fieldCounter += 1
            fields[line.split('.')[1].strip()] = fieldCounter

        if line.startswith(('ATOM', 'HETATM')):
            if start is None:
                start = i
            line_model = line.split()[fields['pdbx_PDB_model_num']]
            # The first atom always opens a model; afterwards a model starts
            # whenever the model number differs from the previous atom's.
            if not models or line_model != models[-1]:
                nModels += 1
            models.append(line_model)
        elif start is not None:
            stop = i
            break

    if start is None:
        raise CIFParseError('no ATOM or HETATM records were found')
    if stop is None:
        # the atom block runs up to the very last line
        stop = len(lines)
    asize = len(models)

    # The requested model is picked line by line in the main loop, so only
    # its presence has to be verified here.
    if model is not None and model != 1 and str(model) not in models:
        raise CIFParseError('model {0} is not found'.format(model))

    addcoords = atomgroup.numCoordsets() > 0

    which_altlocs = '.A'
    if isinstance(altloc_torf, str) and altloc_torf.strip() != 'A':
        LOGGER.info('Parsing alternate locations {0}.'
                    .format(altloc_torf))
        which_altlocs = '.' + ''.join(altloc_torf.split())

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0  # number of atoms accepted so far (next free array slot)
    for line_model, line in zip(models, lines[start:stop]):
        tokens = line.split()  # tokenize once per line
        startswith = tokens[fields['group_PDB']]

        atomname = tokens[fields['auth_atom_id']]
        resname = tokens[fields['auth_comp_id']]

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = tokens[fields['auth_asym_id']]
        if chain is not None and chID not in chain:
            continue

        alt = tokens[fields['label_alt_id']]
        if alt not in which_altlocs:
            continue

        if model is not None:
            # Use the model number of this very line; the count of accepted
            # atoms is not a valid index once any line has been skipped.
            line_model = int(line_model)
            if line_model < model:
                continue
            if line_model > model:
                break

        coordinates[acount] = [tokens[fields['Cartn_x']],
                               tokens[fields['Cartn_y']],
                               tokens[fields['Cartn_z']]]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = tokens[fields['auth_seq_id']]
        chainids[acount] = chID
        hetero[acount] = startswith == 'HETATM'  # True or False

        # A chain change flags the previous atom (the last atom of the
        # chain that just ended) rather than the first atom of the new one.
        # NOTE: for the first accepted atom, index -1 addresses the last
        # array slot, so that slot is flagged when it is still unset.
        if chainids[acount] != chainids[acount - 1]:
            termini[acount - 1] = True

        altlocs[acount] = alt
        icodes[acount] = tokens[fields['pdbx_PDB_ins_code']]
        if icodes[acount] == '?':
            icodes[acount] = ''
        serials[acount] = tokens[fields['id']]
        elements[acount] = tokens[fields['type_symbol']]
        bfactors[acount] = tokens[fields['B_iso_or_equiv']]
        occupancies[acount] = tokens[fields['occupancy']]

        acount += 1

    if model is not None:
        nModels = 1

    # accepted atoms are stored model after model, so one model is 1/nModels
    modelSize = acount // nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])

    atomgroup.setElements(elements[:modelSize])
    from prody.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    # every further model contributes one more coordinate set
    for n in range(1, nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    return atomgroup
Ejemplo n.º 8
0
def _parseCIFLines(atomgroup, lines, model, chain, subset,
                   altloc_torf):
    """Returns an AtomGroup. See also :func:`.parsePDBStream()`.

    Atom records are read from the first contiguous run of lines that
    start with ``ATOM`` or ``HETATM``.  Values are picked from each record
    by fixed column position (see the inline comments), so the records are
    expected to follow that column order.

    :arg atomgroup: atom group that receives the parsed data
    :arg lines: CIF lines
    :arg model: model number to parse, or **None** to parse every model;
        with **None**, models after the first are added as additional
        coordinate sets
    :arg chain: chain identifiers to keep, or **None** to keep all chains
    :arg subset: ``'ca'`` or ``'bb'`` to keep only those atoms of amino
        acid residues, or **None** to keep all atoms
    :arg altloc_torf: string of alternate location identifiers to parse;
        any non-string value selects alternate location ``'A'``.  Atoms
        whose alternate location is ``'.'`` are always kept.
    :raises CIFParseError: when *lines* hold no atom records or when
        *model* is not among the model numbers found
    """

    if subset is not None:
        if subset == 'ca':
            subset = set(('CA',))
        elif subset in 'bb':
            subset = flags.BACKBONE
        protein_resnames = flags.AMINOACIDS

    # Locate the atom block and remember the model number of each record.
    # Iterating over *lines* (instead of indexing until a sentinel is hit)
    # keeps the scan from running past the end of the list.
    start = None
    models = []
    for index, line in enumerate(lines):
        if line[:6] == 'ATOM  ' or line[:6] == 'HETATM':
            if start is None:
                start = index
            models.append(line.split()[25]) # pdbx_PDB_model_num
        elif start is not None:
            break

    if start is None:
        raise CIFParseError('no ATOM or HETATM records are found')

    asize = len(models)
    stop = start + asize

    # One model for the first record plus one for every change of the
    # model number between consecutive records.
    nModels = 1 + sum(1 for previous, current in zip(models, models[1:])
                      if previous != current)

    wanted_model = None
    if model is not None:
        # model numbers are kept as the strings read from the file
        wanted_model = str(model)
        if wanted_model not in models:
            raise CIFParseError('model {0} is not found'.format(model))

    addcoords = False
    if atomgroup.numCoordsets() > 0:
        addcoords = True

    if isinstance(altloc_torf, str):
        if altloc_torf.strip() != 'A':
            LOGGER.info('Parsing alternate locations {0}.'
                        .format(altloc_torf))
            which_altlocs = '.' + ''.join(altloc_torf.split())
        else:
            which_altlocs = '.A'
    else:
        which_altlocs = '.A'

    coordinates = np.zeros((asize, 3), dtype=float)
    atomnames = np.zeros(asize, dtype=ATOMIC_FIELDS['name'].dtype)
    resnames = np.zeros(asize, dtype=ATOMIC_FIELDS['resname'].dtype)
    resnums = np.zeros(asize, dtype=ATOMIC_FIELDS['resnum'].dtype)
    chainids = np.zeros(asize, dtype=ATOMIC_FIELDS['chain'].dtype)
    hetero = np.zeros(asize, dtype=bool)
    termini = np.zeros(asize, dtype=bool)
    altlocs = np.zeros(asize, dtype=ATOMIC_FIELDS['altloc'].dtype)
    icodes = np.zeros(asize, dtype=ATOMIC_FIELDS['icode'].dtype)
    serials = np.zeros(asize, dtype=ATOMIC_FIELDS['serial'].dtype)
    elements = np.zeros(asize, dtype=ATOMIC_FIELDS['element'].dtype)
    bfactors = np.zeros(asize, dtype=ATOMIC_FIELDS['beta'].dtype)
    occupancies = np.zeros(asize, dtype=ATOMIC_FIELDS['occupancy'].dtype)

    acount = 0
    # Each record is paired with its own model number, so the model filter
    # stays correct even when other filters skip atoms.
    for line, model_id in zip(lines[start:stop], models):
        if wanted_model is not None and model_id != wanted_model:
            continue

        columns = line.split() # split once, reuse for every field
        startswith = columns[0] # group_PDB

        atomname = columns[-2] # auth_atom_id in standard pos
        resname = columns[-4] # auth_comp_id in standard pos

        if subset is not None:
            if not (atomname in subset and resname in protein_resnames):
                continue

        chID = columns[-3] # auth_asym_id in standard pos
        if chain is not None:
            if not chID in chain:
                continue

        alt = columns[4] # label_alt_id in standard pos
        if alt not in which_altlocs:
            continue

        coordinates[acount] = columns[10:13]
        atomnames[acount] = atomname
        resnames[acount] = resname
        resnums[acount] = columns[21] # auth_seq_id
        chainids[acount] = chID
        hetero[acount] = startswith == 'HETATM' # True or False
        # NOTE(review): for acount == 0 the index -1 wraps around to the
        # last slot of the array, which has not been filled yet.
        if chainids[acount] != chainids[acount-1]: termini[acount] = True
        altlocs[acount] = alt
        icodes[acount] = columns[9] # pdbx_PDB_ins_code
        if icodes[acount] == '?': icodes[acount] = ''
        serials[acount] = columns[1] # id
        elements[acount] = columns[2] # type_symbol
        bfactors[acount] = columns[14]
        occupancies[acount] = columns[13]

        acount += 1

    if model is not None:
        nModels = 1

    # NOTE(review): assumes every model contributes the same number of
    # accepted atoms; otherwise the slices below do not line up.
    modelSize = acount//nModels

    if addcoords:
        atomgroup.addCoordset(coordinates[:modelSize])
    else:
        atomgroup._setCoords(coordinates[:modelSize])

    atomgroup.setNames(atomnames[:modelSize])
    atomgroup.setResnames(resnames[:modelSize])
    atomgroup.setResnums(resnums[:modelSize])
    atomgroup.setChids(chainids[:modelSize])
    atomgroup.setFlags('hetatm', hetero[:modelSize])
    atomgroup.setFlags('pdbter', termini[:modelSize])
    atomgroup.setAltlocs(altlocs[:modelSize])
    atomgroup.setIcodes(icodes[:modelSize])
    atomgroup.setSerials(serials[:modelSize])

    atomgroup.setElements(elements[:modelSize])
    from prody.utilities.misctools import getMasses
    atomgroup.setMasses(getMasses(elements[:modelSize]))
    atomgroup.setBetas(bfactors[:modelSize])
    atomgroup.setOccupancies(occupancies[:modelSize])

    # remaining models become additional coordinate sets
    for n in range(1,nModels):
        atomgroup.addCoordset(coordinates[n*modelSize:(n+1)*modelSize])

    return atomgroup