Exemple #1
0
    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling
        getPositions() and getTopology().

        Parameters
        ----------
        file : string
            the name of the file to load.  Alternatively you can pass an open
            file object.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []
        PDBFile._loadNameReplacementTables()

        # Load the file.

        inputFile = file
        ownHandle = False
        if isinstance(file, str):
            inputFile = open(file)
            ownHandle = True
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
        if ownHandle:
            inputFile.close()
        block = data[0]

        # Build the topology.

        atomData = block.getObj('atom_site')
        atomNameCol = atomData.getAttributeIndex('auth_atom_id')
        if atomNameCol == -1:
            atomNameCol = atomData.getAttributeIndex('label_atom_id')
        atomIdCol = atomData.getAttributeIndex('id')
        resNameCol = atomData.getAttributeIndex('auth_comp_id')
        if resNameCol == -1:
            resNameCol = atomData.getAttributeIndex('label_comp_id')
        resNumCol = atomData.getAttributeIndex('auth_seq_id')
        if resNumCol == -1:
            resNumCol = atomData.getAttributeIndex('label_seq_id')
        resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
        chainIdCol = atomData.getAttributeIndex('auth_asym_id')
        if chainIdCol == -1:
            chainIdCol = atomData.getAttributeIndex('label_asym_id')
            altChainIdCol = -1
        else:
            altChainIdCol = atomData.getAttributeIndex('label_asym_id')
        if altChainIdCol != -1:
            # Figure out which column is best to use for chain IDs.

            idSet = set(row[chainIdCol] for row in atomData.getRowList())
            altIdSet = set(row[altChainIdCol] for row in atomData.getRowList())
            if len(altIdSet) > len(idSet):
                chainIdCol, altChainIdCol = (altChainIdCol, chainIdCol)
        elementCol = atomData.getAttributeIndex('type_symbol')
        altIdCol = atomData.getAttributeIndex('label_alt_id')
        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
        xCol = atomData.getAttributeIndex('Cartn_x')
        yCol = atomData.getAttributeIndex('Cartn_y')
        zCol = atomData.getAttributeIndex('Cartn_z')
        lastChainId = None
        lastAltChainId = None
        lastResId = None
        lastInsertionCode = ''
        atomTable = {}
        atomsInResidue = set()
        models = []
        for row in atomData.getRowList():
            atomKey = ((row[resNumCol], row[chainIdCol], row[atomNameCol]))
            model = ('1' if modelCol == -1 else row[modelCol])
            if model not in models:
                models.append(model)
                self._positions.append([])
            modelIndex = models.index(model)
            if row[altIdCol] != '.' and atomKey in atomTable and len(
                    self._positions[modelIndex]) > atomTable[atomKey].index:
                # This row is an alternate position for an existing atom, so ignore it.

                continue
            if modelIndex == 0:
                # This row defines a new atom.

                if resInsertionCol == -1:
                    insertionCode = ''
                else:
                    insertionCode = row[resInsertionCol]
                if insertionCode in ('.', '?'):
                    insertionCode = ''
                if lastChainId != row[chainIdCol] or (
                        altChainIdCol != -1
                        and lastAltChainId != row[altChainIdCol]):
                    # The start of a new chain.
                    chain = top.addChain(row[chainIdCol])
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    if altChainIdCol != -1:
                        lastAltChainId = row[altChainIdCol]
                if lastResId != row[resNumCol] or lastChainId != row[
                        chainIdCol] or lastInsertionCode != insertionCode or (
                            lastResId == '.'
                            and row[atomNameCol] in atomsInResidue):
                    # The start of a new residue.
                    resId = (None if resNumCol == -1 else row[resNumCol])
                    resIC = insertionCode
                    resName = row[resNameCol]
                    if resName in PDBFile._residueNameReplacements:
                        resName = PDBFile._residueNameReplacements[resName]
                    res = top.addResidue(resName, chain, resId, resIC)
                    if resName in PDBFile._atomNameReplacements:
                        atomReplacements = PDBFile._atomNameReplacements[
                            resName]
                    else:
                        atomReplacements = {}
                    lastResId = row[resNumCol]
                    lastInsertionCode = insertionCode
                    atomsInResidue.clear()
                element = None
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
                atomName = row[atomNameCol]
                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]
                atom = top.addAtom(atomName, element, res, row[atomIdCol])
                atomTable[atomKey] = atom
                atomsInResidue.add(atomName)
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError(
                        'Unknown atom %s in residue %s %s for model %s' %
                        (row[atomNameCol], row[resNameCol], row[resNumCol],
                         model))
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError(
                        'Atom %s for model %s does not match the order of atoms for model %s'
                        % (row[atomIdCol], model, models[0]))
            self._positions[modelIndex].append(
                Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) *
                0.1)
        for i in range(len(self._positions)):
            self._positions[i] = self._positions[i] * nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj('cell')
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            (a, b, c) = [
                float(row[cell.getAttributeIndex(attribute)]) * 0.1
                for attribute in ('length_a', 'length_b', 'length_c')
            ]
            (alpha, beta, gamma) = [
                float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
                for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
            ]
            self.topology.setPeriodicBoxVectors(
                computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

        # Add bonds based on struct_conn records.

        connectData = block.getObj('struct_conn')
        if connectData is not None:
            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
            typeCol = connectData.getAttributeIndex('conn_type_id')
            connectBonds = []
            for row in connectData.getRowList():
                type = row[typeCol][:6]
                if type in ('covale', 'disulf', 'modres'):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if len(connectBonds) > 0:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (
                            bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)
Exemple #2
0
    def __init__(self, file, extraParticleIdentifier='EP'):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters
        ----------
        file : string or file
            the name of the file to load.  Alternatively you can pass an open file object.
        extraParticleIdentifier : string='EP'
            if this value appears in the element column for an ATOM record, the Atom's element will be set to None to mark it as an extra particle
        """

        metalElements = [
            'Al', 'As', 'Ba', 'Ca', 'Cd', 'Ce', 'Co', 'Cs', 'Cu', 'Dy', 'Fe',
            'Gd', 'Hg', 'Ho', 'In', 'Ir', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na',
            'Ni', 'Pb', 'Pd', 'Pt', 'Rb', 'Rh', 'Sm', 'Sr', 'Te', 'Tl', 'V',
            'W', 'Yb', 'Zn'
        ]

        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        else:
            inputfile = file
            own_handle = False
            if isinstance(file, str):
                inputfile = open(file)
                own_handle = True
            pdb = PdbStructure(inputfile,
                               load_all_models=True,
                               extraParticleIdentifier=extraParticleIdentifier)
            if own_handle:
                inputfile.close()
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain(chain.chain_id)
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c, str(residue.number),
                                   residue.insertion_code)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.iter_atoms():
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element == 'EP':
                        element = None
                    elif element is None:
                        # Try to guess the element.

                        upper = atomName.upper()
                        while len(upper) > 1 and upper[0].isdigit():
                            upper = upper[1:]
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif upper.startswith('ZN'):
                            element = elem.zinc
                        elif len(residue) == 1 and upper.startswith('CA'):
                            element = elem.calcium
                        elif upper.startswith('D') and any(
                                a.name == atomName[1:]
                                for a in residue.iter_atoms()):
                            pass  # A Drude particle
                        else:
                            try:
                                element = elem.get_by_symbol(upper[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r,
                                          str(atom.serial_number))
                    atomByNumber[atom.serial_number] = newAtom
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.iter_atoms():
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords * nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.

        connectBonds = []
        for connect in pdb.models[-1].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    if atomByNumber[i].element is not None and atomByNumber[
                            j].element is not None:
                        if atomByNumber[
                                i].element.symbol not in metalElements and atomByNumber[
                                    j].element.symbol not in metalElements:
                            connectBonds.append(
                                (atomByNumber[i], atomByNumber[j]))
                        elif atomByNumber[
                                i].element.symbol in metalElements and atomByNumber[
                                    j].residue.name not in PDBFile._standardResidues:
                            connectBonds.append(
                                (atomByNumber[i], atomByNumber[j]))
                        elif atomByNumber[
                                j].element.symbol in metalElements and atomByNumber[
                                    i].residue.name not in PDBFile._standardResidues:
                            connectBonds.append(
                                (atomByNumber[i], atomByNumber[j]))
                    else:
                        connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)