Ejemplo n.º 1
0
    def __init__(self, file):
        """Load a prmtop file."""
        ## The Topology read from the prmtop file
        self.topology = top = Topology()
        self.elements = []

        # Load the prmtop file

        prmtop = amber_file_parser.PrmtopLoader(file)
        self._prmtop = prmtop

        # Add atoms to the topology

        PDBFile._loadNameReplacementTables()
        lastResidue = None
        c = top.addChain()
        for index in range(prmtop.getNumAtoms()):
            resNumber = prmtop.getResidueNumber(index)
            if resNumber != lastResidue:
                lastResidue = resNumber
                resName = prmtop.getResidueLabel(iAtom=index).strip()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
            atomName = prmtop.getAtomName(index).strip()
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]

            # Get the element from the prmtop file if available
            if prmtop.has_atomic_number:
                try:
                    element = elem.Element.getByAtomicNumber(
                        int(prmtop._raw_data['ATOMIC_NUMBER'][index]))
                except KeyError:
                    element = None
            else:
                # Try to guess the element from the atom name.

                upper = atomName.upper()
                if upper.startswith('CL'):
                    element = elem.chlorine
                elif upper.startswith('NA'):
                    element = elem.sodium
                elif upper.startswith('MG'):
                    element = elem.magnesium
                elif upper.startswith('ZN'):
                    element = elem.zinc
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None

            top.addAtom(atomName, element, r)
            self.elements.append(element)

        # Add bonds to the topology

        atoms = list(top.atoms())
        for bond in prmtop.getBondsWithH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])
        for bond in prmtop.getBondsNoH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])

        # Set the periodic box size.

        if prmtop.getIfBox():
            box = prmtop.getBoxBetaAndDimensions()
            top.setPeriodicBoxVectors(
                computePeriodicBoxVectors(*(box[1:4] + box[0:1] * 3)))
Ejemplo n.º 2
0
    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling
        getPositions() and getTopology().

        Parameters
        ----------
        file : string
            the name of the file to load.  Alternatively you can pass an open
            file object.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []
        PDBFile._loadNameReplacementTables()

        # Load the file.

        inputFile = file
        ownHandle = False
        if isinstance(file, str):
            inputFile = open(file)
            ownHandle = True
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
        if ownHandle:
            inputFile.close()
        block = data[0]

        # Build the topology.

        atomData = block.getObj('atom_site')
        atomNameCol = atomData.getAttributeIndex('auth_atom_id')
        if atomNameCol == -1:
            atomNameCol = atomData.getAttributeIndex('label_atom_id')
        atomIdCol = atomData.getAttributeIndex('id')
        resNameCol = atomData.getAttributeIndex('auth_comp_id')
        if resNameCol == -1:
            resNameCol = atomData.getAttributeIndex('label_comp_id')
        resNumCol = atomData.getAttributeIndex('auth_seq_id')
        if resNumCol == -1:
            resNumCol = atomData.getAttributeIndex('label_seq_id')
        resInsertionCol = atomData.getAttributeIndex('pdbx_PDB_ins_code')
        chainIdCol = atomData.getAttributeIndex('auth_asym_id')
        if chainIdCol == -1:
            chainIdCol = atomData.getAttributeIndex('label_asym_id')
            altChainIdCol = -1
        else:
            altChainIdCol = atomData.getAttributeIndex('label_asym_id')
        if altChainIdCol != -1:
            # Figure out which column is best to use for chain IDs.

            idSet = set(row[chainIdCol] for row in atomData.getRowList())
            altIdSet = set(row[altChainIdCol] for row in atomData.getRowList())
            if len(altIdSet) > len(idSet):
                chainIdCol, altChainIdCol = (altChainIdCol, chainIdCol)
        elementCol = atomData.getAttributeIndex('type_symbol')
        altIdCol = atomData.getAttributeIndex('label_alt_id')
        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
        xCol = atomData.getAttributeIndex('Cartn_x')
        yCol = atomData.getAttributeIndex('Cartn_y')
        zCol = atomData.getAttributeIndex('Cartn_z')
        lastChainId = None
        lastAltChainId = None
        lastResId = None
        lastInsertionCode = ''
        atomTable = {}
        atomsInResidue = set()
        models = []
        for row in atomData.getRowList():
            atomKey = ((row[resNumCol], row[chainIdCol], row[atomNameCol]))
            model = ('1' if modelCol == -1 else row[modelCol])
            if model not in models:
                models.append(model)
                self._positions.append([])
            modelIndex = models.index(model)
            if row[altIdCol] != '.' and atomKey in atomTable and len(
                    self._positions[modelIndex]) > atomTable[atomKey].index:
                # This row is an alternate position for an existing atom, so ignore it.

                continue
            if modelIndex == 0:
                # This row defines a new atom.

                if resInsertionCol == -1:
                    insertionCode = ''
                else:
                    insertionCode = row[resInsertionCol]
                if insertionCode in ('.', '?'):
                    insertionCode = ''
                if lastChainId != row[chainIdCol] or (
                        altChainIdCol != -1
                        and lastAltChainId != row[altChainIdCol]):
                    # The start of a new chain.
                    chain = top.addChain(row[chainIdCol])
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    if altChainIdCol != -1:
                        lastAltChainId = row[altChainIdCol]
                if lastResId != row[resNumCol] or lastChainId != row[
                        chainIdCol] or lastInsertionCode != insertionCode or (
                            lastResId == '.'
                            and row[atomNameCol] in atomsInResidue):
                    # The start of a new residue.
                    resId = (None if resNumCol == -1 else row[resNumCol])
                    resIC = insertionCode
                    resName = row[resNameCol]
                    if resName in PDBFile._residueNameReplacements:
                        resName = PDBFile._residueNameReplacements[resName]
                    res = top.addResidue(resName, chain, resId, resIC)
                    if resName in PDBFile._atomNameReplacements:
                        atomReplacements = PDBFile._atomNameReplacements[
                            resName]
                    else:
                        atomReplacements = {}
                    lastResId = row[resNumCol]
                    lastInsertionCode = insertionCode
                    atomsInResidue.clear()
                element = None
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
                atomName = row[atomNameCol]
                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]
                atom = top.addAtom(atomName, element, res, row[atomIdCol])
                atomTable[atomKey] = atom
                atomsInResidue.add(atomName)
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError(
                        'Unknown atom %s in residue %s %s for model %s' %
                        (row[atomNameCol], row[resNameCol], row[resNumCol],
                         model))
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError(
                        'Atom %s for model %s does not match the order of atoms for model %s'
                        % (row[atomIdCol], model, models[0]))
            self._positions[modelIndex].append(
                Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) *
                0.1)
        for i in range(len(self._positions)):
            self._positions[i] = self._positions[i] * nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj('cell')
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            (a, b, c) = [
                float(row[cell.getAttributeIndex(attribute)]) * 0.1
                for attribute in ('length_a', 'length_b', 'length_c')
            ]
            (alpha, beta, gamma) = [
                float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
                for attribute in ('angle_alpha', 'angle_beta', 'angle_gamma')
            ]
            self.topology.setPeriodicBoxVectors(
                computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

        # Add bonds based on struct_conn records.

        connectData = block.getObj('struct_conn')
        if connectData is not None:
            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
            typeCol = connectData.getAttributeIndex('conn_type_id')
            connectBonds = []
            for row in connectData.getRowList():
                type = row[typeCol][:6]
                if type in ('covale', 'disulf', 'modres'):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if len(connectBonds) > 0:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (
                            bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)
Ejemplo n.º 3
0
    def __init__(self, file, extraParticleIdentifier='EP'):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters
        ----------
        file : string or file
            the name of the file to load.  Alternatively you can pass an open file object.
        extraParticleIdentifier : string='EP'
            if this value appears in the element column for an ATOM record, the Atom's element will be set to None to mark it as an extra particle
        """

        metalElements = [
            'Al', 'As', 'Ba', 'Ca', 'Cd', 'Ce', 'Co', 'Cs', 'Cu', 'Dy', 'Fe',
            'Gd', 'Hg', 'Ho', 'In', 'Ir', 'K', 'Li', 'Mg', 'Mn', 'Mo', 'Na',
            'Ni', 'Pb', 'Pd', 'Pt', 'Rb', 'Rh', 'Sm', 'Sr', 'Te', 'Tl', 'V',
            'W', 'Yb', 'Zn'
        ]

        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        else:
            inputfile = file
            own_handle = False
            if isinstance(file, str):
                inputfile = open(file)
                own_handle = True
            pdb = PdbStructure(inputfile,
                               load_all_models=True,
                               extraParticleIdentifier=extraParticleIdentifier)
            if own_handle:
                inputfile.close()
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain(chain.chain_id)
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c, str(residue.number),
                                   residue.insertion_code)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.iter_atoms():
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element == 'EP':
                        element = None
                    elif element is None:
                        # Try to guess the element.

                        upper = atomName.upper()
                        while len(upper) > 1 and upper[0].isdigit():
                            upper = upper[1:]
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif upper.startswith('ZN'):
                            element = elem.zinc
                        elif len(residue) == 1 and upper.startswith('CA'):
                            element = elem.calcium
                        elif upper.startswith('D') and any(
                                a.name == atomName[1:]
                                for a in residue.iter_atoms()):
                            pass  # A Drude particle
                        else:
                            try:
                                element = elem.get_by_symbol(upper[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r,
                                          str(atom.serial_number))
                    atomByNumber[atom.serial_number] = newAtom
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.iter_atoms():
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords * nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setPeriodicBoxVectors(pdb.get_periodic_box_vectors())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records. Bonds between metals of elements specified in metalElements and residues in standardResidues are not added.

        connectBonds = []
        for connect in pdb.models[-1].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    if atomByNumber[i].element is not None and atomByNumber[
                            j].element is not None:
                        if atomByNumber[
                                i].element.symbol not in metalElements and atomByNumber[
                                    j].element.symbol not in metalElements:
                            connectBonds.append(
                                (atomByNumber[i], atomByNumber[j]))
                        elif atomByNumber[
                                i].element.symbol in metalElements and atomByNumber[
                                    j].residue.name not in PDBFile._standardResidues:
                            connectBonds.append(
                                (atomByNumber[i], atomByNumber[j]))
                        elif atomByNumber[
                                j].element.symbol in metalElements and atomByNumber[
                                    i].residue.name not in PDBFile._standardResidues:
                            connectBonds.append(
                                (atomByNumber[i], atomByNumber[j]))
                    else:
                        connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Ejemplo n.º 4
0
def extract(item,
            atom_indices='all',
            structure_indices='all',
            copy_if_all=True,
            check=True):

    if check:

        digest_item(item, 'openmm.Topology')
        atom_indices = digest_atom_indices(atom_indices)
        structure_indices = digest_structure_indices(structure_indices)

    from openmm.app import Topology

    if (atom_indices is 'all') and (structure_indices is 'all'):

        if copy_if_all:

            new_item = Topology()
            newAtoms = {}
            for chain in item.chains():
                newChain = new_item.addChain(chain.id)
                for residue in chain.residues():
                    newResidue = new_item.addResidue(residue.name, newChain,
                                                     residue.id,
                                                     residue.insertionCode)
                    for atom in residue.atoms():
                        newAtom = new_item.addAtom(atom.name, atom.element,
                                                   newResidue, atom.id)
                        newAtoms[atom] = newAtom
            for bond in item.bonds():
                new_item.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
            del (newAtoms)
            new_item.setPeriodicBoxVectors(item.getPeriodicBoxVectors())
            tmp_item = new_item

        else:

            tmp_item = item
    else:

        new_item = Topology()
        atom_indices_to_be_kept = atom_indices
        newAtoms = {}
        set_atom_indices = set(atom_indices_to_be_kept)
        for chain in item.chains():
            needNewChain = True
            for residue in chain.residues():
                needNewResidue = True
                for atom in residue.atoms():
                    if atom.index in set_atom_indices:
                        if needNewChain:
                            newChain = new_item.addChain(chain.id)
                            needNewChain = False
                        if needNewResidue:
                            newResidue = new_item.addResidue(
                                residue.name, newChain, residue.id,
                                residue.insertionCode)
                            needNewResidue = False
                        newAtom = new_item.addAtom(atom.name, atom.element,
                                                   newResidue, atom.id)
                        newAtoms[atom] = newAtom
        for bond in item.bonds():
            if bond[0].index in set_atom_indices and bond[
                    1].index in set_atom_indices:
                new_item.addBond(newAtoms[bond[0]], newAtoms[bond[1]])
        del (newAtoms)
        new_item.setPeriodicBoxVectors(item.getPeriodicBoxVectors())
        tmp_item = new_item

    return tmp_item
Ejemplo n.º 5
0
    def _createTopology(self):
        """Build the topology of the system
        """
        top = Topology()
        positions = []
        velocities = []
        boxVectors = []

        #assume cell dimensions are set in the first file
        #the other molecules inherit the same cell
        conn = self._conn[0]
        for x, y, z in conn.execute('SELECT x, y, z FROM global_cell'):
            boxVectors.append(mm.Vec3(x, y, z))
        unitCellDimensions = [
            boxVectors[0][0], boxVectors[1][1], boxVectors[2][2]
        ]
        top.setUnitCellDimensions(unitCellDimensions * angstrom)

        #process each file
        nfiles = len(self._conn)
        for (fcounter, conn, tables) in zip(range(0, nfiles), self._conn,
                                            self._tables):

            atoms = {}
            lastChain = None
            lastResId = None
            c = top.addChain()
            q = """SELECT id, name, anum, resname, resid, chain, x, y, z, vx, vy, vz
                FROM particle ORDER BY id"""
            for (atomId, atomName, atomNumber, resName, resId, chain, x, y, z,
                 vx, vy, vz) in conn.execute(q):
                newChain = False
                if chain != lastChain:
                    lastChain = chain
                    c = top.addChain()
                    newChain = True
                if resId != lastResId or newChain:
                    lastResId = resId
                    if resName in PDBFile._residueNameReplacements:
                        resName = PDBFile._residueNameReplacements[resName]
                    r = top.addResidue(resName, c)
                    if resName in PDBFile._atomNameReplacements:
                        atomReplacements = PDBFile._atomNameReplacements[
                            resName]
                    else:
                        atomReplacements = {}

                if atomNumber == 0 and atomName.startswith('Vrt'):
                    elem = None
                else:
                    elem = Element.getByAtomicNumber(atomNumber)

                if atomName in atomReplacements:
                    atomName = atomReplacements[atomName]

                atoms[atomId] = top.addAtom(atomName, elem, r)
                positions.append(mm.Vec3(x, y, z))

                velocities.append(mm.Vec3(vx, vy, vz))

            self._natoms[fcounter] = len(atoms)

            for p0, p1 in conn.execute('SELECT p0, p1 FROM bond'):
                top.addBond(atoms[p0], atoms[p1])

        positions = positions * angstrom
        velocities = velocities * angstrom / femtosecond
        return top, positions, velocities