Esempio n. 1
0
    def __init__(self, file):
        """Load a prmtop file."""
        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top

        # Load the prmtop file

        prmtop = amber_file_parser.PrmtopLoader(file)
        self._prmtop = prmtop

        # Add atoms to the topology

        PDBFile._loadNameReplacementTables()
        lastResidue = None
        c = top.addChain()
        for index in range(prmtop.getNumAtoms()):
            resNumber = prmtop.getResidueNumber(index)
            if resNumber != lastResidue:
                lastResidue = resNumber
                resName = prmtop.getResidueLabel(iAtom=index).strip()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
            atomName = prmtop.getAtomName(index).strip()
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]

            # Try to guess the element.

            upper = atomName.upper()
            if upper.startswith('CL'):
                element = elem.chlorine
            elif upper.startswith('NA'):
                element = elem.sodium
            elif upper.startswith('MG'):
                element = elem.magnesium
            else:
                try:
                    element = elem.get_by_symbol(atomName[0])
                except KeyError:
                    element = None
            top.addAtom(atomName, element, r)

        # Add bonds to the topology

        atoms = list(top.atoms())
        for bond in prmtop.getBondsWithH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])
        for bond in prmtop.getBondsNoH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])

        # Set the periodic box size.

        if prmtop.getIfBox():
            top.setUnitCellDimensions(tuple(x.value_in_unit(unit.nanometer) for x in prmtop.getBoxBetaAndDimensions()[1:4])*unit.nanometer)
Esempio n. 2
0
    def __init__(self,
                 file,
                 unitCellDimensions=None,
                 includeDir='/usr/local/gromacs/share/gromacs/top',
                 defines={}):
        """Load a top file.

        Parameters:
         - file (string) the name of the file to load
         - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell
         - includeDir (string=/usr/local/gromacs/share/gromacs/top) a directory in which to look for other files
           included from the top file
         - defines (map={}) preprocessor definitions that should be predefined when parsing the file
         """
        self._includeDirs = (os.path.dirname(file), includeDir)
        self._defines = defines

        # Parse the file.

        self._currentCategory = None
        self._ifStack = []
        self._moleculeTypes = {}
        self._molecules = []
        self._currentMoleculeType = None
        self._atomTypes = {}
        self._bondTypes = {}
        self._angleTypes = {}
        self._dihedralTypes = {}
        self._implicitTypes = {}
        self._pairTypes = {}
        self._cmapTypes = {}
        self._processFile(file)

        # Create the Topology from it.

        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top
        top.setUnitCellDimensions(unitCellDimensions)
        PDBFile._loadNameReplacementTables()
        for moleculeName, moleculeCount in self._molecules:
            if moleculeName not in self._moleculeTypes:
                raise ValueError("Unknown molecule type: " + moleculeName)
            moleculeType = self._moleculeTypes[moleculeName]

            # Create the specified number of molecules of this type.

            for i in range(moleculeCount):
                atoms = []
                lastResidue = None
                c = top.addChain()
                for index, fields in enumerate(moleculeType.atoms):
                    resNumber = fields[2]
                    if resNumber != lastResidue:
                        lastResidue = resNumber
                        resName = fields[3]
                        if resName in PDBFile._residueNameReplacements:
                            resName = PDBFile._residueNameReplacements[resName]
                        r = top.addResidue(resName, c)
                        if resName in PDBFile._atomNameReplacements:
                            atomReplacements = PDBFile._atomNameReplacements[
                                resName]
                        else:
                            atomReplacements = {}
                    atomName = fields[4]
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]

                    # Try to guess the element.

                    upper = atomName.upper()
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    else:
                        try:
                            element = elem.get_by_symbol(atomName[0])
                        except KeyError:
                            element = None
                    atoms.append(top.addAtom(atomName, element, r))

                # Add bonds to the topology

                for fields in moleculeType.bonds:
                    top.addBond(atoms[int(fields[0]) - 1],
                                atoms[int(fields[1]) - 1])
Esempio n. 3
0
    def __init__(self, file, periodicBoxVectors=None, unitCellDimensions=None, includeDir=None, defines=None):
        """Load a top file.

        Parameters:
         - file (string) the name of the file to load
         - periodicBoxVectors (tuple of Vec3=None) the vectors defining the periodic box
         - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell.  For
           non-rectangular unit cells, specify periodicBoxVectors instead.
         - includeDir (string=None) A directory in which to look for other files
           included from the top file. If not specified, we will attempt to locate a gromacs
           installation on your system. When gromacs is installed in /usr/local, this will resolve
           to  /usr/local/gromacs/share/gromacs/top
         - defines (dict={}) preprocessor definitions that should be predefined when parsing the file
         """
        if includeDir is None:
            includeDir = _defaultGromacsIncludeDir()
        self._includeDirs = (os.path.dirname(file), includeDir)
        # Most of the gromacs water itp files for different forcefields,
        # unless the preprocessor #define FLEXIBLE is given, don't define
        # bonds between the water hydrogen and oxygens, but only give the
        # constraint distances and exclusions.
        self._defines = {'FLEXIBLE': True}
        if defines is not None:
            self._defines.update(defines)

        # Parse the file.

        self._currentCategory = None
        self._ifStack = []
        self._elseStack = []
        self._moleculeTypes = {}
        self._molecules = []
        self._currentMoleculeType = None
        self._atomTypes = {}
        self._bondTypes= {}
        self._angleTypes = {}
        self._dihedralTypes = {}
        self._implicitTypes = {}
        self._pairTypes = {}
        self._cmapTypes = {}
        self._processFile(file)

        # Create the Topology from it.

        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top
        if periodicBoxVectors is not None:
            if unitCellDimensions is not None:
                raise ValueError("specify either periodicBoxVectors or unitCellDimensions, but not both")
            top.setPeriodicBoxVectors(periodicBoxVectors)
        else:
            top.setUnitCellDimensions(unitCellDimensions)
        PDBFile._loadNameReplacementTables()
        for moleculeName, moleculeCount in self._molecules:
            if moleculeName not in self._moleculeTypes:
                raise ValueError("Unknown molecule type: "+moleculeName)
            moleculeType = self._moleculeTypes[moleculeName]

            # Create the specified number of molecules of this type.

            for i in range(moleculeCount):
                atoms = []
                lastResidue = None
                c = top.addChain()
                for index, fields in enumerate(moleculeType.atoms):
                    resNumber = fields[2]
                    if resNumber != lastResidue:
                        lastResidue = resNumber
                        resName = fields[3]
                        if resName in PDBFile._residueNameReplacements:
                            resName = PDBFile._residueNameReplacements[resName]
                        r = top.addResidue(resName, c)
                        if resName in PDBFile._atomNameReplacements:
                            atomReplacements = PDBFile._atomNameReplacements[resName]
                        else:
                            atomReplacements = {}
                    atomName = fields[4]
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]

                    # Try to guess the element.

                    upper = atomName.upper()
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    else:
                        try:
                            element = elem.get_by_symbol(atomName[0])
                        except KeyError:
                            element = None
                    atoms.append(top.addAtom(atomName, element, r))

                # Add bonds to the topology

                for fields in moleculeType.bonds:
                    top.addBond(atoms[int(fields[0])-1], atoms[int(fields[1])-1])
Esempio n. 4
0
    def __init__(self, file):
        """Load a prmtop file."""
        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top
        self.elements = []

        # Load the prmtop file

        prmtop = amber_file_parser.PrmtopLoader(file)
        self._prmtop = prmtop

        # Add atoms to the topology

        PDBFile._loadNameReplacementTables()
        lastResidue = None
        c = top.addChain()
        for index in range(prmtop.getNumAtoms()):
            resNumber = prmtop.getResidueNumber(index)
            if resNumber != lastResidue:
                lastResidue = resNumber
                resName = prmtop.getResidueLabel(iAtom=index).strip()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
            atomName = prmtop.getAtomName(index).strip()
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]

            # Get the element from the prmtop file if available
            if prmtop.has_atomic_number:
                try:
                    element = elem.Element.getByAtomicNumber(int(prmtop._raw_data['ATOMIC_NUMBER'][index]))
                except KeyError:
                    element = None
            else:
                # Try to guess the element from the atom name.

                upper = atomName.upper()
                if upper.startswith('CL'):
                    element = elem.chlorine
                elif upper.startswith('NA'):
                    element = elem.sodium
                elif upper.startswith('MG'):
                    element = elem.magnesium
                elif upper.startswith('ZN'):
                    element = elem.zinc
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None

            top.addAtom(atomName, element, r)
            self.elements.append(element)

        # Add bonds to the topology

        atoms = list(top.atoms())
        for bond in prmtop.getBondsWithH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])
        for bond in prmtop.getBondsNoH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])

        # Set the periodic box size.

        if prmtop.getIfBox():
            box = prmtop.getBoxBetaAndDimensions()
            top.setPeriodicBoxVectors(computePeriodicBoxVectors(*(box[1:4] + box[0:1]*3)))
Esempio n. 5
0
    def __init__(self,
                 file,
                 unitCellDimensions=None,
                 includeDir=None,
                 defines=None):
        """Load a top file.

        Parameters:
         - file (string) the name of the file to load
         - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell
         - includeDir (string=None) A directory in which to look for other files
           included from the top file. If not specified, we will attempt to locate a gromacs
           installation on your system. When gromacs is installed in /usr/local, this will resolve
           to  /usr/local/gromacs/share/gromacs/top
         - defines (dict={}) preprocessor definitions that should be predefined when parsing the file
         """
        if includeDir is None:
            includeDir = _defaultGromacsIncludeDir()
        self._includeDirs = (os.path.dirname(file), includeDir)
        # Most of the gromacs water itp files for different forcefields,
        # unless the preprocessor #define FLEXIBLE is given, don't define
        # bonds between the water hydrogen and oxygens, but only give the
        # constraint distances and exclusions.
        self._defines = {'FLEXIBLE': True}
        if defines is not None:
            self._defines.update(defines)

        # Parse the file.

        self._currentCategory = None
        self._ifStack = []
        self._elseStack = []
        self._moleculeTypes = {}
        self._molecules = []
        self._currentMoleculeType = None
        self._atomTypes = {}
        self._bondTypes = {}
        self._angleTypes = {}
        self._dihedralTypes = {}
        self._implicitTypes = {}
        self._pairTypes = {}
        self._cmapTypes = {}
        self._processFile(file)

        # Create the Topology from it.

        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top
        top.setUnitCellDimensions(unitCellDimensions)
        PDBFile._loadNameReplacementTables()
        for moleculeName, moleculeCount in self._molecules:
            if moleculeName not in self._moleculeTypes:
                raise ValueError("Unknown molecule type: " + moleculeName)
            moleculeType = self._moleculeTypes[moleculeName]

            # Create the specified number of molecules of this type.

            for i in range(moleculeCount):
                atoms = []
                lastResidue = None
                c = top.addChain()
                for index, fields in enumerate(moleculeType.atoms):
                    resNumber = fields[2]
                    if resNumber != lastResidue:
                        lastResidue = resNumber
                        resName = fields[3]
                        if resName in PDBFile._residueNameReplacements:
                            resName = PDBFile._residueNameReplacements[resName]
                        r = top.addResidue(resName, c)
                        if resName in PDBFile._atomNameReplacements:
                            atomReplacements = PDBFile._atomNameReplacements[
                                resName]
                        else:
                            atomReplacements = {}
                    atomName = fields[4]
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]

                    # Try to guess the element.

                    upper = atomName.upper()
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    else:
                        try:
                            element = elem.get_by_symbol(atomName[0])
                        except KeyError:
                            element = None
                    atoms.append(top.addAtom(atomName, element, r))

                # Add bonds to the topology

                for fields in moleculeType.bonds:
                    top.addBond(atoms[int(fields[0]) - 1],
                                atoms[int(fields[1]) - 1])
Esempio n. 6
0
    def __init__(self, file):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string) the name of the file to load
        """
        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        else:
            inputfile = file
            if isinstance(file, str):
                inputfile = open(file)
            pdb = PdbStructure(inputfile, load_all_models=True)
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain()
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        # Try to guess the element.

                        upper = atomName.upper()
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif (len(residue) == 1 and upper.startswith('CA')):
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(atomName[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r)
                    atomByNumber[atom.serial_number] = newAtom
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords * nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setUnitCellDimensions(pdb.get_unit_cell_dimensions())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records.

        connectBonds = []
        for connect in pdb.models[0].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Esempio n. 7
0
    def __init__(self, file, unitCellDimensions=None, includeDir='/usr/local/gromacs/share/gromacs/top', defines={}):
        """Load a top file.

        Parameters:
         - file (string) the name of the file to load
         - unitCellDimensions (Vec3=None) the dimensions of the crystallographic unit cell
         - includeDir (string=/usr/local/gromacs/share/gromacs/top) a directory in which to look for other files
           included from the top file
         - defines (map={}) preprocessor definitions that should be predefined when parsing the file
         """
        self._includeDirs = (os.path.dirname(file), includeDir)
        self._defines = defines

        # Parse the file.

        self._currentCategory = None
        self._ifStack = []
        self._moleculeTypes = {}
        self._molecules = []
        self._currentMoleculeType = None
        self._atomTypes = {}
        self._bondTypes= {}
        self._angleTypes = {}
        self._dihedralTypes = {}
        self._implicitTypes = {}
        self._pairTypes = {}
        self._cmapTypes = {}
        self._processFile(file)

        # Create the Topology from it.

        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top
        top.setUnitCellDimensions(unitCellDimensions)
        PDBFile._loadNameReplacementTables()
        for moleculeName, moleculeCount in self._molecules:
            if moleculeName not in self._moleculeTypes:
                raise ValueError("Unknown molecule type: "+moleculeName)
            moleculeType = self._moleculeTypes[moleculeName]

            # Create the specified number of molecules of this type.

            for i in range(moleculeCount):
                atoms = []
                lastResidue = None
                c = top.addChain()
                for index, fields in enumerate(moleculeType.atoms):
                    resNumber = fields[2]
                    if resNumber != lastResidue:
                        lastResidue = resNumber
                        resName = fields[3]
                        if resName in PDBFile._residueNameReplacements:
                            resName = PDBFile._residueNameReplacements[resName]
                        r = top.addResidue(resName, c)
                        if resName in PDBFile._atomNameReplacements:
                            atomReplacements = PDBFile._atomNameReplacements[resName]
                        else:
                            atomReplacements = {}
                    atomName = fields[4]
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]

                    # Try to guess the element.

                    upper = atomName.upper()
                    if upper.startswith('CL'):
                        element = elem.chlorine
                    elif upper.startswith('NA'):
                        element = elem.sodium
                    elif upper.startswith('MG'):
                        element = elem.magnesium
                    else:
                        try:
                            element = elem.get_by_symbol(atomName[0])
                        except KeyError:
                            element = None
                    atoms.append(top.addAtom(atomName, element, r))

                # Add bonds to the topology

                for fields in moleculeType.bonds:
                    top.addBond(atoms[int(fields[0])-1], atoms[int(fields[1])-1])
Esempio n. 8
0
    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string) the name of the file to load.  Alternatively you can pass an open file object.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []

        # Load the file.

        inputFile = file
        if isinstance(file, str):
            inputFile = open(file)
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
        block = data[0]

        # Build the topology.

        atomData = block.getObj('atom_site')
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
        atomIdCol = atomData.getAttributeIndex('id')
        resNameCol = atomData.getAttributeIndex('label_comp_id')
        resIdCol = atomData.getAttributeIndex('label_seq_id')
        asymIdCol = atomData.getAttributeIndex('label_asym_id')
        chainIdCol = atomData.getAttributeIndex('label_entity_id')
        elementCol = atomData.getAttributeIndex('type_symbol')
        altIdCol = atomData.getAttributeIndex('label_alt_id')
        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
        xCol = atomData.getAttributeIndex('Cartn_x')
        yCol = atomData.getAttributeIndex('Cartn_y')
        zCol = atomData.getAttributeIndex('Cartn_z')
        lastChainId = None
        lastResId = None
        lastAsymId = None
        atomTable = {}
        models = []
        for row in atomData.getRowList():
            atomKey = ((row[resIdCol], row[asymIdCol], row[atomNameCol]))
            model = ('1' if modelCol == -1 else row[modelCol])
            if model not in models:
                models.append(model)
                self._positions.append([])
            modelIndex = models.index(model)
            if row[altIdCol] != '.' and atomKey in atomTable and len(self._positions[modelIndex]) > atomTable[atomKey].index:
                # This row is an alternate position for an existing atom, so ignore it.

                continue
            if modelIndex == 0:
                # This row defines a new atom.

                if lastChainId != row[chainIdCol]:
                    # The start of a new chain.
                    chain = top.addChain()
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    lastAsymId = None
                if lastResId != row[resIdCol] or lastAsymId != row[asymIdCol]:
                    # The start of a new residue.
                    res = top.addResidue(row[resNameCol], chain)
                    lastResId = row[resIdCol]
                    if lastResId == '.':
                        lastResId = None
                    lastAsymId = row[asymIdCol]
                element = None
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
                atom = top.addAtom(row[atomNameCol], element, res)
                atomTable[atomKey] = atom
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError('Unknown atom %s in residue %s %s for model %s' % (row[atomNameCol], row[resNameCol], row[resIdCol], model))
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError('Atom %s for model %s does not match the order of atoms for model %s' % (row[atomIdCol], model, models[0]))
            self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol]))*0.1)
        for i in range(len(self._positions)):
            self._positions[i] = self._positions[i]*nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj('cell')
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            cellSize = [float(row[cell.getAttributeIndex(attribute)]) for attribute in ('length_a', 'length_b', 'length_c')]*angstroms
            self.topology.setUnitCellDimensions(cellSize)

        # Add bonds based on struct_conn records.

        connectData = block.getObj('struct_conn')
        if connectData is not None:
            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
            typeCol = connectData.getAttributeIndex('conn_type_id')
            connectBonds = []
            for row in connectData.getRowList():
                type = row[typeCol][:6]
                if type in ('covale', 'disulf', 'modres'):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if len(connectBonds) > 0:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)
Esempio n. 9
0
    def __init__(self, file):
        """Load a PDB file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string) the name of the file to load
        """
        top = Topology()
        ## The Topology read from the PDB file
        self.topology = top

        # Load the PDB file

        if isinstance(file, PdbStructure):
            pdb = file
        else:
            inputfile = file
            if isinstance(file, str):
                inputfile = open(file)
            pdb = PdbStructure(inputfile, load_all_models=True)
        PDBFile._loadNameReplacementTables()

        # Build the topology

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = top.addChain()
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        # Try to guess the element.

                        upper = atomName.upper()
                        if upper.startswith('CL'):
                            element = elem.chlorine
                        elif upper.startswith('NA'):
                            element = elem.sodium
                        elif upper.startswith('MG'):
                            element = elem.magnesium
                        elif upper.startswith('BE'):
                            element = elem.beryllium
                        elif upper.startswith('LI'):
                            element = elem.lithium
                        elif upper.startswith('K'):
                            element = elem.potassium
                        elif upper.startswith('ZN'):
                            element = elem.zinc
                        elif( len( residue ) == 1 and upper.startswith('CA') ):
                            element = elem.calcium
                        else:
                            try:
                                element = elem.get_by_symbol(atomName[0])
                            except KeyError:
                                pass
                    newAtom = top.addAtom(atomName, element, r)
                    atomByNumber[atom.serial_number] = newAtom
        self._positions = []
        for model in pdb.iter_models(True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        pos = atom.get_position().value_in_unit(nanometers)
                        coords.append(Vec3(pos[0], pos[1], pos[2]))
            self._positions.append(coords*nanometers)
        ## The atom positions read from the PDB file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.setUnitCellDimensions(pdb.get_unit_cell_dimensions())
        self.topology.createStandardBonds()
        self.topology.createDisulfideBonds(self.positions)
        self._numpyPositions = None

        # Add bonds based on CONECT records.

        connectBonds = []
        for connect in pdb.models[0].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(top.bonds())
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    top.addBond(bond[0], bond[1])
                    existingBonds.add(bond)
Esempio n. 10
0
    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string) the name of the file to load.  Alternatively you can pass an open file object.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []

        # Load the file.

        inputFile = file
        if isinstance(file, str):
            inputFile = open(file)
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
        block = data[0]

        # Build the topology.

        atomData = block.getObj('atom_site')
        atomNameCol = atomData.getAttributeIndex('label_atom_id')
        atomIdCol = atomData.getAttributeIndex('id')
        resNameCol = atomData.getAttributeIndex('label_comp_id')
        resIdCol = atomData.getAttributeIndex('label_seq_id')
        asymIdCol = atomData.getAttributeIndex('label_asym_id')
        chainIdCol = atomData.getAttributeIndex('label_entity_id')
        elementCol = atomData.getAttributeIndex('type_symbol')
        altIdCol = atomData.getAttributeIndex('label_alt_id')
        modelCol = atomData.getAttributeIndex('pdbx_PDB_model_num')
        xCol = atomData.getAttributeIndex('Cartn_x')
        yCol = atomData.getAttributeIndex('Cartn_y')
        zCol = atomData.getAttributeIndex('Cartn_z')
        lastChainId = None
        lastResId = None
        lastAsymId = None
        atomTable = {}
        models = []
        for row in atomData.getRowList():
            atomKey = ((row[resIdCol], row[asymIdCol], row[atomNameCol]))
            model = ('1' if modelCol == -1 else row[modelCol])
            if model not in models:
                models.append(model)
                self._positions.append([])
            modelIndex = models.index(model)
            if row[altIdCol] != '.' and atomKey in atomTable and len(
                    self._positions[modelIndex]) > atomTable[atomKey].index:
                # This row is an alternate position for an existing atom, so ignore it.

                continue
            if modelIndex == 0:
                # This row defines a new atom.

                if lastChainId != row[chainIdCol]:
                    # The start of a new chain.
                    chain = top.addChain()
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    lastAsymId = None
                if lastResId != row[resIdCol] or lastAsymId != row[asymIdCol]:
                    # The start of a new residue.
                    res = top.addResidue(row[resNameCol], chain)
                    lastResId = row[resIdCol]
                    if lastResId == '.':
                        lastResId = None
                    lastAsymId = row[asymIdCol]
                element = None
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
                atom = top.addAtom(row[atomNameCol], element, res)
                atomTable[atomKey] = atom
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError(
                        'Unknown atom %s in residue %s %s for model %s' %
                        (row[atomNameCol], row[resNameCol], row[resIdCol],
                         model))
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError(
                        'Atom %s for model %s does not match the order of atoms for model %s'
                        % (row[atomIdCol], model, models[0]))
            self._positions[modelIndex].append(
                Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) *
                0.1)
        for i in range(len(self._positions)):
            self._positions[i] = self._positions[i] * nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj('cell')
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            cellSize = [
                float(row[cell.getAttributeIndex(attribute)])
                for attribute in ('length_a', 'length_b', 'length_c')
            ] * angstroms
            self.topology.setUnitCellDimensions(cellSize)

        # Add bonds based on struct_conn records.

        connectData = block.getObj('struct_conn')
        if connectData is not None:
            res1Col = connectData.getAttributeIndex('ptnr1_label_seq_id')
            res2Col = connectData.getAttributeIndex('ptnr2_label_seq_id')
            atom1Col = connectData.getAttributeIndex('ptnr1_label_atom_id')
            atom2Col = connectData.getAttributeIndex('ptnr2_label_atom_id')
            asym1Col = connectData.getAttributeIndex('ptnr1_label_asym_id')
            asym2Col = connectData.getAttributeIndex('ptnr2_label_asym_id')
            typeCol = connectData.getAttributeIndex('conn_type_id')
            connectBonds = []
            for row in connectData.getRowList():
                type = row[typeCol][:6]
                if type in ('covale', 'disulf', 'modres'):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if len(connectBonds) > 0:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (
                            bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)
Esempio n. 11
0
    def __init__(self, pdb_line):
        """Create a new pdb.Atom from an ATOM or HETATM line.

        Example line:
        ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
        00000000011111111112222222222333333333344444444445555555555666666666677777777778
        12345678901234567890123456789012345678901234567890123456789012345678901234567890

        ATOM line format description from
          http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

        COLUMNS        DATA TYPE       CONTENTS
        --------------------------------------------------------------------------------
         1 -  6        Record name     "ATOM  "
         7 - 11        Integer         Atom serial number.
        13 - 16        Atom            Atom name.
        17             Character       Alternate location indicator.
        18 - 20        Residue name    Residue name.
        22             Character       Chain identifier.
        23 - 26        Integer         Residue sequence number.
        27             AChar           Code for insertion of residues.
        31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
        39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
        47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
        55 - 60        Real(6.2)       Occupancy (Default = 1.0).
        61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
        73 - 76        LString(4)      Segment identifier, left-justified.
        77 - 78        LString(2)      Element symbol, right-justified.
        79 - 80        LString(2)      Charge on the atom.

        """
        # We might modify first/final status during _finalize() methods
        self.is_first_atom_in_chain = False
        self.is_final_atom_in_chain = False
        self.is_first_residue_in_chain = False
        self.is_final_residue_in_chain = False
        # Start parsing fields from pdb line
        self.record_name = pdb_line[0:6].strip()
        self.serial_number = int(pdb_line[6:11])
        self.name_with_spaces = pdb_line[12:16]
        alternate_location_indicator = pdb_line[16]

        self.residue_name_with_spaces = pdb_line[17:20]
        # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
        # column 21
        possible_fourth_character = pdb_line[20:21]
        if possible_fourth_character != " ":
            # Fourth character should only be there if official 3 are already full
            if len(self.residue_name_with_spaces.strip()) != 3:
                raise ValueError('Misaligned residue name: %s' % pdb_line)
            self.residue_name_with_spaces += possible_fourth_character
        self.residue_name = self.residue_name_with_spaces.strip()

        self.chain_id = pdb_line[21]
        self.residue_number = int(pdb_line[22:26])
        self.insertion_code = pdb_line[26]
        # coordinates, occupancy, and temperature factor belong in Atom.Location object
        x = float(pdb_line[30:38])
        y = float(pdb_line[38:46])
        z = float(pdb_line[46:54])
        try:
            occupancy = float(pdb_line[54:60])
        except:
            occupancy = 1.0
        try:
            temperature_factor = float(pdb_line[60:66])
        except:
            temperature_factor = 0.0
        self.locations = {}
        loc = Atom.Location(alternate_location_indicator, np.array([x,y,z]), occupancy, temperature_factor, self.residue_name_with_spaces)
        self.locations[alternate_location_indicator] = loc
        self.default_location_id = alternate_location_indicator
        # segment id, element_symbol, and formal_charge are not always present
        self.segment_id = pdb_line[72:76].strip()
        self.element_symbol = pdb_line[76:78].strip()
        try: self.formal_charge = int(pdb_line[78:80])
        except ValueError: self.formal_charge = None
        # figure out atom element
        try:
            # First try to find a sensible element symbol from columns 76-77
            self.element = element.get_by_symbol(self.element_symbol)
        except KeyError:
            # otherwise, deduce element from first two characters of atom name
            # remove digits found in some hydrogen atom names
            symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
            try:
                # Some molecular dynamics PDB files, such as gromacs with ffamber force
                # field, include 4-character hydrogen atom names beginning with "H".
                # Hopefully elements like holmium (Ho) and mercury (Hg) will have fewer than four
                # characters in the atom name.  This problem is the fault of molecular
                # dynamics code authors who feel the need to make up their own atom
                # nomenclature because it is too tedious to read that provided by the PDB.
                # These are the same folks who invent their own meanings for biochemical terms
                # like "dipeptide".  Clowntards.
                if len(self.name) == 4 and self.name[0:1] == "H":
                    self.element = element.hydrogen
                else:
                    self.element = element.get_by_symbol(symbol)
            except KeyError:
                # OK, I give up
                self.element = None
Esempio n. 12
0
    def __init__(self, file):
        """Load a PDBx/mmCIF file.

        The atom positions and Topology can be retrieved by calling getPositions() and getTopology().

        Parameters:
         - file (string) the name of the file to load.  Alternatively you can pass an open file object.
        """
        top = Topology()
        ## The Topology read from the PDBx/mmCIF file
        self.topology = top
        self._positions = []

        # Load the file.

        inputFile = file
        if isinstance(file, str):
            inputFile = open(file)
        reader = PdbxReader(inputFile)
        data = []
        reader.read(data)
        block = data[0]

        # Build the topology.

        atomData = block.getObj("atom_site")
        atomNameCol = atomData.getAttributeIndex("label_atom_id")
        atomIdCol = atomData.getAttributeIndex("id")
        resNameCol = atomData.getAttributeIndex("label_comp_id")
        resIdCol = atomData.getAttributeIndex("label_seq_id")
        resNumCol = atomData.getAttributeIndex("auth_seq_id")
        asymIdCol = atomData.getAttributeIndex("label_asym_id")
        chainIdCol = atomData.getAttributeIndex("label_entity_id")
        elementCol = atomData.getAttributeIndex("type_symbol")
        altIdCol = atomData.getAttributeIndex("label_alt_id")
        modelCol = atomData.getAttributeIndex("pdbx_PDB_model_num")
        xCol = atomData.getAttributeIndex("Cartn_x")
        yCol = atomData.getAttributeIndex("Cartn_y")
        zCol = atomData.getAttributeIndex("Cartn_z")
        lastChainId = None
        lastResId = None
        lastAsymId = None
        atomTable = {}
        atomsInResidue = set()
        models = []
        for row in atomData.getRowList():
            atomKey = (row[resIdCol], row[asymIdCol], row[atomNameCol])
            model = "1" if modelCol == -1 else row[modelCol]
            if model not in models:
                models.append(model)
                self._positions.append([])
            modelIndex = models.index(model)
            if (
                row[altIdCol] != "."
                and atomKey in atomTable
                and len(self._positions[modelIndex]) > atomTable[atomKey].index
            ):
                # This row is an alternate position for an existing atom, so ignore it.

                continue
            if modelIndex == 0:
                # This row defines a new atom.

                if lastChainId != row[chainIdCol]:
                    # The start of a new chain.
                    chain = top.addChain(row[asymIdCol])
                    lastChainId = row[chainIdCol]
                    lastResId = None
                    lastAsymId = None
                if (
                    lastResId != row[resIdCol]
                    or lastAsymId != row[asymIdCol]
                    or (lastResId == "." and row[atomNameCol] in atomsInResidue)
                ):
                    # The start of a new residue.
                    res = top.addResidue(row[resNameCol], chain, None if resNumCol == -1 else row[resNumCol])
                    lastResId = row[resIdCol]
                    lastAsymId = row[asymIdCol]
                    atomsInResidue.clear()
                element = None
                try:
                    element = elem.get_by_symbol(row[elementCol])
                except KeyError:
                    pass
                atom = top.addAtom(row[atomNameCol], element, res, row[atomIdCol])
                atomTable[atomKey] = atom
                atomsInResidue.add(row[atomNameCol])
            else:
                # This row defines coordinates for an existing atom in one of the later models.

                try:
                    atom = atomTable[atomKey]
                except KeyError:
                    raise ValueError(
                        "Unknown atom %s in residue %s %s for model %s"
                        % (row[atomNameCol], row[resNameCol], row[resIdCol], model)
                    )
                if atom.index != len(self._positions[modelIndex]):
                    raise ValueError(
                        "Atom %s for model %s does not match the order of atoms for model %s"
                        % (row[atomIdCol], model, models[0])
                    )
            self._positions[modelIndex].append(Vec3(float(row[xCol]), float(row[yCol]), float(row[zCol])) * 0.1)
        for i in range(len(self._positions)):
            self._positions[i] = self._positions[i] * nanometers
        ## The atom positions read from the PDBx/mmCIF file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = self._positions[0]
        self.topology.createStandardBonds()
        self._numpyPositions = None

        # Record unit cell information, if present.

        cell = block.getObj("cell")
        if cell is not None and cell.getRowCount() > 0:
            row = cell.getRow(0)
            (a, b, c) = [
                float(row[cell.getAttributeIndex(attribute)]) * 0.1
                for attribute in ("length_a", "length_b", "length_c")
            ]
            (alpha, beta, gamma) = [
                float(row[cell.getAttributeIndex(attribute)]) * math.pi / 180.0
                for attribute in ("angle_alpha", "angle_beta", "angle_gamma")
            ]
            self.topology.setPeriodicBoxVectors(computePeriodicBoxVectors(a, b, c, alpha, beta, gamma))

        # Add bonds based on struct_conn records.

        connectData = block.getObj("struct_conn")
        if connectData is not None:
            res1Col = connectData.getAttributeIndex("ptnr1_label_seq_id")
            res2Col = connectData.getAttributeIndex("ptnr2_label_seq_id")
            atom1Col = connectData.getAttributeIndex("ptnr1_label_atom_id")
            atom2Col = connectData.getAttributeIndex("ptnr2_label_atom_id")
            asym1Col = connectData.getAttributeIndex("ptnr1_label_asym_id")
            asym2Col = connectData.getAttributeIndex("ptnr2_label_asym_id")
            typeCol = connectData.getAttributeIndex("conn_type_id")
            connectBonds = []
            for row in connectData.getRowList():
                type = row[typeCol][:6]
                if type in ("covale", "disulf", "modres"):
                    key1 = (row[res1Col], row[asym1Col], row[atom1Col])
                    key2 = (row[res2Col], row[asym2Col], row[atom2Col])
                    if key1 in atomTable and key2 in atomTable:
                        connectBonds.append((atomTable[key1], atomTable[key2]))
            if len(connectBonds) > 0:
                # Only add bonds that don't already exist.
                existingBonds = set(top.bonds())
                for bond in connectBonds:
                    if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                        top.addBond(bond[0], bond[1])
                        existingBonds.add(bond)
Esempio n. 13
0
    def __init__(self, pdb_line):
        """Create a new pdb.Atom from an ATOM or HETATM line.

        Example line:
        ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
        00000000011111111112222222222333333333344444444445555555555666666666677777777778
        12345678901234567890123456789012345678901234567890123456789012345678901234567890

        ATOM line format description from
          http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

        COLUMNS        DATA TYPE       CONTENTS
        --------------------------------------------------------------------------------
         1 -  6        Record name     "ATOM  "
         7 - 11        Integer         Atom serial number.
        13 - 16        Atom            Atom name.
        17             Character       Alternate location indicator.
        18 - 20        Residue name    Residue name.
        22             Character       Chain identifier.
        23 - 26        Integer         Residue sequence number.
        27             AChar           Code for insertion of residues.
        31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
        39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
        47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
        55 - 60        Real(6.2)       Occupancy (Default = 1.0).
        61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
        73 - 76        LString(4)      Segment identifier, left-justified.
        77 - 78        LString(2)      Element symbol, right-justified.
        79 - 80        LString(2)      Charge on the atom.

        """
        # We might modify first/final status during _finalize() methods
        self.is_first_atom_in_chain = False
        self.is_final_atom_in_chain = False
        self.is_first_residue_in_chain = False
        self.is_final_residue_in_chain = False
        # Start parsing fields from pdb line
        self.record_name = pdb_line[0:6].strip()
        self.serial_number = int(pdb_line[6:11])
        self.name_with_spaces = pdb_line[12:16]
        alternate_location_indicator = pdb_line[16]

        self.residue_name_with_spaces = pdb_line[17:20]
        # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
        # column 21
        possible_fourth_character = pdb_line[20:21]
        if possible_fourth_character != " ":
            # Fourth character should only be there if official 3 are already full
            if len(self.residue_name_with_spaces.strip()) != 3:
                raise ValueError('Misaligned residue name: %s' % pdb_line)
            self.residue_name_with_spaces += possible_fourth_character
        self.residue_name = self.residue_name_with_spaces.strip()

        self.chain_id = pdb_line[21]
        self.residue_number = int(pdb_line[22:26])
        self.insertion_code = pdb_line[26]
        # coordinates, occupancy, and temperature factor belong in Atom.Location object
        x = float(pdb_line[30:38])
        y = float(pdb_line[38:46])
        z = float(pdb_line[46:54])
        try:
            occupancy = float(pdb_line[54:60])
        except:
            occupancy = 1.0
        try:
            temperature_factor = float(pdb_line[60:66])
        except:
            temperature_factor = 0.0
        self.locations = {}
        loc = Atom.Location(alternate_location_indicator, np.array([x, y, z]),
                            occupancy, temperature_factor,
                            self.residue_name_with_spaces)
        self.locations[alternate_location_indicator] = loc
        self.default_location_id = alternate_location_indicator
        # segment id, element_symbol, and formal_charge are not always present
        self.segment_id = pdb_line[72:76].strip()
        self.element_symbol = pdb_line[76:78].strip()
        try:
            self.formal_charge = int(pdb_line[78:80])
        except ValueError:
            self.formal_charge = None
        # figure out atom element
        try:
            # First try to find a sensible element symbol from columns 76-77
            self.element = element.get_by_symbol(self.element_symbol)
        except KeyError:
            # otherwise, deduce element from first two characters of atom name
            # remove digits found in some hydrogen atom names
            symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
            try:
                # Some molecular dynamics PDB files, such as gromacs with ffamber force
                # field, include 4-character hydrogen atom names beginning with "H".
                # Hopefully elements like holmium (Ho) and mercury (Hg) will have fewer than four
                # characters in the atom name.  This problem is the fault of molecular
                # dynamics code authors who feel the need to make up their own atom
                # nomenclature because it is too tedious to read that provided by the PDB.
                # These are the same folks who invent their own meanings for biochemical terms
                # like "dipeptide".  Clowntards.
                if len(self.name) == 4 and self.name[0:1] == "H":
                    self.element = element.hydrogen
                else:
                    self.element = element.get_by_symbol(symbol)
            except KeyError:
                # OK, I give up
                self.element = None
Esempio n. 14
0
    def __init__(self, file):
        """Load a prmtop file."""
        top = Topology()
        ## The Topology read from the prmtop file
        self.topology = top
        self.elements = []

        # Load the prmtop file

        prmtop = amber_file_parser.PrmtopLoader(file)
        self._prmtop = prmtop

        # Add atoms to the topology

        PDBFile._loadNameReplacementTables()
        lastResidue = None
        c = top.addChain()
        for index in range(prmtop.getNumAtoms()):
            resNumber = prmtop.getResidueNumber(index)
            if resNumber != lastResidue:
                lastResidue = resNumber
                resName = prmtop.getResidueLabel(iAtom=index).strip()
                if resName in PDBFile._residueNameReplacements:
                    resName = PDBFile._residueNameReplacements[resName]
                r = top.addResidue(resName, c)
                if resName in PDBFile._atomNameReplacements:
                    atomReplacements = PDBFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
            atomName = prmtop.getAtomName(index).strip()
            if atomName in atomReplacements:
                atomName = atomReplacements[atomName]

            # Get the element from the prmtop file if available
            if prmtop.has_atomic_number:
                try:
                    element = elem.Element.getByAtomicNumber(
                        int(prmtop._raw_data['ATOMIC_NUMBER'][index]))
                except KeyError:
                    element = None
            else:
                # Try to guess the element from the atom name.

                upper = atomName.upper()
                if upper.startswith('CL'):
                    element = elem.chlorine
                elif upper.startswith('NA'):
                    element = elem.sodium
                elif upper.startswith('MG'):
                    element = elem.magnesium
                elif upper.startswith('ZN'):
                    element = elem.zinc
                else:
                    try:
                        element = elem.get_by_symbol(atomName[0])
                    except KeyError:
                        element = None

            top.addAtom(atomName, element, r)
            self.elements.append(element)

        # Add bonds to the topology

        atoms = list(top.atoms())
        for bond in prmtop.getBondsWithH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])
        for bond in prmtop.getBondsNoH():
            top.addBond(atoms[bond[0]], atoms[bond[1]])

        # Set the periodic box size.

        if prmtop.getIfBox():
            top.setUnitCellDimensions(
                tuple(
                    x.value_in_unit(unit.nanometer)
                    for x in prmtop.getBoxBetaAndDimensions()[1:4]) *
                unit.nanometer)
Esempio n. 15
0
    def __init__(self, file):
        """Load a .gro file.

        The atom positions can be retrieved by calling getPositions().

        Parameters:
         - file (string) the name of the file to load
        """

        xyzs     = []
        elements = [] # The element, most useful for quantum chemistry calculations
        atomname = [] # The atom name, for instance 'HW1'
        comms    = []
        resid    = []
        resname  = []
        boxes    = []
        xyz      = []
        ln       = 0
        frame    = 0
        for line in open(file):
            if ln == 0:
                comms.append(line.strip())
            elif ln == 1:
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0: # Create the list of residues, atom names etc. only if it's the first frame.
                    (thisresnum, thisresname, thisatomname) = [line[i*5:i*5+5].strip() for i in range(3)]
                    resname.append(thisresname)
                    resid.append(int(thisresnum))
                    atomname.append(thisatomname)
                    thiselem = thisatomname
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]','',thiselem[1:])
                        try:
                            elements.append(elem.get_by_symbol(thiselem))
                        except KeyError:
                            elements.append(None)
                firstDecimalPos = line.index('.', 20)
                secondDecimalPos = line.index('.', firstDecimalPos+1)
                digits = secondDecimalPos-firstDecimalPos
                pos = [float(line[20+i*digits:20+(i+1)*digits]) for i in range(3)]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                sline = line.split()
                boxes.append(tuple([float(i) for i in sline])*nanometers)
                xyzs.append(xyz*nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: "+line)
            ln += 1

        ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = xyzs[0]
        ## A list containing the element of each atom stored in the file
        self.elements = elements
        ## A list containing the name of each atom stored in the file
        self.atomNames = atomname
        ## A list containing the ID of the residue that each atom belongs to
        self.residueIds = resid
        ## A list containing the name of the residue that each atom belongs to
        self.residueNames = resname
        self._positions = xyzs
        self._unitCellDimensions = boxes
        self._numpyPositions = None
Esempio n. 16
0
    def __init__(self, file):
        """Load a .gro file.

        The atom positions can be retrieved by calling getPositions().

        Parameters:
         - file (string) the name of the file to load
        """

        xyzs = []
        elements = [
        ]  # The element, most useful for quantum chemistry calculations
        atomname = []  # The atom name, for instance 'HW1'
        comms = []
        resid = []
        resname = []
        boxes = []
        xyz = []
        ln = 0
        frame = 0
        for line in open(file):
            if ln == 0:
                comms.append(line.strip())
            elif ln == 1:
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0:  # Create the list of residues, atom names etc. only if it's the first frame.
                    (thisresnum, thisresname, thisatomname) = [
                        line[i * 5:i * 5 + 5].strip() for i in range(3)
                    ]
                    resname.append(thisresname)
                    resid.append(int(thisresnum))
                    atomname.append(thisatomname)
                    thiselem = thisatomname
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]', '',
                                                     thiselem[1:])
                        try:
                            elements.append(elem.get_by_symbol(thiselem))
                        except KeyError:
                            elements.append(None)
                firstDecimalPos = line.index('.', 20)
                secondDecimalPos = line.index('.', firstDecimalPos + 1)
                digits = secondDecimalPos - firstDecimalPos
                pos = [
                    float(line[20 + i * digits:20 + (i + 1) * digits])
                    for i in range(3)
                ]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                sline = line.split()
                boxes.append(_construct_box_vectors(line))
                xyzs.append(xyz * nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: " + line)
            ln += 1

        ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = xyzs[0]
        ## A list containing the element of each atom stored in the file
        self.elements = elements
        ## A list containing the name of each atom stored in the file
        self.atomNames = atomname
        ## A list containing the ID of the residue that each atom belongs to
        self.residueIds = resid
        ## A list containing the name of the residue that each atom belongs to
        self.residueNames = resname
        self._positions = xyzs
        self._periodicBoxVectors = boxes
        self._numpyPositions = None
Esempio n. 17
0
    def __init__(self, file):
        """Load a .gro file.
        
        The atom positions can be retrieved by calling getPositions().
        
        Parameters:
         - file (string) the name of the file to load
        """

        xyzs     = []
        elements = [] # The element, most useful for quantum chemistry calculations
        atomname = [] # The atom name, for instance 'HW1'
        comms    = []
        resid    = []
        resname  = []
        boxes    = []
        xyz      = []
        ln       = 0
        frame    = 0
        for line in open(file):
            sline = line.split()
            if ln == 0:
                comms.append(line.strip())
            elif ln == 1:
                na = int(line.strip())
            elif _is_gro_coord(line):
                if frame == 0: # Create the list of residues, atom names etc. only if it's the first frame.
                    # Name of the residue, for instance '153SOL1 -> SOL1' ; strips leading numbers
                    thisresname = sub('^[0-9]*','',sline[0])
                    resname.append(thisresname)
                    resid.append(int(sline[0].replace(thisresname,'')))
                    atomname.append(sline[1])
                    thiselem = sline[1]
                    if len(thiselem) > 1:
                        thiselem = thiselem[0] + sub('[A-Z0-9]','',thiselem[1:])
                        try:
                            elements.append(elem.get_by_symbol(thiselem))
                        except KeyError:
                            elements.append(None)
                pos = [float(i) for i in sline[-3:]]
                xyz.append(Vec3(pos[0], pos[1], pos[2]))
            elif _is_gro_box(line) and ln == na + 2:
                boxes.append([float(i) for i in sline]*nanometers)
                xyzs.append(xyz*nanometers)
                xyz = []
                ln = -1
                frame += 1
            else:
                raise Exception("Unexpected line in .gro file: "+line)
            ln += 1
            
        ## The atom positions read from the file.  If the file contains multiple frames, these are the positions in the first frame.
        self.positions = xyzs[0]
        ## A list containing the element of each atom stored in the file
        self.elements = elements
        ## A list containing the name of each atom stored in the file
        self.atomNames = atomname
        ## A list containing the ID of the residue that each atom belongs to
        self.residueIds = resid
        ## A list containing the name of the residue that each atom belongs to
        self.residueNames = resname
        self._positions = xyzs
        self._unitCellDimensions = boxes
        self._numpyPositions = None