Beispiel #1
0
class PDBTrajectoryFile(object):
    """Interface for reading and writing Protein Data Bank (PDB) files

    Parameters
    ----------
    filename : str
        The filename to open. A path to a file on disk.
    mode : {'r', 'w'}
        The mode in which to open the file, either 'r' for read or 'w' for write.
    force_overwrite : bool
        If opened in write mode, and a file by the name of `filename` already
        exists on disk, should we overwrite it?

    Attributes
    ----------
    positions : np.ndarray, shape=(n_frames, n_atoms, 3)
    topology : mdtraj.Topology
    closed : bool

    Notes
    -----
    When writing pdb files, mdtraj follows the PDB3.0 standard as closely as
    possible. During *reading* however, we try to be more lenient. For instance,
    we will parse common nonstandard atom names during reading, and convert them
    into the standard names. The replacement table used by mdtraj is at
    {mdtraj_source}/formats/pdb/data/pdbNames.xml.

    See Also
    --------
    mdtraj.load_pdb : High-level wrapper that returns a ``md.Trajectory``
    """
    distance_unit = 'angstroms'
    _residueNameReplacements = {}
    _atomNameReplacements = {}
    _chain_names = [chr(ord('A') + i) for i in range(26)]

    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open a PDB file for reading or writing.

        In read mode the whole file (local path or URL, optionally
        gzip-compressed) is parsed immediately via ``_read_models``. In write
        mode the target file is created/truncated and nothing is written
        until ``write`` or ``close`` is called.

        Parameters
        ----------
        filename : str
            Path (or, in read mode, URL) of the PDB file.
        mode : {'r', 'w'}
            'r' to read, 'w' to write.
        force_overwrite : bool
            In write mode, whether an existing file may be replaced.

        Raises
        ------
        IOError
            If `filename` exists in write mode and `force_overwrite` is False.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        """
        self._open = False
        self._file = None
        self._topology = None
        self._positions = None
        self._mode = mode
        self._last_topology = None
        # Filled in by _read_models() in read mode. Defined up front so the
        # unitcell_* properties return None instead of raising in write mode.
        self._unitcell_lengths = None
        self._unitcell_angles = None

        if mode == 'r':
            PDBTrajectoryFile._loadNameReplacementTables()

            if _is_url(filename):
                response = urlopen(filename)
                stream = response
                if filename.lower().endswith('.gz'):
                    if six.PY3:
                        stream = gzip.GzipFile(fileobj=response)
                    else:
                        stream = gzip.GzipFile(fileobj=six.StringIO(
                            response.read()))
                if six.PY3:
                    # The payload is fully buffered in memory here, so the
                    # network handle can be released right away.
                    try:
                        stream = six.StringIO(stream.read().decode('utf-8'))
                    finally:
                        response.close()
                self._file = stream
            else:
                if filename.lower().endswith('.gz'):
                    # Decompress into memory and close the gzip handle
                    # instead of leaking it.
                    compressed = gzip.open(filename, 'r')
                    try:
                        text = compressed.read().decode('utf-8')
                    finally:
                        compressed.close()
                    self._file = six.StringIO(text)
                else:
                    self._file = open(filename, 'r')

            try:
                self._read_models()
            except Exception:
                # close() skips files that never reached the "open" state,
                # so release the handle here before propagating the error.
                self._file.close()
                raise
        elif mode == 'w':
            self._header_written = False
            self._footer_written = False
            if os.path.exists(filename) and not force_overwrite:
                raise IOError('"%s" already exists' % filename)
            self._file = open(filename, 'w')
        else:
            raise ValueError("invalid mode: %s" % mode)

        self._open = True

    def write(self, positions, topology, modelIndex=None, unitcell_lengths=None,
              unitcell_angles=None, bfactors=None):
        """Write a PDB file to disk

        The header (CRYST1 record) is emitted on the first call only; every
        call then writes one set of ATOM/TER records, optionally wrapped in
        MODEL/ENDMDL.

        Parameters
        ----------
        positions : array_like
            The list of atomic positions to write.
        topology : mdtraj.Topology
            The Topology defining the model to write.
        modelIndex : {int, None}
            If not None, the model will be surrounded by MODEL/ENDMDL records
            with this index
        unitcell_lengths : {tuple, None}
            Lengths of the three unit cell vectors, or None for a non-periodic system
        unitcell_angles : {tuple, None}
            Angles between the three unit cell vectors, or None for a non-periodic system
        bfactors : array_like, default=None, shape=(n_atoms,)
            Save bfactors with pdb file. Should contain a single number for
            each atom in the topology

        Raises
        ------
        ValueError
            If the file is not open for writing, the number of positions does
            not match the topology, a position is NaN/infinite, or `bfactors`
            is too short or outside (-10, 100).
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')
        if not self._header_written:
            self._write_header(unitcell_lengths, unitcell_angles)
            self._header_written = True

        if ilen(topology.atoms) != len(positions):
            raise ValueError('The number of positions must match the number of atoms')
        if np.any(np.isnan(positions)):
            raise ValueError('Particle position is NaN')
        if np.any(np.isinf(positions)):
            raise ValueError('Particle position is infinite')

        self._last_topology = topology  # Hack to save the topology of the last frame written, allows us to output CONECT entries in write_footer()

        if bfactors is None:
            bfactors = ['{0:5.2f}'.format(0.0)] * len(positions)
        else:
            # Fail before any ATOM record is emitted rather than with an
            # IndexError halfway through the model.
            if len(bfactors) < len(positions):
                raise ValueError('bfactors must contain one value per atom')
            if (np.max(bfactors) >= 100) or (np.min(bfactors) <= -10):
                raise ValueError("bfactors must be in (-10, 100)")

            bfactors = ['{0:5.2f}'.format(b) for b in bfactors]

        atomIndex = 1
        posIndex = 0
        n_chain_names = len(self._chain_names)
        if modelIndex is not None:
            print("MODEL     %4d" % modelIndex, file=self._file)
        for (chainIndex, chain) in enumerate(topology.chains):
            # Chain names are recycled once the configured cycle is exhausted.
            chainName = self._chain_names[chainIndex % n_chain_names]
            residues = list(chain.residues)
            for (resIndex, res) in enumerate(residues):
                # The residue name column is three characters wide.
                resName = res.name[:3] if len(res.name) > 3 else res.name
                for atom in res.atoms:
                    # Short names of one-letter elements are shifted right by
                    # one column; names longer than four characters are cut.
                    if len(atom.name) < 4 and atom.name[:1].isalpha() and (atom.element is None or len(atom.element.symbol) < 2):
                        atomName = ' '+atom.name
                    elif len(atom.name) > 4:
                        atomName = atom.name[:4]
                    else:
                        atomName = atom.name
                    coords = positions[posIndex]
                    if atom.element is not None:
                        symbol = atom.element.symbol
                    else:
                        symbol = ' '
                    # Serial numbers and residue numbers wrap around so they
                    # always fit their fixed-width columns.
                    line = "ATOM  %5d %-4s %3s %s%4d    %s%s%s  1.00 %s          %2s  " % (
                        atomIndex % 100000, atomName, resName, chainName,
                        (res.resSeq) % 10000, _format_83(coords[0]),
                        _format_83(coords[1]), _format_83(coords[2]),
                        bfactors[posIndex], symbol)
                    assert len(line) == 80, 'Fixed width overflow detected'
                    print(line, file=self._file)
                    posIndex += 1
                    atomIndex += 1
                if resIndex == len(residues)-1:
                    # TER closes the chain and consumes a serial number. Use
                    # the same wrap-around as the ATOM records so its columns
                    # cannot overflow either.
                    print("TER   %5d      %3s %s%4d" % (
                        atomIndex % 100000, resName, chainName,
                        res.resSeq % 10000), file=self._file)
                    atomIndex += 1

        if modelIndex is not None:
            print("ENDMDL", file=self._file)

    def _write_header(self, unitcell_lengths, unitcell_angles, write_metadata=True):
        """Write out the header for a PDB file.

        Nothing is written when both unit cell arguments are None.

        Parameters
        ----------
        unitcell_lengths : {tuple, None}
            The lengths of the three unitcell vectors, ``a``, ``b``, ``c``
        unitcell_angles : {tuple, None}
            The angles between the three unitcell vectors, ``alpha``,
            ``beta``, ``gamma``
        write_metadata : bool, default=True
            If True, a ``REMARK   1`` line with the MDTraj version and the
            current date is written before the CRYST1 record.

        Raises
        ------
        ValueError
            If the file is not open for writing, if only one of the two unit
            cell arguments is given, or if either does not have length 3.
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        if unitcell_lengths is None and unitcell_angles is None:
            return
        if unitcell_lengths is None or unitcell_angles is None:
            raise ValueError('either unitcell_lengths and unitcell_angles '
                             'should both be specified, or neither')
        if not len(unitcell_lengths) == 3:
            raise ValueError('unitcell_lengths must be length 3')
        if not len(unitcell_angles) == 3:
            raise ValueError('unitcell_angles must be length 3')

        box = list(unitcell_lengths) + list(unitcell_angles)

        if write_metadata:
            print("REMARK   1 CREATED WITH MDTraj %s, %s" % (version.version, str(date.today())), file=self._file)
        print("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 " % tuple(box), file=self._file)

    def _write_footer(self):
        """Write the CONECT records and the closing END record.

        CONECT records are produced from the topology passed to the most
        recent ``write`` call. Only bonds that involve a non-standard residue,
        and CYS SG-SG bonds, are listed.

        Raises
        ------
        ValueError
            If the file is not open for writing.
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        # Identify bonds that should be listed as CONECT records.
        standardResidues = frozenset([
            'ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR',
            'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL',
            'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI', 'HOH'])
        conectBonds = []
        if self._last_topology is not None:
            for atom1, atom2 in self._last_topology.bonds:
                resName1 = atom1.residue.name
                resName2 = atom2.residue.name
                if resName1 not in standardResidues or resName2 not in standardResidues:
                    conectBonds.append((atom1, atom2))
                elif atom1.name == 'SG' and atom2.name == 'SG' and resName1 == 'CYS' and resName2 == 'CYS':
                    conectBonds.append((atom1, atom2))
        if len(conectBonds) > 0:

            # Work out the index used in the PDB file for each atom. The
            # counter skips one number at every chain change: before the first
            # chain so that numbering starts at 1, and between chains for the
            # serial number consumed by the TER record.
            atomIndex = {}
            nextAtomIndex = 0
            prevChain = None
            for chain in self._last_topology.chains:
                for atom in chain.atoms:
                    if atom.residue.chain != prevChain:
                        nextAtomIndex += 1
                        prevChain = atom.residue.chain
                    atomIndex[atom] = nextAtomIndex
                    nextAtomIndex += 1

            # Record which other atoms each atom is bonded to.
            atomBonds = {}
            for atom1, atom2 in conectBonds:
                index1 = atomIndex[atom1]
                index2 = atomIndex[atom2]
                atomBonds.setdefault(index1, []).append(index2)
                atomBonds.setdefault(index2, []).append(index1)

            # Write the CONECT records. One record holds at most four bonded
            # atoms, so longer lists are split into consecutive records and
            # no partner is dropped.
            for index1 in sorted(atomBonds):
                bonded = atomBonds[index1]
                for start in range(0, len(bonded), 4):
                    partners = "".join("%5d" % index2 for index2 in bonded[start:start + 4])
                    print("CONECT%5d%s" % (index1, partners), file=self._file)
        print("END", file=self._file)
        self._footer_written = True

    @classmethod
    def set_chain_names(cls, values):
        """Set the cycle of chain names used when writing PDB files

        When writing PDB files, PDBTrajectoryFile translates each chain's
        index into a name -- the name is what's written in the file. By
        default, chains are named with the letters A-Z.

        Parameters
        ----------
        values : list
            A list of characters (strings of length 1) that the PDB writer
            will cycle through to choose chain names.

        Raises
        ------
        TypeError
            If any item is not a string of length exactly 1.
        """
        for item in values:
            # Both conditions must hold for a valid name. Without the
            # parentheses `not` bound only to the isinstance() test, which let
            # multi-character strings through.
            if not (isinstance(item, six.string_types) and len(item) == 1):
                raise TypeError('Names must be a single character string')
        cls._chain_names = values

    @property
    def positions(self):
        """Atomic coordinates of every frame, as stored on the instance.

        Populated when the file is opened in mode='r'; ``None`` otherwise.
        """
        coordinates = self._positions
        return coordinates

    @property
    def topology(self):
        """Topology built from this PDB file.

        Populated when the file is opened in mode='r'; ``None`` otherwise.
        """
        current_topology = self._topology
        return current_topology

    @property
    def unitcell_lengths(self):
        """The unitcell lengths (3-tuple) in this PDB file. May be None.

        Also returns None when no value was ever stored on the instance,
        which is the case for files opened in mode='w'.
        """
        # The attribute is only assigned while reading; fall back to None
        # rather than raising AttributeError.
        return getattr(self, '_unitcell_lengths', None)

    @property
    def unitcell_angles(self):
        """The unitcell angles (3-tuple) in this PDB file. May be None.

        Also returns None when no value was ever stored on the instance,
        which is the case for files opened in mode='w'.
        """
        # The attribute is only assigned while reading; fall back to None
        # rather than raising AttributeError.
        return getattr(self, '_unitcell_angles', None)

    @property
    def closed(self):
        """True when the file is not currently open."""
        is_open = self._open
        return not is_open

    def close(self):
        """Close the PDB file.

        In write mode the footer (CONECT records and END) is written first
        if that has not happened yet. Calling this on a file that was never
        successfully opened, or that is already closed, does nothing.
        """
        if not self._open:
            # There is no usable file handle. Writing the footer here would
            # send it to stdout, because print(file=None) targets sys.stdout.
            return
        try:
            if self._mode == 'w' and not self._footer_written:
                self._write_footer()
        finally:
            # Release the handle even if writing the footer failed.
            self._file.close()
            self._open = False

    def _read_models(self):
        """Parse the open file into a topology, positions and unit cell.

        Builds ``self._topology`` from the chains/residues/atoms reported by
        ``PdbStructure``, applying the residue and atom name replacement
        tables, then collects the coordinates of every model, stores the unit
        cell, and finally adds standard, disulfide and CONECT-derived bonds.

        Raises
        ------
        ValueError
            If the file is not open for reading, or if the models do not all
            contain the same number of atoms.
        """
        if self._mode != 'r':
            raise ValueError('file not opened for reading')

        residue_renames = PDBTrajectoryFile._residueNameReplacements
        atom_renames_by_residue = PDBTrajectoryFile._atomNameReplacements

        self._topology = Topology()
        structure = PdbStructure(self._file, load_all_models=True)

        # Maps PDB serial numbers to topology atoms, for resolving CONECT
        # records further down.
        atoms_by_serial = {}
        for pdb_chain in structure.iter_chains():
            top_chain = self._topology.add_chain()
            for pdb_residue in pdb_chain.iter_residues():
                res_name = pdb_residue.get_name()
                res_name = residue_renames.get(res_name, res_name)
                top_residue = self._topology.add_residue(
                    res_name, top_chain, pdb_residue.number)
                atom_renames = atom_renames_by_residue.get(res_name, {})
                for pdb_atom in pdb_residue.atoms:
                    raw_name = pdb_atom.get_name()
                    atom_name = atom_renames.get(raw_name, raw_name).strip()
                    element = pdb_atom.element
                    if element is None:
                        element = self._guess_element(atom_name, pdb_residue)

                    top_atom = self._topology.add_atom(
                        atom_name, element, top_residue,
                        serial=pdb_atom.serial_number)
                    atoms_by_serial[pdb_atom.serial_number] = top_atom

        # Gather the coordinates of every model, one list per frame.
        frames = []
        for model in structure.iter_models(use_all_models=True):
            frame = []
            for model_chain in model.iter_chains():
                for model_residue in model_chain.iter_residues():
                    frame.extend(
                        a.get_position() for a in model_residue.atoms)
            frames.append(frame)

        if any(len(frame) != len(frames[0]) for frame in frames):
            raise ValueError('PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(frames)

        # Unit cell as reported by the parsed structure.
        self._unitcell_lengths = structure.get_unit_cell_lengths()
        self._unitcell_angles = structure.get_unit_cell_angles()
        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Translate the first model's CONECT records into atom pairs,
        # ignoring serial numbers that match no atom.
        conect_pairs = []
        for record in structure.models[0].connects:
            first = record[0]
            for other in record[1:]:
                if first in atoms_by_serial and other in atoms_by_serial:
                    conect_pairs.append(
                        (atoms_by_serial[first], atoms_by_serial[other]))
        if conect_pairs:
            # Skip bonds the topology already has, in either orientation.
            known = set(self._topology.bonds)
            for pair in conect_pairs:
                reverse_pair = (pair[1], pair[0])
                if pair not in known and reverse_pair not in known:
                    self._topology.add_bond(pair[0], pair[1])
                    known.add(pair)

    @staticmethod
    def _loadNameReplacementTables():
        """Load the list of atom and residue name replacements."""
        if len(PDBTrajectoryFile._residueNameReplacements) == 0:
            tree = etree.parse(os.path.join(os.path.dirname(__file__), 'data', 'pdbNames.xml'))
            allResidues = {}
            proteinResidues = {}
            nucleicAcidResidues = {}
            for residue in tree.getroot().findall('Residue'):
                name = residue.attrib['name']
                if name == 'All':
                    PDBTrajectoryFile._parseResidueAtoms(residue, allResidues)
                elif name == 'Protein':
                    PDBTrajectoryFile._parseResidueAtoms(residue, proteinResidues)
                elif name == 'Nucleic':
                    PDBTrajectoryFile._parseResidueAtoms(residue, nucleicAcidResidues)
            for atom in allResidues:
                proteinResidues[atom] = allResidues[atom]
                nucleicAcidResidues[atom] = allResidues[atom]
            for residue in tree.getroot().findall('Residue'):
                name = residue.attrib['name']
                for id in residue.attrib:
                    if id == 'name' or id.startswith('alt'):
                        PDBTrajectoryFile._residueNameReplacements[residue.attrib[id]] = name
                if 'type' not in residue.attrib:
                    atoms = copy(allResidues)
                elif residue.attrib['type'] == 'Protein':
                    atoms = copy(proteinResidues)
                elif residue.attrib['type'] == 'Nucleic':
                    atoms = copy(nucleicAcidResidues)
                else:
                    atoms = copy(allResidues)
                PDBTrajectoryFile._parseResidueAtoms(residue, atoms)
                PDBTrajectoryFile._atomNameReplacements[name] = atoms

    def _guess_element(self, atom_name, residue):
        """Try to guess the element from an atom name and its residue.

        Parameters
        ----------
        atom_name : str
            The atom name with surrounding whitespace removed.
        residue : object
            The residue containing the atom; its length (number of atoms)
            and ``name`` are used to tell ions from protein atoms.

        Returns
        -------
        element
            The guessed element, or None when the name is empty or no
            element symbol can be derived from it.
        """
        if not atom_name:
            # A blank name carries no information. Without this guard the
            # fallback below would raise IndexError on atom_name[0].
            return None

        upper = atom_name.upper()
        if upper.startswith('CL'):
            element = elem.chlorine
        elif upper.startswith('NA'):
            element = elem.sodium
        elif upper.startswith('MG'):
            element = elem.magnesium
        elif upper.startswith('BE'):
            element = elem.beryllium
        elif upper.startswith('LI'):
            element = elem.lithium
        elif upper.startswith('K'):
            element = elem.potassium
        elif upper.startswith('ZN'):
            element = elem.zinc
        elif len(residue) == 1 and upper.startswith('CA'):
            # Only a single-atom residue is treated as a calcium ion.
            element = elem.calcium

        # TJL has edited this. There are a few issues here. First,
        # parsing for the element is non-trivial, so I do my best
        # below. Second, there is additional parsing code in
        # pdbstructure.py, and I am unsure why it doesn't get used
        # here...
        elif len(residue) > 1 and upper.startswith('CE'):
            element = elem.carbon  # (probably) not cerium...
        elif len(residue) > 1 and upper.startswith('CD'):
            element = elem.carbon  # (probably) not cadmium...
        elif residue.name in ['TRP', 'ARG', 'GLN', 'HIS'] and upper.startswith('NE'):
            element = elem.nitrogen  # (probably) not neon...
        elif residue.name in ['ASN'] and upper.startswith('ND'):
            element = elem.nitrogen  # (probably) not neodymium...
        elif residue.name == 'CYS' and upper.startswith('SG'):
            element = elem.sulfur  # (probably) not seaborgium...
        else:
            # Try the first character as a symbol, then the first two
            # characters with digits and A/B suffixes removed.
            try:
                element = elem.get_by_symbol(atom_name[0])
            except KeyError:
                try:
                    symbol = atom_name[0:2].strip().rstrip("AB0123456789").lstrip("0123456789")
                    element = elem.get_by_symbol(symbol)
                except KeyError:
                    element = None

        return element

    @staticmethod
    def _parseResidueAtoms(residue, map):
        for atom in residue.findall('Atom'):
            name = atom.attrib['name']
            for id in atom.attrib:
                map[atom.attrib[id]] = name

    def __del__(self):
        """Close the file when the object is garbage collected."""
        PDBTrajectoryFile.close(self)

    def __enter__(self):
        """Enter a ``with`` block; the file object itself is the target."""
        context_target = self
        return context_target

    def __exit__(self, *exc_info):
        """Leave a ``with`` block by closing the file.

        Returns None, so an exception raised inside the block propagates.
        """
        self.close()
        return None

    def __len__(self):
        """Number of frames in the file.

        Raises
        ------
        NotImplementedError
            If the file was not opened in mode='r'.
        ValueError
            If the file has been closed.
        """
        if str(self._mode) == 'r':
            if self._open:
                return len(self._positions)
            raise ValueError('I/O operation on closed file')
        raise NotImplementedError('len() only available in mode="r" currently')
Beispiel #2
0
class PDBTrajectoryFile(object):
    """Interface for reading and writing Protein Data Bank (PDB) files

    Parameters
    ----------
    filename : str
        The filename to open. A path to a file on disk.
    mode : {'r', 'w'}
        The mode in which to open the file, either 'r' for read or 'w' for write.
    force_overwrite : bool
        If opened in write mode, and a file by the name of `filename` already
        exists on disk, should we overwrite it?

    Attributes
    ----------
    positions : np.ndarray, shape=(n_frames, n_atoms, 3)
    topology : mdtraj.Topology
    closed : bool

    Notes
    -----
    When writing pdb files, mdtraj follows the PDB3.0 standard as closely as
    possible. During *reading* however, we try to be more lenient. For instance,
    we will parse common nonstandard atom names during reading, and convert them
    into the standard names. The replacement table used by mdtraj is at
    {mdtraj_source}/formats/pdb/data/pdbNames.xml.

    See Also
    --------
    mdtraj.load_pdb : High-level wrapper that returns a ``md.Trajectory``
    """
    distance_unit = 'angstroms'
    _residueNameReplacements = {}
    _atomNameReplacements = {}
    _chain_names = [chr(ord('A') + i) for i in range(26)]

    def __init__(self, filename, mode='r', force_overwrite=True):
        self._open = False
        self._file = None
        self._topology = None
        self._positions = None
        self._mode = mode
        self._last_topology = None

        if mode == 'r':
            PDBTrajectoryFile._loadNameReplacementTables()

            if _is_url(filename):
                self._file = urlopen(filename)
                if filename.lower().endswith('.gz'):
                    if six.PY3:
                        self._file = gzip.GzipFile(fileobj=self._file)
                    else:
                        self._file = gzip.GzipFile(
                            fileobj=six.StringIO(self._file.read()))
                if six.PY3:
                    self._file = six.StringIO(
                        self._file.read().decode('utf-8'))
            else:
                if filename.lower().endswith('.gz'):
                    self._file = gzip.open(filename, 'r')
                    self._file = six.StringIO(
                        self._file.read().decode('utf-8'))
                else:
                    self._file = open(filename, 'r')

            self._read_models()
        elif mode == 'w':
            self._header_written = False
            self._footer_written = False
            if os.path.exists(filename) and not force_overwrite:
                raise IOError('"%s" already exists' % filename)
            self._file = open(filename, 'w')
        else:
            raise ValueError("invalid mode: %s" % mode)

        self._open = True

    def write(self,
              positions,
              topology,
              modelIndex=None,
              unitcell_lengths=None,
              unitcell_angles=None,
              bfactors=None):
        """Write a PDB file to disk

        Parameters
        ----------
        positions : array_like
            The list of atomic positions to write.
        topology : mdtraj.Topology
            The Topology defining the model to write.
        modelIndex : {int, None}
            If not None, the model will be surrounded by MODEL/ENDMDL records
            with this index
        unitcell_lengths : {tuple, None}
            Lengths of the three unit cell vectors, or None for a non-periodic system
        unitcell_angles : {tuple, None}
            Angles between the three unit cell vectors, or None for a non-periodic system
        bfactors : array_like, default=None, shape=(n_atoms,)
            Save bfactors with pdb file. Should contain a single number for
            each atom in the topology
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')
        if not self._header_written:
            self._write_header(unitcell_lengths, unitcell_angles)
            self._header_written = True

        if ilen(topology.atoms) != len(positions):
            raise ValueError(
                'The number of positions must match the number of atoms')
        if np.any(np.isnan(positions)):
            raise ValueError('Particle position is NaN')
        if np.any(np.isinf(positions)):
            raise ValueError('Particle position is infinite')

        self._last_topology = topology  # Hack to save the topology of the last frame written, allows us to output CONECT entries in write_footer()

        if bfactors is None:
            bfactors = ['{0:5.2f}'.format(0.0)] * len(positions)
        else:
            if (np.max(bfactors) >= 100) or (np.min(bfactors) <= -10):
                raise ValueError("bfactors must be in (-10, 100)")

            bfactors = ['{0:5.2f}'.format(b) for b in bfactors]

        atomIndex = 1
        posIndex = 0
        if modelIndex is not None:
            print("MODEL     %4d" % modelIndex, file=self._file)
        for (chainIndex, chain) in enumerate(topology.chains):
            chainName = self._chain_names[chainIndex % len(self._chain_names)]
            residues = list(chain.residues)
            for (resIndex, res) in enumerate(residues):
                if len(res.name) > 3:
                    resName = res.name[:3]
                else:
                    resName = res.name
                for atom in res.atoms:
                    if len(atom.name) < 4 and atom.name[:1].isalpha() and (
                            atom.element is None
                            or len(atom.element.symbol) < 2):
                        atomName = ' ' + atom.name
                    elif len(atom.name) > 4:
                        atomName = atom.name[:4]
                    else:
                        atomName = atom.name
                    coords = positions[posIndex]
                    if atom.element is not None:
                        symbol = atom.element.symbol
                    else:
                        symbol = ' '
                    line = "ATOM  %5d %-4s %3s %s%4d    %s%s%s  1.00 %s          %2s  " % (
                        atomIndex % 100000, atomName, resName, chainName,
                        (res.resSeq) % 10000, _format_83(coords[0]),
                        _format_83(coords[1]), _format_83(
                            coords[2]), bfactors[posIndex], symbol)
                    assert len(line) == 80, 'Fixed width overflow detected'
                    print(line, file=self._file)
                    posIndex += 1
                    atomIndex += 1
                if resIndex == len(residues) - 1:
                    print("TER   %5d      %3s %s%4d" %
                          (atomIndex, resName, chainName, res.resSeq),
                          file=self._file)
                    atomIndex += 1

        if modelIndex is not None:
            print("ENDMDL", file=self._file)

    def _write_header(self,
                      unitcell_lengths,
                      unitcell_angles,
                      write_metadata=True):
        """Write out the header for a PDB file.

        Parameters
        ----------
        unitcell_lengths : {tuple, None}
            The lengths of the three unitcell vectors, ``a``, ``b``, ``c``
        unitcell_angles : {tuple, None}
            The angles between the three unitcell vectors, ``alpha``,
            ``beta``, ``gamma``
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        if unitcell_lengths is None and unitcell_angles is None:
            return
        if unitcell_lengths is not None and unitcell_angles is not None:
            if not len(unitcell_lengths) == 3:
                raise ValueError('unitcell_lengths must be length 3')
            if not len(unitcell_angles) == 3:
                raise ValueError('unitcell_angles must be length 3')
        else:
            raise ValueError('either unitcell_lengths and unitcell_angles'
                             'should both be spefied, or neither')

        box = list(unitcell_lengths) + list(unitcell_angles)
        assert len(box) == 6

        if write_metadata:
            print("REMARK   1 CREATED WITH MDTraj %s, %s" %
                  (version.version, str(date.today())),
                  file=self._file)
        print("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 " %
              tuple(box),
              file=self._file)

    def _write_footer(self):
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        # Identify bonds that should be listed as CONECT records.
        standardResidues = [
            'ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR',
            'TYR', 'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER',
            'TRP', 'VAL', 'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT',
            'DI', 'HOH'
        ]
        conectBonds = []
        if self._last_topology is not None:
            for atom1, atom2 in self._last_topology.bonds:
                if atom1.residue.name not in standardResidues or atom2.residue.name not in standardResidues:
                    conectBonds.append((atom1, atom2))
                elif atom1.name == 'SG' and atom2.name == 'SG' and atom1.residue.name == 'CYS' and atom2.residue.name == 'CYS':
                    conectBonds.append((atom1, atom2))
        if len(conectBonds) > 0:

            # Work out the index used in the PDB file for each atom.

            atomIndex = {}
            nextAtomIndex = 0
            prevChain = None
            for chain in self._last_topology.chains:
                for atom in chain.atoms:
                    if atom.residue.chain != prevChain:
                        nextAtomIndex += 1
                        prevChain = atom.residue.chain
                    atomIndex[atom] = nextAtomIndex
                    nextAtomIndex += 1

            # Record which other atoms each atom is bonded to.

            atomBonds = {}
            for atom1, atom2 in conectBonds:
                index1 = atomIndex[atom1]
                index2 = atomIndex[atom2]
                if index1 not in atomBonds:
                    atomBonds[index1] = []
                if index2 not in atomBonds:
                    atomBonds[index2] = []
                atomBonds[index1].append(index2)
                atomBonds[index2].append(index1)

            # Write the CONECT records.

            for index1 in sorted(atomBonds):
                bonded = atomBonds[index1]
                while len(bonded) > 4:
                    print("CONECT%5d%5d%5d%5d" %
                          (index1, bonded[0], bonded[1], bonded[2]),
                          file=self._file)
                    del bonded[:4]
                line = "CONECT%5d" % index1
                for index2 in bonded:
                    line = "%s%5d" % (line, index2)
                print(line, file=self._file)
        print("END", file=self._file)
        self._footer_written = True

    @classmethod
    def set_chain_names(cls, values):
        """Set the cycle of chain names used when writing PDB files

        When writing PDB files, PDBTrajectoryFile translates each chain's
        index into a name -- the name is what's written in the file. By
        default, chains are named with the letters A-Z.

        Parameters
        ----------
        values : list
            A list of chacters (strings of length 1) that the PDB writer will
            cycle through to choose chain names.
        """
        for item in values:
            if not isinstance(item, six.string_types) and len(item) == 1:
                raise TypeError('Names must be a single character string')
        cls._chain_names = values

    @property
    def positions(self):
        """The cartesian coordinates of all of the atoms in each frame. Available when a file is opened in mode='r'
        """
        return self._positions

    @property
    def topology(self):
        """The topology from this PDB file. Available when a file is opened in mode='r'
        """
        return self._topology

    @property
    def unitcell_lengths(self):
        "The unitcell lengths (3-tuple) in this PDB file. May be None"
        return self._unitcell_lengths

    @property
    def unitcell_angles(self):
        "The unitcell angles (3-tuple) in this PDB file. May be None"
        return self._unitcell_angles

    @property
    def closed(self):
        "Whether the file is closed"
        return not self._open

    def close(self):
        "Close the PDB file"
        if self._mode == 'w' and not self._footer_written:
            self._write_footer()
        if self._open:
            self._file.close()
        self._open = False

    def _read_models(self):
        if not self._mode == 'r':
            raise ValueError('file not opened for reading')

        self._topology = Topology()

        pdb = PdbStructure(self._file, load_all_models=True)

        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = self._topology.add_chain()
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBTrajectoryFile._residueNameReplacements:
                    resName = PDBTrajectoryFile._residueNameReplacements[
                        resName]
                r = self._topology.add_residue(resName, c, residue.number)
                if resName in PDBTrajectoryFile._atomNameReplacements:
                    atomReplacements = PDBTrajectoryFile._atomNameReplacements[
                        resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        element = self._guess_element(atomName, residue)

                    newAtom = self._topology.add_atom(
                        atomName, element, r, serial=atom.serial_number)
                    atomByNumber[atom.serial_number] = newAtom

        # load all of the positions (from every model)
        _positions = []
        for model in pdb.iter_models(use_all_models=True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        coords.append(atom.get_position())
            _positions.append(coords)

        if not all(len(f) == len(_positions[0]) for f in _positions):
            raise ValueError(
                'PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(_positions)

        ## The atom positions read from the PDB file
        self._unitcell_lengths = pdb.get_unit_cell_lengths()
        self._unitcell_angles = pdb.get_unit_cell_angles()
        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Add bonds based on CONECT records.
        connectBonds = []
        for connect in pdb.models[0].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist.
            existingBonds = set(self._topology.bonds)
            for bond in connectBonds:
                if bond not in existingBonds and (
                        bond[1], bond[0]) not in existingBonds:
                    self._topology.add_bond(bond[0], bond[1])
                    existingBonds.add(bond)

    @staticmethod
    def _loadNameReplacementTables():
        """Load the list of atom and residue name replacements."""
        if len(PDBTrajectoryFile._residueNameReplacements) == 0:
            tree = etree.parse(
                os.path.join(os.path.dirname(__file__), 'data',
                             'pdbNames.xml'))
            allResidues = {}
            proteinResidues = {}
            nucleicAcidResidues = {}
            for residue in tree.getroot().findall('Residue'):
                name = residue.attrib['name']
                if name == 'All':
                    PDBTrajectoryFile._parseResidueAtoms(residue, allResidues)
                elif name == 'Protein':
                    PDBTrajectoryFile._parseResidueAtoms(
                        residue, proteinResidues)
                elif name == 'Nucleic':
                    PDBTrajectoryFile._parseResidueAtoms(
                        residue, nucleicAcidResidues)
            for atom in allResidues:
                proteinResidues[atom] = allResidues[atom]
                nucleicAcidResidues[atom] = allResidues[atom]
            for residue in tree.getroot().findall('Residue'):
                name = residue.attrib['name']
                for id in residue.attrib:
                    if id == 'name' or id.startswith('alt'):
                        PDBTrajectoryFile._residueNameReplacements[
                            residue.attrib[id]] = name
                if 'type' not in residue.attrib:
                    atoms = copy(allResidues)
                elif residue.attrib['type'] == 'Protein':
                    atoms = copy(proteinResidues)
                elif residue.attrib['type'] == 'Nucleic':
                    atoms = copy(nucleicAcidResidues)
                else:
                    atoms = copy(allResidues)
                PDBTrajectoryFile._parseResidueAtoms(residue, atoms)
                PDBTrajectoryFile._atomNameReplacements[name] = atoms

    def _guess_element(self, atom_name, residue):
        "Try to guess the element name"

        upper = atom_name.upper()
        if upper.startswith('CL'):
            element = elem.chlorine
        elif upper.startswith('NA'):
            element = elem.sodium
        elif upper.startswith('MG'):
            element = elem.magnesium
        elif upper.startswith('BE'):
            element = elem.beryllium
        elif upper.startswith('LI'):
            element = elem.lithium
        elif upper.startswith('K'):
            element = elem.potassium
        elif upper.startswith('ZN'):
            element = elem.zinc
        elif len(residue) == 1 and upper.startswith('CA'):
            element = elem.calcium

        # TJL has edited this. There are a few issues here. First,
        # parsing for the element is non-trivial, so I do my best
        # below. Second, there is additional parsing code in
        # pdbstructure.py, and I am unsure why it doesn't get used
        # here...
        elif len(residue) > 1 and upper.startswith('CE'):
            element = elem.carbon  # (probably) not Celenium...
        elif len(residue) > 1 and upper.startswith('CD'):
            element = elem.carbon  # (probably) not Cadmium...
        elif residue.name in ['TRP', 'ARG', 'GLN', 'HIS'
                              ] and upper.startswith('NE'):
            element = elem.nitrogen  # (probably) not Neon...
        elif residue.name in ['ASN'] and upper.startswith('ND'):
            element = elem.nitrogen  # (probably) not ND...
        elif residue.name == 'CYS' and upper.startswith('SG'):
            element = elem.sulfur  # (probably) not SG...
        else:
            try:
                element = elem.get_by_symbol(atom_name[0])
            except KeyError:
                try:
                    symbol = atom_name[0:2].strip().rstrip(
                        "AB0123456789").lstrip("0123456789")
                    element = elem.get_by_symbol(symbol)
                except KeyError:
                    element = None

        return element

    @staticmethod
    def _parseResidueAtoms(residue, map):
        for atom in residue.findall('Atom'):
            name = atom.attrib['name']
            for id in atom.attrib:
                map[atom.attrib[id]] = name

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def __len__(self):
        "Number of frames in the file"
        if str(self._mode) != 'r':
            raise NotImplementedError(
                'len() only available in mode="r" currently')
        if not self._open:
            raise ValueError('I/O operation on closed file')
        return len(self._positions)