Beispiel #1
0
def load_gsd_topology(filename, frame=0):
    """Create an MDTraj.Topology from one frame of a GSD file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    frame : int, default 0
        Index of the GSD frame whose particles and bonds define the
        topology.

    Returns
    -------
    top : mdtraj.Topology
        Topology with a single chain holding a single residue named 'A'.
        One atom is added per entry of the frame's ``particles.typeid``,
        named after the matching entry of ``particles.types``, and one bond
        is added per entry of ``bonds.group``.

    Notes
    -----
    GSD files support systems with variable topologies.
    For compatibility with MDTraj, only the topology of the single frame
    selected by ``frame`` (frame 0 unless specified) is used to construct
    the MDTraj topology.
    """
    import gsd.hoomd
    with gsd.hoomd.open(filename, 'rb') as gsdfile:
        # Look the frame up once instead of once per use below.
        snapshot = gsdfile[frame]

        top = Topology()
        generic_chain = top.add_chain()
        generic_residue = top.add_residue('A', generic_chain)

        type_names = snapshot.particles.types
        for type_id in snapshot.particles.typeid:
            # Every particle is registered with the module's `virtual_site`
            # element; only its type name is carried over.
            top.add_atom(type_names[type_id], virtual_site, generic_residue)

        for bond in snapshot.bonds.group:
            # Each bond entry holds the indices of its two particles.
            top.add_bond(top.atom(bond[0]), top.atom(bond[1]))

    return top
Beispiel #2
0
    def __init__(self, topology):
        """Build the mapped topology and its atom-index tables.

        A copy of ``topology`` is stored as ``self._ref_topology``. A fresh
        ``Topology`` is then filled by calling ``self._add_residue`` once per
        residue of every chain. The index lists handed to that helper are
        stored as numpy arrays afterwards.

        Parameters
        ----------
        topology : object
            Source topology; must provide ``copy()`` and ``_chains``, whose
            items provide ``_residues``.
        """
        self._ref_topology = topology.copy()

        # Three weights per reconstructed atom; the trailing comment names
        # the atoms each weight belongs to.
        self._N_coeff = np.array([0.48318, 0.70328, -0.18643])  # CA_i-1, CA_i, O_i-1
        self._C_coeff = np.array([0.44365, 0.23520, 0.32115])   # CA_i, CA_i+1, O_i
        self._H_coeff = np.array([0.84100, 0.89296, -0.73389])  # CA_i-1, CA_i, O_i-1

        mapped_top = Topology()
        cacbo_list, n_list, c_list, h_list = [], [], [], []
        next_res, next_atm = 1, 0
        # Never reassigned in this method, so every helper call receives
        # None for both.
        prev_ca = prev_o = None

        for chain in topology._chains:
            mapped_chain = mapped_top.add_chain()
            for residue in chain._residues:
                # The helper hands back the topology and both running
                # counters; the lists are passed by reference.
                mapped_top, next_atm, next_res = self._add_residue(
                    mapped_top, mapped_chain, residue, chain,
                    next_res, next_atm,
                    n_list, c_list, h_list, cacbo_list,
                    prev_ca, prev_o)

        self.topology = mapped_top
        self._H_idxs = np.array(h_list)
        self._C_idxs = np.array(c_list)
        self._N_idxs = np.array(n_list)
        self._CACBO_idxs = np.array(cacbo_list)
Beispiel #3
0
    def __init__(self, topology):
        """Build the mapped topology, index tables and residue charges.

        A copy of ``topology`` is stored as ``self._ref_topology``. A fresh
        ``Topology`` is then filled by calling ``self._add_residue`` once per
        residue of every chain; the lists handed to that helper are stored on
        the instance afterwards.

        Parameters
        ----------
        topology : object
            Source topology; must provide ``copy()`` and ``_chains``, whose
            items provide ``_residues``.
        """
        self._ref_topology = topology.copy()

        # Used for recentering sidechain atom.
        self._N_CB_dist = 0.2470955   # distance from N_i to CB_i
        self._CA_CB_dist = 0.1533931  # distance from CA_i to CB_i
        self._C_CB_dist = 0.2510052   # distance from C_i to CB_i

        # Three weights per reconstructed atom; the trailing comment names
        # the atoms each weight belongs to.
        self._N_coeff = np.array([0.48318, 0.70328, -0.18643])  # CA_i-1, CA_i, O_i-1
        self._C_coeff = np.array([0.44365, 0.23520, 0.32115])   # CA_i, CA_i+1, O_i
        self._H_coeff = np.array([0.84100, 0.89296, -0.73389])  # CA_i-1, CA_i, O_i-1

        # Used for positioning HB's for the moment.
        self._HB_w = 3

        self._disulfides = []

        # Assemble the mapped topology residue by residue.
        mapped_top = Topology()
        cacbo_list, hb_list, charge_list = [], [], []
        next_res, next_atm = 1, 0
        for chain in topology._chains:
            mapped_chain = mapped_top.add_chain()
            for residue in chain._residues:
                # The helper hands back the topology and both running
                # counters; the lists are passed by reference.
                mapped_top, next_atm, next_res = self._add_residue(
                    mapped_top, mapped_chain, residue, charge_list,
                    next_res, next_atm, cacbo_list, hb_list)

        self.topology = mapped_top
        self._CACBO_idxs = np.array(cacbo_list)
        self._HB_idxs = np.array(hb_list)
        self._charged_residues = charge_list
Beispiel #4
0
    def _initialize(self, simulation):
        """Finish setting up the reporter once the simulation is available.

        The simulation object does not exist when the reporter is
        constructed, so this runs before the first report is processed.
        It validates the atom subset, chooses the atom slice, attaches a
        topology to the trajectory file when that file has a ``topology``
        attribute, and counts degrees of freedom when temperature reporting
        is on.

        Parameters
        ----------
        simulation : simtk.openmm.app.Simulation
            The Simulation to generate a report for

        Raises
        ------
        ValueError
            If the atom subset has a negative entry, an entry not below the
            particle count, or a non-integer entry.
        """
        subset = self._atomSubset
        if subset is None:
            self._atomSlice = slice(None)
        else:
            if not min(subset) >= 0:
                raise ValueError('atomSubset must be zero indexed. '
                                 'the smallest allowable value is zero')
            if not max(subset) < simulation.system.getNumParticles():
                raise ValueError(
                    'atomSubset must be zero indexed. '
                    'the largest value must be less than the number '
                    'of particles in the system')
            if not all(a == int(a) for a in subset):
                raise ValueError(
                    'all of the indices in atomSubset must be integers')
            self._atomSlice = subset

        # Only trajectory files exposing `topology` receive one.
        if hasattr(self._traj_file, 'topology'):
            full_topology = Topology.from_openmm(simulation.topology)
            if subset is None:
                self._traj_file.topology = full_topology
            else:
                self._traj_file.topology = _topology_from_subset(
                    full_topology, subset)

        system = simulation.system
        if self._temperature:
            # Three degrees of freedom per particle with positive mass ...
            n_massive = sum(
                1 for i in range(system.getNumParticles())
                if system.getParticleMass(i) > 0 * units.dalton)
            # ... minus one per constraint ...
            dof = 3 * n_massive - system.getNumConstraints()
            # ... minus three when a centre-of-mass motion remover is present.
            removes_cm_motion = any(
                type(system.getForce(i)) == mm.CMMotionRemover
                for i in range(system.getNumForces()))
            if removes_cm_motion:
                dof -= 3
            self._dof = dof

        if simulation.topology.getUnitCellDimensions() is None:
            self._cell = False
Beispiel #5
0
    def _initialize(self, simulation):
        """Deferred set-up of the reporter, run before the first report.

        The simulation is only reachable once reporting starts, so the work
        that needs it happens here: checking the atom subset, picking the
        atom slice, giving the trajectory file a topology (if it has a
        ``topology`` attribute) and computing the degrees of freedom used
        for temperature reporting.

        Parameters
        ----------
        simulation : simtk.openmm.app.Simulation
            The Simulation to generate a report for

        Raises
        ------
        ValueError
            If the atom subset has a negative entry, an entry not below the
            particle count, or a non-integer entry.
        """
        atom_subset = self._atomSubset
        traj_file = self._traj_file

        if atom_subset is not None:
            if not min(atom_subset) >= 0:
                raise ValueError('atomSubset must be zero indexed. '
                                 'the smallest allowable value is zero')
            if not max(atom_subset) < simulation.system.getNumParticles():
                raise ValueError('atomSubset must be zero indexed. '
                                 'the largest value must be less than the number '
                                 'of particles in the system')
            for a in atom_subset:
                if not a == int(a):
                    raise ValueError('all of the indices in atomSubset must be integers')

            self._atomSlice = atom_subset
            if hasattr(traj_file, 'topology'):
                converted = Topology.from_openmm(simulation.topology)
                traj_file.topology = _topology_from_subset(converted, atom_subset)
        else:
            # No subset requested: report every atom.
            self._atomSlice = slice(None)
            if hasattr(traj_file, 'topology'):
                traj_file.topology = Topology.from_openmm(simulation.topology)

        system = simulation.system
        if self._temperature:
            # Compute the number of degrees of freedom: three per particle
            # with positive mass, less the constraints.
            massive = [i for i in range(system.getNumParticles())
                       if system.getParticleMass(i) > 0*units.dalton]
            dof = 3 * len(massive)
            dof -= system.getNumConstraints()
            # A centre-of-mass motion remover takes away three more (once).
            for i in range(system.getNumForces()):
                if type(system.getForce(i)) == mm.CMMotionRemover:
                    dof -= 3
                    break
            self._dof = dof

        if simulation.topology.getUnitCellDimensions() is None:
            self._cell = False
Beispiel #6
0
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert ``item`` into an ``mdtraj.Topology``.

    The item is first reduced with this package's ``extract`` and the result
    is passed to ``mdtraj.core.topology.Topology.from_openmm``.

    Parameters
    ----------
    item : object
        The topology to convert; the check and error message treat it as
        an ``openmm.Topology``.
    atom_indices : 'all' or sequence, default 'all'
        Atom selection forwarded to ``extract``.
    check : bool, default True
        When true, ``item`` is passed through ``is_openmm_Topology`` and
        ``atom_indices`` through ``digest_atom_indices`` before converting.

    Returns
    -------
    mdtraj.Topology
        The converted topology.

    Raises
    ------
    WrongFormError
        If ``is_openmm_Topology(item)`` raises.
    WrongAtomIndicesError
        If ``digest_atom_indices(atom_indices)`` raises.
    LibraryNotFoundError
        If MDTraj cannot be imported.
    """
    if check:

        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt and SystemExit are not turned into form errors.
        # NOTE(review): the return value of is_openmm_Topology is ignored;
        # only an exception from it triggers WrongFormError - confirm that
        # this matches the helper's contract.
        try:
            is_openmm_Topology(item)
        except Exception:
            raise WrongFormError('openmm.Topology')

        try:
            atom_indices = digest_atom_indices(atom_indices)
        except Exception:
            raise WrongAtomIndicesError()

    # Only a failed import means the library is missing.
    try:
        from mdtraj.core.topology import Topology as mdtraj_Topology
    except ImportError:
        raise LibraryNotFoundError('MDTraj')

    from . import extract

    tmp_item = extract(item,
                       atom_indices=atom_indices,
                       copy_if_all=False,
                       check=False)
    tmp_item = mdtraj_Topology.from_openmm(tmp_item)

    return tmp_item
Beispiel #7
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : str
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        A single-frame trajectory holding the coordinates and the topology
        read from the file.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.element import get_by_symbol
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    def sybyl_element(atype):
        # Guess the element symbol from a sybyl-style atom type.
        # The text before the first "." is the candidate symbol ("C.3" -> "C").
        prefix = atype.strip(".").split(".")[0]
        try:
            # Keep a multi-letter prefix only when it is spelled exactly like
            # a known element ("Cl", "Br"); lowercase GAFF-like types such as
            # "ca" keep falling back to their first letter.
            if get_by_symbol(prefix).symbol == prefix:
                return prefix
        except KeyError:
            pass
        return prefix[0]

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out the element names.

    # If this is a GAFF mol2, this lookup should work without issues
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, there should be NAN (null) values
    if atoms_mdtraj.element.isnull().any():
        # Any unmapped type makes every atom fall back to the sybyl guess.
        atoms_mdtraj["element"] = atoms.atype.apply(sybyl_element)

    # Everything is placed in residue 1 of chain 1.
    atoms_mdtraj["resSeq"] = np.ones(len(atoms), 'int')
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    # An empty bond table is treated like a missing one; taking the minimum
    # of an empty array would raise.
    if bonds is not None and len(bonds) > 0:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    # Leading axis of length one: a single frame.
    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Beispiel #8
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : str
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        A single-frame trajectory built from the atoms (and bonds, if any)
        found in the file.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.element import get_by_symbol
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out the element names.

    # If this is a GAFF mol2, this lookup should work without issues
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, there should be NAN (null) values
    if atoms_mdtraj.element.isnull().any():

        def guess_element(atype):
            # Sybyl types look like "C.3" or "Cl": the part before the first
            # "." names the element.
            head = atype.strip(".").split(".")[0]
            try:
                exact_symbol = get_by_symbol(head).symbol == head
            except KeyError:
                exact_symbol = False
            # Only an exact-case match keeps a multi-letter symbol; anything
            # else falls back to the first character.
            return head if exact_symbol else head[0]

        atoms_mdtraj["element"] = atoms.atype.apply(guess_element)

    # Everything is placed in residue 1 of chain 1.
    atoms_mdtraj["resSeq"] = np.ones(len(atoms), 'int')
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bonds_mdtraj = None
    # Skip an empty bond table as well: `.min()` of an empty array raises.
    if bonds is not None and len(bonds) > 0:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    # Leading axis of length one: a single frame.
    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Beispiel #9
0
def create_water_topology_on_disc(n):
    """Write a PDB file holding ``n`` three-atom water residues.

    Every residue is named ``r<i>`` and contains the atoms H, O, H with both
    hydrogens bonded to the oxygen. All coordinates are zero.

    Parameters
    ----------
    n : int
        Number of residues to create.

    Returns
    -------
    topfile : str
        Path of the written ``.pdb`` file. The file is not deleted by this
        function.
    """
    # Create the file instead of only picking a name: the deprecated
    # tempfile.mktemp leaves a window in which another process can claim it.
    with tempfile.NamedTemporaryFile(suffix='.pdb', delete=False) as handle:
        topfile = handle.name

    top = Topology()
    chain = top.add_chain()

    for i in range(n):
        res = top.add_residue('r%i' % i, chain)
        h1 = top.add_atom('H', hydrogen, res)
        o = top.add_atom('O', oxygen, res)
        h2 = top.add_atom('H', hydrogen, res)
        top.add_bond(h1, o)
        top.add_bond(h2, o)

    # One row of coordinates per atom, three atoms per residue.
    xyz = np.zeros((n * 3, 3))
    Trajectory(xyz, top).save_pdb(topfile)
    return topfile
Beispiel #10
0
    def topology(self):
        """Read the topology stored in the file.

        The 'topology' node is decoded as JSON and replayed into a new
        ``Topology``: chains, residues and atoms in ascending 'index' order,
        then the bonds.

        Returns
        -------
        topology : mdtraj.Topology or None
            The rebuilt topology, or None when the file has no 'topology'
            node.
        """
        try:
            stored = self._get_node(self._handle.root, name='topology')[0]
            text = stored if isinstance(stored, string_types) else stored.decode()
            topology_dict = json.loads(text)
        except self.tables.NoSuchNodeError:
            return None

        by_index = operator.itemgetter('index')
        topology = Topology()

        for chain_dict in sorted(topology_dict['chains'], key=by_index):
            chain = topology.add_chain()
            for residue_dict in sorted(chain_dict['residues'], key=by_index):
                # Files without resSeq still load; warn once per residue.
                resSeq = residue_dict.get("resSeq")
                if "resSeq" not in residue_dict:
                    warnings.warn(
                        'No resSeq information found in HDF file, defaulting to zero-based indices'
                    )
                segment_id = residue_dict.get("segmentID", "")
                residue = topology.add_residue(residue_dict['name'],
                                               chain,
                                               resSeq=resSeq,
                                               segment_id=segment_id)
                for atom_dict in sorted(residue_dict['atoms'], key=by_index):
                    # A missing or unknown symbol becomes a virtual site.
                    try:
                        element = elem.get_by_symbol(atom_dict['element'])
                    except KeyError:
                        element = elem.virtual
                    topology.add_atom(atom_dict['name'], element, residue)

        # Bonds are stored as pairs of positions in the atom sequence.
        atom_list = list(topology.atoms)
        for first, second in topology_dict['bonds']:
            topology.add_bond(atom_list[first], atom_list[second])

        return topology
Beispiel #11
0
def create_water_topology_on_disc(n):
    """Write a PDB file holding ``n`` three-atom water residues.

    Every residue is named ``r<i>`` and contains the atoms H, O, H with both
    hydrogens bonded to the oxygen. All coordinates are zero.

    Parameters
    ----------
    n : int
        Number of residues to create.

    Returns
    -------
    topfile : str
        Path of the written ``.pdb`` file. The file is not deleted by this
        function.
    """
    # tempfile.mktemp is deprecated and only returns a name; creating the
    # file right away closes the gap in which the name could be taken.
    with tempfile.NamedTemporaryFile(suffix='.pdb', delete=False) as handle:
        topfile = handle.name

    top = Topology()
    chain = top.add_chain()

    # `range` exists on both Python 2 and 3; `xrange` is Python 2 only.
    for i in range(n):
        res = top.add_residue('r%i' % i, chain)
        h1 = top.add_atom('H', hydrogen, res)
        o = top.add_atom('O', oxygen, res)
        h2 = top.add_atom('H', hydrogen, res)
        top.add_bond(h1, o)
        top.add_bond(h2, o)

    # One row of coordinates per atom, three atoms per residue.
    xyz = np.zeros((n * 3, 3))
    Trajectory(xyz, top).save_pdb(topfile)
    return topfile
Beispiel #12
0
    def __init__(self, topology):
        r"""Calpha representation mapping

        Maps an all-atom representation to just the C-alpha's of the backbone.
        The new topology gets one 'CA' atom per residue, and consecutive
        C-alphas of the same chain are bonded.

        Parameters
        ----------
        topology : mdtraj.Topology object
            All-atom topology; the number of atoms named "CA" must equal its
            number of residues.

        """

        n_calphas = sum(1 for atm in topology.atoms if atm.name == "CA")
        assert (
            n_calphas == topology.n_residues
        ), " number of C-alpha is not equal to number of residues! check for missing or non-standard amino acids."

        self._ref_topology = topology.copy()

        # Build the C-alpha-only topology.
        ca_topology = Topology()
        carbon = None  # placeholder name only; the element is looked up per atom below
        previous_ca = None
        index_pairs = []
        atm_idx = 0
        for chain in topology._chains:
            ca_chain = ca_topology.add_chain()
            for residue in chain._residues:
                # A missing or falsy resSeq falls back to the residue index.
                res_seq = getattr(residue, 'resSeq', None) or residue.index
                ca_residue = ca_topology.add_residue(residue.name, ca_chain,
                                                     res_seq)
                carbon = md.core.element.get_by_symbol('C')
                current_ca = ca_topology.add_atom(
                    'CA', carbon, ca_residue, serial=atm_idx)

                # Pair (index of the CA in the reference topology, new index).
                reference_ca = [atm.index for atm in residue.atoms
                                if atm.name == "CA"][0]
                index_pairs.append([reference_ca, atm_idx])

                # Only bond atoms in same chain
                same_chain = (
                    previous_ca is not None
                    and previous_ca.residue.chain.index
                    == current_ca.residue.chain.index)
                if same_chain:
                    ca_topology.add_bond(previous_ca, current_ca)
                previous_ca = current_ca
                atm_idx += 1

        self._ca_idxs = np.array(index_pairs)
        self.topology = ca_topology
Beispiel #13
0
def to_mdtraj_Topology(item,
                       molecular_system=None,
                       atom_indices='all',
                       structure_indices='all'):
    """Convert ``item`` to an ``mdtraj.Topology`` by way of OpenMM.

    ``item`` is first converted with ``to_openmm_Topology`` and the result is
    passed to ``Topology.from_openmm``. When a molecular system comes back
    from the first step, the new topology is merged into it with
    ``combine_with_items``.

    Parameters
    ----------
    item : object
        Object accepted by ``to_openmm_Topology``.
    molecular_system : object, optional
        Forwarded to ``to_openmm_Topology``.
    atom_indices : 'all' or sequence, default 'all'
        Forwarded to ``to_openmm_Topology``.
    structure_indices : 'all' or sequence, default 'all'
        Forwarded to ``to_openmm_Topology``.

    Returns
    -------
    tuple
        ``(mdtraj_topology, molecular_system)``; the second entry is None
        when ``to_openmm_Topology`` returned None for it.
    """
    from mdtraj.core.topology import Topology

    conversion_options = dict(molecular_system=molecular_system,
                              atom_indices=atom_indices,
                              structure_indices=structure_indices)
    openmm_topology, system = to_openmm_Topology(item, **conversion_options)

    mdtraj_topology = Topology.from_openmm(openmm_topology)
    if system is None:
        return mdtraj_topology, system

    return mdtraj_topology, system.combine_with_items(mdtraj_topology)
Beispiel #14
0
    def mutate(self, mut_res_idx, mut_new_resname):
        """Replace one residue of ``self.topology`` with a different residue.

        A new topology is assembled: the selected residue is produced by
        ``self._add_mutated_residue``, all others are copied atom by atom,
        and the bonds are re-created by atom index. The previous topology is
        kept as ``self._prev_topology``.

        Parameters
        ----------
        mut_res_idx : int
            Index of residue to mutate.
        mut_new_resname : str
            Three-letter code of residue to mutate to.
        """

        assert (self.topology.residue(mut_res_idx).name != mut_new_resname), "mutating the residue to itself!"

        mutated = Topology()
        for chain in self.topology.chains:
            mutated_chain = mutated.add_chain()
            for residue in chain._residues:
                res_idx = residue.index
                if res_idx != mut_res_idx:
                    # Untouched residue: duplicate it together with its atoms.
                    copied = mutated.add_residue(residue.name, mutated_chain, res_idx)
                    for atom in residue.atoms:
                        element = md.core.element.get_by_symbol(atom.element.symbol)
                        mutated.add_atom(atom.name, element, copied,
                                         serial=atom.index)
                    continue
                # The residue being mutated is built by the helper.
                self._add_mutated_residue(mut_new_resname, mutated, mutated_chain, res_idx, residue)

        # The bond connectivity should stay identical, so bonds are replayed
        # by atom index.
        for old_first, old_second in self.topology._bonds:
            mutated.add_bond(mutated.atom(old_first.index),
                             mutated.atom(old_second.index))

        self._prev_topology = self.topology.copy()
        self.topology = mutated
Beispiel #15
0
    def topology(self):
        """Rebuild the topology stored in the file.

        The JSON text held in the 'topology' node is parsed and replayed into
        a new ``Topology``; chains, residues and atoms are added in ascending
        'index' order, followed by the bonds.

        Returns
        -------
        topology : mdtraj.Topology or None
            The rebuilt topology, or None when the file has no 'topology'
            node.
        """
        try:
            raw = self._get_node('/', name='topology')[0]
            if not isinstance(raw, string_types):
                raw = raw.decode()
            topology_dict = json.loads(raw)
        except self.tables.NoSuchNodeError:
            return None

        index_of = operator.itemgetter('index')

        def ordered(records):
            # Records sorted by their stored 'index'.
            return sorted(records, key=index_of)

        topology = Topology()

        for chain_dict in ordered(topology_dict['chains']):
            chain = topology.add_chain()
            for residue_dict in ordered(chain_dict['residues']):
                if "resSeq" in residue_dict:
                    resSeq = residue_dict["resSeq"]
                else:
                    resSeq = None
                    warnings.warn('No resSeq information found in HDF file, defaulting to zero-based indices')

                if "segmentID" in residue_dict:
                    segment_id = residue_dict["segmentID"]
                else:
                    segment_id = ""

                residue = topology.add_residue(
                    residue_dict['name'], chain,
                    resSeq=resSeq, segment_id=segment_id)

                for atom_dict in ordered(residue_dict['atoms']):
                    # A missing or unknown symbol becomes a virtual site.
                    try:
                        element = elem.get_by_symbol(atom_dict['element'])
                    except KeyError:
                        element = elem.virtual
                    topology.add_atom(atom_dict['name'], element, residue)

        # Each bond is a pair of positions in the atom sequence.
        all_atoms = list(topology.atoms)
        for bond in topology_dict['bonds']:
            index1, index2 = bond
            topology.add_bond(all_atoms[index1], all_atoms[index2])

        return topology
Beispiel #16
0
    def topology(self):
        """Load the topology saved in the file.

        The 'topology' node contains JSON text; it is parsed and replayed
        into a new ``Topology`` with chains, residues and atoms visited in
        ascending "index" order, followed by the bonds.

        Returns
        -------
        topology : mdtraj.Topology or None
            The rebuilt topology, or None when the file has no 'topology'
            node.
        """
        try:
            node_value = self._get_node("/", name="topology")[0]
            if isinstance(node_value, string_types):
                json_text = node_value
            else:
                json_text = node_value.decode()
            topology_dict = json.loads(json_text)
        except self.tables.NoSuchNodeError:
            return None

        sort_key = operator.itemgetter("index")
        topology = Topology()

        chain_records = sorted(topology_dict["chains"], key=sort_key)
        for chain_record in chain_records:
            chain = topology.add_chain()
            residue_records = sorted(chain_record["residues"], key=sort_key)
            for residue_record in residue_records:
                # Files without resSeq still load; warn once per residue.
                resSeq = residue_record.get("resSeq")
                if "resSeq" not in residue_record:
                    warnings.warn("No resSeq information found in HDF file, defaulting to zero-based indices")
                residue = topology.add_residue(residue_record["name"], chain, resSeq=resSeq)

                atom_records = sorted(residue_record["atoms"], key=sort_key)
                for atom_record in atom_records:
                    # A missing or unknown symbol leaves the element unset.
                    try:
                        element = elem.get_by_symbol(atom_record["element"])
                    except KeyError:
                        element = None
                    topology.add_atom(atom_record["name"], element, residue)

        # Each bond is a pair of positions in the atom sequence.
        atom_sequence = list(topology.atoms)
        for left, right in topology_dict["bonds"]:
            topology.add_bond(atom_sequence[left], atom_sequence[right])

        return topology
Beispiel #17
0
def extract(item, atom_indices='all', copy_if_all=True, check=True):
    """Return the part of an ``mdtraj.Topology`` made of the selected atoms.

    Parameters
    ----------
    item : mdtraj.Topology
        Topology to take the atoms from.
    atom_indices : 'all' or iterable of int, default 'all'
        Indices of the atoms to keep; the string 'all' keeps everything.
    copy_if_all : bool, default True
        With ``atom_indices='all'``, return a deep copy of ``item`` (True)
        or ``item`` itself (False).
    check : bool, default True
        When true, ``item`` is passed to ``digest_item`` and
        ``atom_indices`` is replaced by ``digest_atom_indices(atom_indices)``.

    Returns
    -------
    mdtraj.Topology
        ``item`` (or its copy) for 'all'; otherwise a new topology holding
        only the selected atoms, the bonds whose two atoms were both
        selected, and no empty residues or chains.
    """
    if check:

        digest_item(item, 'mdtraj.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    # Compare by value: `is 'all'` only worked when the caller's string
    # happened to be the same interned object. The isinstance guard keeps
    # array-like selections away from an element-wise `==`.
    if isinstance(atom_indices, str) and atom_indices == 'all':
        if copy_if_all:
            from copy import deepcopy
            return deepcopy(item)
        return item

    from mdtraj.core.topology import Topology
    from mdtraj.utils import ilen

    atom_indices_to_be_kept = set(atom_indices)
    newTopology = Topology()
    old_atom_to_new_atom = {}

    # mdtraj topologies expose residues (`_residues`, `add_residue`,
    # `residues`), not "groups".
    for chain in item._chains:
        newChain = newTopology.add_chain()
        for residue in chain._residues:
            # A missing or falsy resSeq falls back to the residue index.
            resSeq = getattr(residue, 'resSeq', None) or residue.index
            newResidue = newTopology.add_residue(residue.name, newChain,
                                                 resSeq, residue.segment_id)
            for atom in residue._atoms:
                if atom.index in atom_indices_to_be_kept:
                    # OpenMM Topology objects don't have serial attributes,
                    # so we have to check first.
                    serial = getattr(atom, 'serial', None)
                    newAtom = newTopology.add_atom(atom.name,
                                                   atom.element,
                                                   newResidue,
                                                   serial=serial)
                    old_atom_to_new_atom[atom] = newAtom

    # `bonds` may be an iterable or a method returning one.
    bondsiter = item.bonds
    if not hasattr(bondsiter, '__iter__'):
        bondsiter = bondsiter()

    for bond in bondsiter:
        atom1, atom2 = bond
        # We only put bonds into the new topology if both of their partners
        # were indexed and thus HAVE a new atom.
        if atom1 in old_atom_to_new_atom and atom2 in old_atom_to_new_atom:
            newTopology.add_bond(old_atom_to_new_atom[atom1],
                                 old_atom_to_new_atom[atom2],
                                 type=bond.type,
                                 order=bond.order)

    # Delete empty residues
    newTopology._residues = [
        r for r in newTopology._residues if len(r._atoms) > 0
    ]
    for chain in newTopology._chains:
        chain._residues = [r for r in chain._residues if len(r._atoms) > 0]

    # Delete empty chains
    newTopology._chains = [
        c for c in newTopology._chains if len(c._residues) > 0
    ]
    # Re-set the numAtoms and numResidues
    newTopology._numAtoms = ilen(newTopology.atoms)
    newTopology._numResidues = ilen(newTopology.residues)

    return newTopology
Beispiel #18
0
    def topology(self, topology_object):
        """Set the topology in the file

        The topology is converted to a nested dictionary (chains, residues,
        atoms, plus a list of bonded atom-index pairs), serialised as JSON
        and written to the '/topology' node, replacing any existing node.

        Parameters
        ----------
        topology_object : mdtraj.Topology
            A topology object. Anything that is not an mdtraj ``Topology``
            is first passed through ``Topology.from_openmm``.

        Raises
        ------
        AttributeError
            If the object lacks an attribute needed while walking its
            chains, residues, atoms or bonds.
        """
        _check_mode(self.mode, ('w', 'a'))

        # we want to be able to handle the simtk.openmm Topology object
        # here too, so if it's not an mdtraj topology we'll just guess
        # that it's probably an openmm topology and convert
        if not isinstance(topology_object, Topology):
            topology_object = Topology.from_openmm(topology_object)

        try:
            topology_dict = {
                'chains': [],
                'bonds': []
            }

            for chain in topology_object.chains:
                chain_dict = {
                    'residues': [],
                    'index': int(chain.index)
                }
                for residue in chain.residues:
                    residue_dict = {
                        'index': int(residue.index),
                        'name': str(residue.name),
                        'atoms': [],
                        "resSeq": int(residue.resSeq),
                        "segmentID": str(residue.segment_id)
                    }

                    for atom in residue.atoms:

                        # Atoms without an element are stored with an empty
                        # symbol.
                        try:
                            element_symbol_string = str(atom.element.symbol)
                        except AttributeError:
                            element_symbol_string = ""

                        residue_dict['atoms'].append({
                            'index': int(atom.index),
                            'name': str(atom.name),
                            'element': element_symbol_string
                        })
                    chain_dict['residues'].append(residue_dict)
                topology_dict['chains'].append(chain_dict)

            for atom1, atom2 in topology_object.bonds:
                topology_dict['bonds'].append([
                    int(atom1.index),
                    int(atom2.index)
                ])

        except AttributeError as e:
            # The first fragment now ends with a space; the message used to
            # read "...implement thechains()".
            raise AttributeError('topology_object fails to implement the '
                'chains() -> residue() -> atoms() and bond() protocol. '
                'Specifically, we encountered the following %s' % e)

        # actually set the tables
        try:
            self._remove_node(where='/', name='topology')
        except self.tables.NoSuchNodeError:
            pass

        data = json.dumps(topology_dict)
        if not isinstance(data, bytes):
            data = data.encode('ascii')

        # Compare the major version as a number: as strings,
        # '10.0.0' >= '3.0.0' is False.
        major_version = int(self.tables.__version__.split('.')[0])
        if major_version >= 3:
            self._handle.create_array(where='/', name='topology', obj=[data])
        else:
            self._handle.createArray(where='/', name='topology', object=[data])
Beispiel #19
0
    def _to_topology(self, atom_list, chain_types=None, residue_types=None):
        """Create a mdtraj.Topology from a Compound.

        Every item of `atom_list` becomes one atom of the topology. Its
        chain and residue are chosen by walking `atom.ancestors()` and
        taking the first ancestor that is an instance of `chain_types` /
        `residue_types`; atoms without such an ancestor go to a default
        chain or default residue. Bonds come from `self.bonds()`.

        Parameters
        ----------
        atom_list : iterable
            Objects providing `ancestors()`, `name` and `charge`.
        chain_types : Compound, list, set or tuple, optional
            Types tested with `isinstance` to decide which ancestors start
            a chain. Lists and sets are converted to a tuple.
        residue_types : Compound, list, set or tuple, optional
            Same as `chain_types`, for residues.

        Returns
        -------
        top : mdtraj.Topology

        """
        from mdtraj.core.element import get_by_symbol
        from mdtraj.core.topology import Topology

        # NOTE(review): when a Compound *instance* is passed, the list holds
        # the Compound class itself rather than the instance's type - confirm
        # this is intended.
        if isinstance(chain_types, Compound):
            chain_types = [Compound]
        # isinstance() below needs a type or a tuple of types.
        if isinstance(chain_types, (list, set)):
            chain_types = tuple(chain_types)

        if isinstance(residue_types, Compound):
            residue_types = [Compound]
        if isinstance(residue_types, (list, set)):
            residue_types = tuple(residue_types)
        top = Topology()
        # Maps each item of atom_list to the topology atom created for it.
        atom_mapping = {}

        # Fallback containers for atoms with no matching ancestor.
        default_chain = top.add_chain()
        default_residue = top.add_residue('RES', default_chain)

        # Running state: the most recent chain/residue and the ancestors
        # that produced them.
        last_residue_compound = None
        last_chain_compound = None
        last_residue = None
        last_chain = None

        for atom in atom_list:
            # Chains: the first ancestor matching chain_types decides. A new
            # chain (with its own default residue) is opened only when that
            # ancestor differs from the previous atom's.
            for parent in atom.ancestors():
                if chain_types and isinstance(parent, chain_types):
                    if parent != last_chain_compound:
                        last_chain_compound = parent
                        last_chain = top.add_chain()
                        last_chain_default_residue = top.add_residue(
                            'RES', last_chain)
                        last_chain.compound = last_chain_compound
                    break
            else:
                # No ancestor matched: use the default chain.
                last_chain = default_chain
                last_chain.compound = last_chain_compound

            # Residues: same scheme; a new residue is named after the class
            # of the matching ancestor.
            for parent in atom.ancestors():
                if residue_types and isinstance(parent, residue_types):
                    if parent != last_residue_compound:
                        last_residue_compound = parent
                        last_residue = top.add_residue(
                            parent.__class__.__name__, last_chain)
                        last_residue.compound = last_residue_compound
                    break
            else:
                # No ancestor matched: use the current chain's default
                # residue, or the global one inside the default chain.
                if last_chain != default_chain:
                    last_residue = last_chain_default_residue
                else:
                    last_residue = default_residue
                last_residue.compound = last_residue_compound

            # Add the actual atoms. The atom name is tried as an element
            # symbol; names that are not symbols fall back to "VS".
            try:
                elem = get_by_symbol(atom.name)
            except KeyError:
                elem = get_by_symbol("VS")
            at = top.add_atom(atom.name, elem, last_residue)
            at.charge = atom.charge
            atom_mapping[atom] = at

        # Remove empty default residues.
        chains_to_remove = [
            chain for chain in top.chains if chain.n_atoms == 0
        ]
        residues_to_remove = [res for res in top.residues if res.n_atoms == 0]
        for chain in chains_to_remove:
            top._chains.remove(chain)
        # Each empty residue is removed from whichever chain holds it.
        for res in residues_to_remove:
            for chain in top.chains:
                try:
                    chain._residues.remove(res)
                except ValueError:  # Already gone.
                    pass

        for atom1, atom2 in self.bonds():
            # Ensure that both atoms are part of the compound. This becomes an
            # issue if you try to convert a sub-compound to a topology which is
            # bonded to a different subcompound.
            if all(a in atom_mapping.keys() for a in [atom1, atom2]):
                top.add_bond(atom_mapping[atom1], atom_mapping[atom2])
        return top
Beispiel #20
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : str
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        A single-frame trajectory holding the coordinates read from the
        file together with the topology built from its atoms and bonds.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide
    from mdtraj.core.element import Element

    def sybyl_to_element(atype):
        # SYBYL atom types look like "C.3", "N.ar", "Cl" or "Br": the element
        # is the part in front of the dot.  Keep it whole when it is a known
        # symbol so that two-letter elements are not truncated ("Cl" -> "C").
        symbol = atype.split(".")[0]
        if symbol:
            try:
                Element.getBySymbol(symbol)
            except KeyError:
                pass
            else:
                return symbol
        # Unknown prefix: fall back to the first character.
        return atype.strip(".")[0]

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out element names.
    # If this is a GAFF mol2, this lookup should work without issues.
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, the lookup leaves NaN (null) values behind.
    if atoms_mdtraj.element.isnull().any():
        atoms_mdtraj["element"] = atoms.atype.apply(sybyl_to_element)

    atoms_mdtraj["resSeq"] = np.ones(len(atoms), 'int')
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }
    if bonds is not None:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
        # Create the bond augment information
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        # Add bond type information.  The column may have been parsed as
        # integers when every bond type is numeric, hence the str().
        bond_augment[:, 0] = [
            float(bond_type_map[str(bond_value)])
            for bond_value in bonds["bond_type"].values
        ]
        # Add Bond "order" information, this is not known from Mol2 files
        bond_augment[:, 1] = [0.0 for _ in range(n_bonds)]
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Beispiel #21
0
    def _to_topology(self, atom_list, chain_types=None, residue_types=None):
        """Build an mdtraj.Topology from a list of this Compound's atoms.

        Parameters
        ----------
        atom_list : iterable
            Atoms to add, in order.  Each one must provide ``ancestors()``,
            ``name`` and ``charge``.
        chain_types : Compound, list, set or tuple, optional
            Types handed to ``isinstance`` to recognise the ancestor that
            defines an atom's chain.  A Compound *instance* is replaced by
            the ``Compound`` class itself.
        residue_types : Compound, list, set or tuple, optional
            Same as `chain_types`, but used to recognise residues.

        Returns
        -------
        top : mdtraj.Topology

        """
        # Normalise both type filters into tuples usable by isinstance().
        if isinstance(chain_types, Compound):
            chain_types = (Compound,)
        elif isinstance(chain_types, (list, set)):
            chain_types = tuple(chain_types)

        if isinstance(residue_types, Compound):
            residue_types = (Compound,)
        elif isinstance(residue_types, (list, set)):
            residue_types = tuple(residue_types)

        top = Topology()
        atom_mapping = {}

        # Catch-all chain/residue for atoms without a matching ancestor.
        default_chain = top.add_chain()
        default_residue = top.add_residue('RES', default_chain)

        current_chain_cmpd = None
        current_res_cmpd = None
        current_chain = None
        current_res = None

        for atom in atom_list:
            # Chains: the first ancestor matching chain_types decides.
            for ancestor in atom.ancestors():
                if not (chain_types and isinstance(ancestor, chain_types)):
                    continue
                if ancestor != current_chain_cmpd:
                    current_chain_cmpd = ancestor
                    current_chain = top.add_chain()
                    # Every new chain gets its own fallback residue.
                    chain_fallback_res = top.add_residue('RES', current_chain)
                    current_chain.compound = current_chain_cmpd
                break
            else:
                # NOTE(review): current_chain_cmpd is not reset here, so a
                # later atom whose chain ancestor equals it would stay in
                # the default chain -- confirm this is intended.
                current_chain = default_chain
                current_chain.compound = current_chain_cmpd

            # Residues: same scheme, using residue_types.
            for ancestor in atom.ancestors():
                if not (residue_types and isinstance(ancestor, residue_types)):
                    continue
                if ancestor != current_res_cmpd:
                    current_res_cmpd = ancestor
                    current_res = top.add_residue(
                        ancestor.__class__.__name__, current_chain)
                    current_res.compound = current_res_cmpd
                break
            else:
                current_res = (chain_fallback_res
                               if current_chain != default_chain
                               else default_residue)
                current_res.compound = current_res_cmpd

            # Add the actual atom; unknown names become virtual sites.
            try:
                element = get_by_symbol(atom.name)
            except KeyError:
                element = get_by_symbol("VS")
            new_atom = top.add_atom(atom.name, element, current_res)
            new_atom.charge = atom.charge
            atom_mapping[atom] = new_atom

        # Drop the placeholder chains and residues that never got an atom.
        empty_chains = [c for c in top.chains if c.n_atoms == 0]
        empty_residues = [r for r in top.residues if r.n_atoms == 0]
        for empty_chain in empty_chains:
            top._chains.remove(empty_chain)
        for empty_res in empty_residues:
            for chain in top.chains:
                if empty_res in chain._residues:
                    chain._residues.remove(empty_res)

        # Only keep bonds whose two ends were both converted.  This matters
        # when a sub-compound bonded to a different sub-compound is exported.
        for first, second in self.bonds():
            if first in atom_mapping and second in atom_mapping:
                top.add_bond(atom_mapping[first], atom_mapping[second])
        return top
Beispiel #22
0
    def topology(self, topology_object):
        """Set the topology in the file

        The topology is serialised to JSON (chains -> residues -> atoms,
        plus a list of bonds given as pairs of atom indices) and stored as
        the ``/topology`` array, replacing any existing node of that name.

        Parameters
        ----------
        topology_object : mdtraj.Topology
            A topology object. Anything that is not an mdtraj Topology is
            assumed to be an OpenMM topology and converted first.

        Raises
        ------
        AttributeError
            If `topology_object` does not expose the chain / residue /
            atom / bond attributes read here.
        """
        _check_mode(self.mode, ('w', 'a'))

        # we want to be able to handle the simtk.openmm Topology object
        # here too, so if it's not an mdtraj topology we'll just guess
        # that it's probably an openmm topology and convert
        if not isinstance(topology_object, Topology):
            topology_object = Topology.from_openmm(topology_object)

        try:
            topology_dict = {
                'chains': [],
                'bonds': []
            }

            for chain in topology_object.chains:
                chain_dict = {
                    'residues': [],
                    'index': int(chain.index)
                }
                for residue in chain.residues:
                    residue_dict = {
                        'index': int(residue.index),
                        'name': str(residue.name),
                        'atoms': [],
                        "resSeq": int(residue.resSeq),
                        "segmentID": str(residue.segment_id)
                    }

                    for atom in residue.atoms:
                        # Atoms without an element are stored with an
                        # empty symbol.
                        try:
                            element_symbol_string = str(atom.element.symbol)
                        except AttributeError:
                            element_symbol_string = ""

                        residue_dict['atoms'].append({
                            'index': int(atom.index),
                            'name': str(atom.name),
                            'element': element_symbol_string
                        })
                    chain_dict['residues'].append(residue_dict)
                topology_dict['chains'].append(chain_dict)

            for atom1, atom2 in topology_object.bonds:
                topology_dict['bonds'].append([
                    int(atom1.index),
                    int(atom2.index)
                ])

        except AttributeError as e:
            raise AttributeError('topology_object fails to implement the '
                'chains() -> residue() -> atoms() and bond() protocol. '
                'Specifically, we encountered the following %s' % e)

        # actually set the tables
        try:
            self._remove_node(where='/', name='topology')
        except self.tables.NoSuchNodeError:
            pass

        data = json.dumps(topology_dict)
        if not isinstance(data, bytes):
            data = data.encode('ascii')

        # Compare the major version as a number: comparing the version
        # strings directly orders '10.0.0' before '3.0.0'.
        tables_major = int(self.tables.__version__.split('.')[0])
        if tables_major >= 3:
            self._handle.create_array(where='/', name='topology', obj=[data])
        else:
            self._handle.createArray(where='/', name='topology', object=[data])
Beispiel #23
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains are found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with
        corresponding Topology.

    Raises
    ------
    ValueError
        If the file lists a different number of types and positions.

    Notes
    -----
    This function requires the NetworkX python package.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology
    topology = Topology()
    root = cElementTree.parse(filename).getroot()
    config = root.find('configuration')
    position_node = config.find('position')
    bond_node = config.find('bond')
    type_node = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box.attrib = {key.lower(): val for key, val in box.attrib.items()}
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    # Tilt factors are optional; if any is missing or malformed all three
    # are taken to be zero.
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (ValueError, KeyError):
        xy = xz = yz = 0.0
    unitcell_vectors = np.array([[[lx, xy * ly, xz * lz],
                                  [0.0, ly, yz * lz],
                                  [0.0, 0.0, lz]]])

    # The first line of each text node is skipped.
    positions = []
    for row in position_node.text.splitlines()[1:]:
        fields = row.split()
        positions.append(
            (float(fields[0]), float(fields[1]), float(fields[2])))

    types = [str(row.split()[0]) for row in type_node.text.splitlines()[1:]]
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type
    bonds = []
    chains = []
    if hasattr(bond_node, 'text'):
        for row in bond_node.text.splitlines()[1:]:
            fields = row.split()
            bonds.append((int(fields[1]), int(fields[2])))
        chains = _find_chains(bonds)

    # Relate the first index in the bonded-group to mdtraj.Residue
    bonded_to_residue = {}
    for i, type_name in enumerate(types):
        bonded_group = _in_chain(chains, i)
        if bonded_group is None:
            # Unbonded particle: it gets a chain and residue of its own.
            residue = topology.add_residue('A', topology.add_chain())
        else:
            group_key = bonded_group[0]
            if group_key not in bonded_to_residue:
                bonded_to_residue[group_key] = topology.add_residue(
                    'A', topology.add_chain())
            residue = bonded_to_residue[group_key]
        topology.add_atom(type_name, virtual_site, residue)

    for first, second in bonds:
        topology.add_bond(topology.atom(first), topology.atom(second))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Beispiel #24
0
    def _read(self):
        """Read a single frame.

        The frame starts with a line whose first token is the atom count,
        optionally followed by a line of six floats (three cell lengths and
        three cell angles), and then one record per atom.  Within an atom
        record, field 1 is the name, fields 2-4 are the coordinates and
        fields 6 onwards are the 1-based indices of bonded atoms.

        If ``self.topology`` is None, a single-chain, single-residue
        topology is built from the atom names and bond partners.

        Returns
        -------
        coords : np.ndarray, shape=(n_atoms, 3), dtype=float32
        cell_lengths : np.ndarray, shape=(3,), or None
        cell_angles : np.ndarray, shape=(3,), or None

        Raises
        ------
        _EOF
            If there is nothing left to read.
        """
        from mdtraj.core.topology import Topology
        from mdtraj.core.element import Element, virtual
        # Read in the number of atoms.
        line = self._fh.readline()
        if line == '':
            raise _EOF()

        self._n_atoms = int(line.split()[0])
        self._line_counter += 1

        coords = np.empty((self._n_atoms, 3), dtype=np.float32)
        bond_partners = [[] for _ in range(self._n_atoms)]
        atom_names = ['' for _ in range(self._n_atoms)]
        line = self._fh.readline()
        s = line.split()
        self._line_counter += 1
        # See if we have box info on this line or not
        cell_lengths = cell_angles = None
        if len(s) == 6:
            try:
                box = [float(x) for x in s]
            except ValueError:
                # Not a box line: an atom record that happens to have six
                # fields.  Leave `s` alone so it is parsed as the first atom.
                pass
            else:
                cell_lengths = np.asarray(box[:3])
                cell_angles = np.asarray(box[3:])
                # The box line is consumed; move on to the first atom
                # record and re-split it, otherwise the first atom would be
                # filled from the box values and the real record dropped.
                line = self._fh.readline()
                s = line.split()
                self._line_counter += 1
        i = 0
        while i < self._n_atoms - 1:
            atom_names[i] = s[1]
            bond_partners[i] = [int(x) for x in s[6:]]
            coords[i, :] = [float(s[pos]) for pos in [2, 3, 4]]
            i += 1
            line = self._fh.readline()
            s = line.split()
            self._line_counter += 1
        # Now do the last atom (no further line is read after it)
        atom_names[i] = s[1]
        bond_partners[i] = [int(x) for x in s[6:]]
        coords[i, :] = [float(s[pos]) for pos in [2, 3, 4]]
        # Now see if we have to build a topology
        if self.topology is None:
            self.topology = top = Topology()
            chain = top.add_chain()  # only 1 chain
            res = top.add_residue('RES', chain, 1)  # only 1 residue
            for at in atom_names:
                # First get the element. Try for common 2-letter elements,
                # then use the first letter only (fall back to the virtual
                # element if it cannot be found)
                if at[:2].upper() in ('NA', 'CL', 'MG'):
                    elem = Element.getBySymbol(at[:2])
                else:
                    try:
                        elem = Element.getBySymbol(at[0])
                    except KeyError:
                        elem = virtual
                top.add_atom(at, elem, res)
            # Now add the bonds
            atoms = list(top.atoms)
            for i, bonds in enumerate(bond_partners):
                me = atoms[i]
                for b in bonds:
                    b -= 1  # partner indices in the file are 1-based
                    # Each bond is listed by both partners; add it once.
                    if b < i:
                        continue
                    it = atoms[b]
                    top.add_bond(me, it)

        self._frame_index += 1
        return coords, cell_lengths, cell_angles
Beispiel #25
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : path-like
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        A single-frame trajectory holding the coordinates read from the
        file together with the topology built from its atoms and bonds.

    Raises
    ------
    KeyError
        If valid elements can be inferred neither from the atom types nor
        from the atom names.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide
    from mdtraj.core.element import Element

    # Defined unconditionally: it is needed both for sybyl atom types and
    # for the atom-name fallback further down.
    def to_element(x):
        # Argument x may be passed as a list with only one element.
        if isinstance(x, (list, tuple)):
            assert len(x) == 1
            x = x[0]

        if '.' in x:  # orbital-hybridizations in SYBL
            return x.split('.')[0]
        try:
            # check if we can convert the whole str to an Element,
            # if not, we only pass the first letter.
            Element.getBySymbol(x)
        except KeyError:
            return x[0]
        return x

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out element names.
    # If this is a GAFF mol2, this lookup should work without issues.
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, the lookup leaves NaN (null) values behind.
    if atoms_mdtraj.element.isnull().any():
        atoms_mdtraj["element"] = atoms.atype.apply(to_element)

    # Check if elements inferred from atoms.atype are valid
    # If not, try to infer elements from atoms.name
    try:
        atoms_mdtraj['element'].apply(elem.get_by_symbol)
    except KeyError:
        try:
            atoms_mdtraj["element"] = atoms.name.apply(to_element)
            atoms_mdtraj['element'].apply(elem.get_by_symbol)
        except KeyError:
            raise KeyError('Invalid element passed to atoms DataFrame')

    atoms_mdtraj['resSeq'] = atoms['code']
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }
    if bonds is not None:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
        # Create the bond augment information
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        # Add bond type information
        bond_augment[:, 0] = [
            float(bond_type_map[str(bond_value)])
            for bond_value in bonds["bond_type"].values
        ]
        # Add Bond "order" information, this is not known from Mol2 files
        bond_augment[:, 1] = [0.0 for _ in range(n_bonds)]
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Beispiel #26
0
    def __init__(self, topology, use_chains=None):
        """Build a two-bead-per-residue (CA + CB) topology from `topology`.

        Every residue of the selected chains gets a ``CA`` atom; every
        residue other than ``GLY`` additionally gets a ``CB<code>`` atom
        bonded to it, where ``<code>`` comes from
        ``atom_types.residue_code``.  Consecutive CA atoms of the same
        chain are bonded.

        Parameters
        ----------
        topology : mdtraj.Topology
            The reference topology; a copy is kept as ``_ref_topology``.
        use_chains : container of int, optional
            Positions (in ``topology._chains``) of the chains to map.
            All chains are used when omitted.

        Raises
        ------
        IndexError
            If a residue of a selected chain has no atom named ``CA``.
        """
        if use_chains is None:
            use_chains = range(len(topology._chains))

        self._ref_topology = topology.copy()

        # Build new topology
        newTopology = Topology()
        new_atm_idx = 0
        res_idx = 1
        prev_ca = None
        ca_idxs = []
        self._sidechain_idxs = []
        self._sidechain_mass = []
        self._chain_indices = []
        for chain_count, chain in enumerate(topology._chains):
            if chain_count not in use_chains:
                continue
            newChain = newTopology.add_chain()
            for residue in chain._residues:
                newResidue = newTopology.add_residue(
                    residue.name, newChain, res_idx)
                # map CA
                new_ca = newTopology.add_atom(
                    'CA',
                    md.core.element.get_by_symbol('C'),
                    newResidue,
                    serial=new_atm_idx)
                self._chain_indices.append(chain_count)
                # only bond atoms in the same chain.
                if (prev_ca is not None and
                        new_ca.residue.chain.index == prev_ca.residue.chain.index):
                    newTopology.add_bond(prev_ca, new_ca)
                prev_ca = new_ca

                ref_ca = [atm.index for atm in residue.atoms
                          if atm.name == "CA"]
                if not ref_ca:
                    # Fail with a message that names the offending residue
                    # instead of a bare "list index out of range".
                    raise IndexError(
                        "residue %s in chain %s has no CA atom (atoms: %s)"
                        % (residue, chain,
                           ", ".join(str(atm.name) for atm in residue.atoms)))
                # Pair: [index in the reference topology, index in the new one]
                ca_idxs.append([ref_ca[0], new_atm_idx])
                new_atm_idx += 1

                if residue.name == 'GLY':
                    # Glycine gets no CB bead.
                    self._sidechain_idxs.append([])
                    self._sidechain_mass.append([])
                else:
                    # map CB
                    cb_name = "CB%s" % atom_types.residue_code[residue.name]
                    new_cb = newTopology.add_atom(
                        cb_name,
                        md.core.element.get_by_symbol('C'),
                        newResidue,
                        serial=new_atm_idx)
                    self._chain_indices.append(chain_count)

                    newTopology.add_bond(new_cb, new_ca)

                    # Non-hydrogen side-chain atoms represented by this CB.
                    heavy_sidechain = [
                        atm for atm in residue.atoms
                        if (atm.is_sidechain) and (atm.element.symbol != "H")]
                    self._sidechain_idxs.append(
                        [[atm.index for atm in heavy_sidechain], new_atm_idx])
                    self._sidechain_mass.append(
                        np.array([atm.element.mass for atm in heavy_sidechain]))
                    new_atm_idx += 1
                res_idx += 1

        self._ca_idxs = np.array(ca_idxs)
        self.topology = newTopology
        assert self.topology.n_atoms == len(self._chain_indices)
Beispiel #27
0
class PDBTrajectoryFile(object):
    """Interface for reading and writing Protein Data Bank (PDB) files

    Parameters
    ----------
    filename : str
        The filename to open. A path to a file on disk.
    mode : {'r', 'w'}
        The mode in which to open the file, either 'r' for read or 'w' for write.
    force_overwrite : bool
        If opened in write mode, and a file by the name of `filename` already
        exists on disk, should we overwrite it?

    Attributes
    ----------
    positions : np.ndarray, shape=(n_frames, n_atoms, 3)
    topology : mdtraj.Topology
    closed : bool

    Notes
    -----
    When writing pdb files, mdtraj follows the PDB3.0 standard as closely as
    possible. During *reading* however, we try to be more lenient. For instance,
    we will parse common nonstandard atom names during reading, and convert them
    into the standard names. The replacement table used by mdtraj is at
    {mdtraj_source}/formats/pdb/data/pdbNames.xml.

    See Also
    --------
    mdtraj.load_pdb : High-level wrapper that returns a ``md.Trajectory``
    """
    distance_unit = 'angstroms'
    _residueNameReplacements = {}
    _atomNameReplacements = {}
    _chain_names = [chr(ord('A') + i) for i in range(26)]

    def __init__(self, filename, mode='r', force_overwrite=True):
        """Open `filename` for reading (parsing all models) or for writing.

        Raises
        ------
        IOError
            In write mode, if the file exists and `force_overwrite` is False.
        ValueError
            If `mode` is neither 'r' nor 'w'.
        """
        self._open = False
        self._file = None
        self._topology = None
        self._positions = None
        self._mode = mode
        self._last_topology = None

        if mode == 'r':
            PDBTrajectoryFile._loadNameReplacementTables()

            if _is_url(filename):
                self._file = urlopen(filename)
                if filename.lower().endswith('.gz'):
                    if six.PY3:
                        self._file = gzip.GzipFile(fileobj=self._file)
                    else:
                        self._file = gzip.GzipFile(fileobj=six.StringIO(
                            self._file.read()))
                if six.PY3:
                    self._file = six.StringIO(self._file.read().decode('utf-8'))
            else:
                if filename.lower().endswith('.gz'):
                    # Decompress into memory, then close the gzip handle
                    # right away instead of leaking it.
                    gz_file = gzip.open(filename, 'r')
                    try:
                        self._file = six.StringIO(
                            gz_file.read().decode('utf-8'))
                    finally:
                        gz_file.close()
                else:
                    self._file = open(filename, 'r')

            self._read_models()
        elif mode == 'w':
            self._header_written = False
            self._footer_written = False
            if os.path.exists(filename) and not force_overwrite:
                raise IOError('"%s" already exists' % filename)
            self._file = open(filename, 'w')
        else:
            raise ValueError("invalid mode: %s" % mode)

        self._open = True

    def write(self, positions, topology, modelIndex=None, unitcell_lengths=None,
              unitcell_angles=None, bfactors=None):
        """Write a PDB file to disk

        The header is written on the first call only. One ATOM record is
        written per atom and one TER record after the last residue of each
        chain; TER consumes an atom serial number as well.

        Parameters
        ----------
        positions : array_like
            The list of atomic positions to write.
        topology : mdtraj.Topology
            The Topology defining the model to write.
        modelIndex : {int, None}
            If not None, the model will be surrounded by MODEL/ENDMDL records
            with this index
        unitcell_lengths : {tuple, None}
            Lengths of the three unit cell vectors, or None for a non-periodic system
        unitcell_angles : {tuple, None}
            Angles between the three unit cell vectors, or None for a non-periodic system
        bfactors : array_like, default=None, shape=(n_atoms,)
            Save bfactors with pdb file. Should contain a single number for
            each atom in the topology

        Raises
        ------
        ValueError
            If the file is not open for writing, the number of positions
            does not match the number of atoms, a position is NaN or
            infinite, or a bfactor lies outside (-10, 100).
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')
        if not self._header_written:
            self._write_header(unitcell_lengths, unitcell_angles)
            self._header_written = True

        if ilen(topology.atoms) != len(positions):
            raise ValueError('The number of positions must match the number of atoms')
        if np.any(np.isnan(positions)):
            raise ValueError('Particle position is NaN')
        if np.any(np.isinf(positions)):
            raise ValueError('Particle position is infinite')

        # Hack to save the topology of the last frame written, allows us to
        # output CONECT entries in write_footer()
        self._last_topology = topology

        if bfactors is None:
            bfactors = ['{0:5.2f}'.format(0.0)] * len(positions)
        else:
            # Values outside this range do not fit the 5-character field.
            if (np.max(bfactors) >= 100) or (np.min(bfactors) <= -10):
                raise ValueError("bfactors must be in (-10, 100)")

            bfactors = ['{0:5.2f}'.format(b) for b in bfactors]

        atomIndex = 1
        posIndex = 0
        if modelIndex is not None:
            print("MODEL     %4d" % modelIndex, file=self._file)
        for (chainIndex, chain) in enumerate(topology.chains):
            # Chain names are recycled when there are more chains than names.
            chainName = self._chain_names[chainIndex % len(self._chain_names)]
            residues = list(chain.residues)
            for (resIndex, res) in enumerate(residues):
                # Residue names are limited to three characters.
                resName = res.name[:3]
                for atom in res.atoms:
                    if len(atom.name) < 4 and atom.name[:1].isalpha() and (atom.element is None or len(atom.element.symbol) < 2):
                        atomName = ' '+atom.name
                    elif len(atom.name) > 4:
                        atomName = atom.name[:4]
                    else:
                        atomName = atom.name
                    coords = positions[posIndex]
                    if atom.element is not None:
                        symbol = atom.element.symbol
                    else:
                        symbol = ' '
                    # Serial and residue numbers wrap around so that they
                    # stay inside their fixed-width columns.
                    line = "ATOM  %5d %-4s %3s %s%4d    %s%s%s  1.00 %s          %2s  " % (
                        atomIndex % 100000, atomName, resName, chainName,
                        (res.resSeq) % 10000, _format_83(coords[0]),
                        _format_83(coords[1]), _format_83(coords[2]),
                        bfactors[posIndex], symbol)
                    assert len(line) == 80, 'Fixed width overflow detected'
                    print(line, file=self._file)
                    posIndex += 1
                    atomIndex += 1
                if resIndex == len(residues)-1:
                    # Apply the same wrap-around as the ATOM records;
                    # otherwise large systems overflow the TER columns.
                    print("TER   %5d      %3s %s%4d" % (
                        atomIndex % 100000, resName, chainName,
                        res.resSeq % 10000), file=self._file)
                    atomIndex += 1

        if modelIndex is not None:
            print("ENDMDL", file=self._file)

    def _write_header(self, unitcell_lengths, unitcell_angles, write_metadata=True):
        """Write out the header for a PDB file.

        Parameters
        ----------
        unitcell_lengths : {tuple, None}
            The lengths of the three unitcell vectors, ``a``, ``b``, ``c``
        unitcell_angles : {tuple, None}
            The angles between the three unitcell vectors, ``alpha``,
            ``beta``, ``gamma``
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        if unitcell_lengths is None and unitcell_angles is None:
            return
        if unitcell_lengths is not None and unitcell_angles is not None:
            if not len(unitcell_lengths) == 3:
                raise ValueError('unitcell_lengths must be length 3')
            if not len(unitcell_angles) == 3:
                raise ValueError('unitcell_angles must be length 3')
        else:
            raise ValueError('either unitcell_lengths and unitcell_angles'
                             'should both be spefied, or neither')

        box = list(unitcell_lengths) + list(unitcell_angles)
        assert len(box) == 6

        if write_metadata:
            print("REMARK   1 CREATED WITH MDTraj %s, %s" % (version.version, str(date.today())), file=self._file)
        print("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 " % tuple(box), file=self._file)

    def _write_footer(self):
        """Write the CONECT records (if any) followed by the END record.

        CONECT records are produced from the bonds of the last topology that
        was stored in ``self._last_topology``. Only bonds that touch a
        residue outside ``standardResidues``, and SG-SG bonds between two CYS
        residues, are written.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'w'.
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        # Identify bonds that should be listed as CONECT records.
        standardResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR',
                            'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL',
                            'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI', 'HOH']
        conectBonds = []
        if self._last_topology is not None:
            for atom1, atom2 in self._last_topology.bonds:
                if atom1.residue.name not in standardResidues or atom2.residue.name not in standardResidues:
                    conectBonds.append((atom1, atom2))
                elif atom1.name == 'SG' and atom2.name == 'SG' and atom1.residue.name == 'CYS' and atom2.residue.name == 'CYS':
                    conectBonds.append((atom1, atom2))
        if len(conectBonds) > 0:

            # Work out the index used in the PDB file for each atom. Numbering
            # starts at 1 and one extra number is consumed at every chain
            # boundary, so the first atom of each later chain is offset by one.

            atomIndex = {}
            nextAtomIndex = 0
            prevChain = None
            for chain in self._last_topology.chains:
                for atom in chain.atoms:
                    if atom.residue.chain != prevChain:
                        nextAtomIndex += 1
                        prevChain = atom.residue.chain
                    atomIndex[atom] = nextAtomIndex
                    nextAtomIndex += 1

            # Record which other atoms each atom is bonded to.

            atomBonds = {}
            for atom1, atom2 in conectBonds:
                index1 = atomIndex[atom1]
                index2 = atomIndex[atom2]
                atomBonds.setdefault(index1, []).append(index2)
                atomBonds.setdefault(index2, []).append(index1)

            # Write the CONECT records, at most four bonded atoms per line.
            # Every partner is written: the previous loop printed three
            # partners but discarded four, losing every fourth bond.

            for index1 in sorted(atomBonds):
                bonded = atomBonds[index1]
                for start in range(0, len(bonded), 4):
                    line = "CONECT%5d" % index1
                    for index2 in bonded[start:start + 4]:
                        line = "%s%5d" % (line, index2)
                    print(line, file=self._file)
        print("END", file=self._file)
        self._footer_written = True

    @classmethod
    def set_chain_names(cls, values):
        """Set the cycle of chain names used when writing PDB files

        When writing PDB files, PDBTrajectoryFile translates each chain's
        index into a name -- the name is what's written in the file. By
        default, chains are named with the letters A-Z.

        Parameters
        ----------
        values : list
            A list of characters (strings of length 1) that the PDB writer
            will cycle through to choose chain names.

        Raises
        ------
        TypeError
            If any entry of ``values`` is not a string of length 1.
        """
        for item in values:
            # Both conditions must hold for a valid name, so the negation has
            # to wrap the whole conjunction. Without the parentheses ``not``
            # binds only to isinstance() and multi-character strings slip
            # through unchecked.
            if not (isinstance(item, six.string_types) and len(item) == 1):
                raise TypeError('Names must be a single character string')
        cls._chain_names = values

    @property
    def positions(self):
        """Cartesian coordinates of all atoms in every frame.

        Available when the file is opened in mode='r'.
        """
        return self._positions

    @property
    def topology(self):
        """Topology read from this PDB file.

        Available when the file is opened in mode='r'.
        """
        return self._topology

    @property
    def unitcell_lengths(self):
        """Unit cell lengths (3-tuple) stored for this PDB file; may be None."""
        return self._unitcell_lengths

    @property
    def unitcell_angles(self):
        """Unit cell angles (3-tuple) stored for this PDB file; may be None."""
        return self._unitcell_angles

    @property
    def closed(self):
        """True when the file is not currently open."""
        return not self._open

    def close(self):
        """Close the PDB file.

        For a file open in mode 'w' whose footer has not been written yet,
        the footer is written first. Calling ``close()`` on a file that is
        not open does nothing, so it is safe to call repeatedly and from
        ``__del__``.
        """
        # Only touch the file while it is actually open. Writing the footer
        # for an object whose file was never opened would print to a missing
        # handle (``print(..., file=None)`` goes to stdout).
        if self._open:
            try:
                if self._mode == 'w' and not self._footer_written:
                    self._write_footer()
            finally:
                # Release the handle even if writing the footer failed.
                self._file.close()
                self._open = False
        self._open = False

    def _read_models(self):
        """Parse the open PDB file into a topology, positions and unit cell.

        The topology is built from the chains, residues and atoms yielded by
        ``PdbStructure.iter_chains()``. Residue and atom names are mapped
        through the class-level replacement tables, and an element is guessed
        from the atom name when the parsed atom has none. Positions are
        collected from every model. Bonds come from
        ``create_standard_bonds()``, ``create_disulfide_bonds()`` and the
        file's CONECT records.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r', or if the models do not
            all contain the same number of atoms.
        """
        if not self._mode == 'r':
            raise ValueError('file not opened for reading')

        self._topology = Topology()

        pdb = PdbStructure(self._file, load_all_models=True)

        # Maps PDB serial number -> topology atom, used to resolve CONECT.
        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = self._topology.add_chain()
            for residue in chain.iter_residues():
                resName = residue.get_name()
                resName = PDBTrajectoryFile._residueNameReplacements.get(resName, resName)
                r = self._topology.add_residue(resName, c, residue.number)
                atomReplacements = PDBTrajectoryFile._atomNameReplacements.get(resName, {})
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    atomName = atomReplacements.get(atomName, atomName)
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        element = self._guess_element(atomName, residue)

                    newAtom = self._topology.add_atom(atomName, element, r, serial=atom.serial_number)
                    atomByNumber[atom.serial_number] = newAtom

        # load all of the positions (from every model)
        _positions = []
        for model in pdb.iter_models(use_all_models=True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        coords.append(atom.get_position())
            _positions.append(coords)

        if not all(len(f) == len(_positions[0]) for f in _positions):
            raise ValueError('PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(_positions)

        ## The atom positions read from the PDB file
        self._unitcell_lengths = pdb.get_unit_cell_lengths()
        self._unitcell_angles = pdb.get_unit_cell_angles()
        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Add bonds based on CONECT records. They are taken from the LAST
        # model: CONECT lines follow the coordinate section, so for a
        # multi-model file the first model's list would presumably be empty.
        # NOTE(review): assumes the parser attaches CONECT records to the
        # model that is current when they are read -- confirm in PdbStructure.
        connectBonds = []
        for connect in pdb.models[-1].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist (in either direction).
            existingBonds = set(self._topology.bonds)
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    self._topology.add_bond(bond[0], bond[1])
                    existingBonds.add(bond)

    @staticmethod
    def _loadNameReplacementTables():
        """Populate the class-level residue/atom name replacement tables.

        The tables are read from ``data/pdbNames.xml`` next to this module.
        Nothing happens when the residue table already has entries.
        """
        if PDBTrajectoryFile._residueNameReplacements:
            return

        xml_path = os.path.join(os.path.dirname(__file__), 'data', 'pdbNames.xml')
        residue_nodes = etree.parse(xml_path).getroot().findall('Residue')

        # Atom aliases shared by every residue, every protein residue and
        # every nucleic-acid residue respectively.
        allResidues = {}
        proteinResidues = {}
        nucleicAcidResidues = {}
        shared_tables = {'All': allResidues,
                         'Protein': proteinResidues,
                         'Nucleic': nucleicAcidResidues}
        for node in residue_nodes:
            label = node.attrib['name']
            if label in shared_tables:
                PDBTrajectoryFile._parseResidueAtoms(node, shared_tables[label])

        # Aliases valid for all residues override the type-specific ones.
        proteinResidues.update(allResidues)
        nucleicAcidResidues.update(allResidues)

        tables_by_type = {'Protein': proteinResidues, 'Nucleic': nucleicAcidResidues}
        for node in residue_nodes:
            label = node.attrib['name']
            for key, value in node.attrib.items():
                if key == 'name' or key.startswith('alt'):
                    PDBTrajectoryFile._residueNameReplacements[value] = label
            # Residues without a (known) type start from the generic table.
            base_table = tables_by_type.get(node.attrib.get('type'), allResidues)
            atom_table = copy(base_table)
            PDBTrajectoryFile._parseResidueAtoms(node, atom_table)
            PDBTrajectoryFile._atomNameReplacements[label] = atom_table

    def _guess_element(self, atom_name, residue):
        """Try to guess the element from an atom name.

        Parameters
        ----------
        atom_name : str
            The atom name.
        residue : object
            The residue the atom belongs to. Its ``len()`` and ``name`` are
            used to tell ions from similarly named atoms (e.g. calcium from
            an alpha carbon).

        Returns
        -------
        element
            The guessed element, or None when the name is empty or no guess
            could be made.
        """
        # An empty name carries no information; without this guard the
        # fallback below would fail on ``atom_name[0]``.
        if not atom_name:
            return None

        upper = atom_name.upper()
        if upper.startswith('CL'):
            element = elem.chlorine
        elif upper.startswith('NA'):
            element = elem.sodium
        elif upper.startswith('MG'):
            element = elem.magnesium
        elif upper.startswith('BE'):
            element = elem.beryllium
        elif upper.startswith('LI'):
            element = elem.lithium
        elif upper.startswith('K'):
            element = elem.potassium
        elif upper.startswith('ZN'):
            element = elem.zinc
        elif len(residue) == 1 and upper.startswith('CA'):
            # Only treated as calcium when the residue has length 1.
            element = elem.calcium

        # TJL has edited this. There are a few issues here. First,
        # parsing for the element is non-trivial, so I do my best
        # below. Second, there is additional parsing code in
        # pdbstructure.py, and I am unsure why it doesn't get used
        # here...
        elif len(residue) > 1 and upper.startswith('CE'):
            element = elem.carbon  # (probably) not cerium (Ce)...
        elif len(residue) > 1 and upper.startswith('CD'):
            element = elem.carbon  # (probably) not cadmium (Cd)...
        elif residue.name in ['TRP', 'ARG', 'GLN', 'HIS'] and upper.startswith('NE'):
            element = elem.nitrogen  # (probably) not neon (Ne)...
        elif residue.name in ['ASN'] and upper.startswith('ND'):
            element = elem.nitrogen  # (probably) not neodymium (Nd)...
        elif residue.name == 'CYS' and upper.startswith('SG'):
            element = elem.sulfur  # (probably) not seaborgium (Sg)...
        else:
            # Fall back to the first character, then to a cleaned-up
            # two-character prefix, and finally give up.
            try:
                element = elem.get_by_symbol(atom_name[0])
            except KeyError:
                try:
                    symbol = atom_name[0:2].strip().rstrip("AB0123456789").lstrip("0123456789")
                    element = elem.get_by_symbol(symbol)
                except KeyError:
                    element = None

        return element

    @staticmethod
    def _parseResidueAtoms(residue, map):
        """Record every attribute value of each ``Atom`` child as an alias.

        For each ``Atom`` element under ``residue``, every attribute value
        (including the ``name`` itself) becomes a key of ``map`` pointing at
        that atom's ``name``. ``map`` is updated in place.
        """
        for atom_node in residue.findall('Atom'):
            canonical = atom_node.attrib['name']
            for alias in atom_node.attrib.values():
                map[alias] = canonical

    def __del__(self):
        """Close the file when the object is finalised."""
        self.close()

    def __enter__(self):
        """Enter a ``with`` block; the file object itself is the target."""
        return self

    def __exit__(self, *exc_info):
        """Leave a ``with`` block by closing the file; exceptions propagate."""
        self.close()

    def __len__(self):
        """Return the number of frames in a file opened for reading.

        Raises
        ------
        NotImplementedError
            If the file was not opened in mode 'r'.
        ValueError
            If the file is closed.
        """
        opened_for_reading = str(self._mode) == 'r'
        if not opened_for_reading:
            raise NotImplementedError('len() only available in mode="r" currently')
        if self._open:
            return len(self._positions)
        raise ValueError('I/O operation on closed file')
Beispiel #28
0
class PDBTrajectoryFile(object):
    """Interface for reading and writing Protein Data Bank (PDB) files

    Parameters
    ----------
    filename : str
        The filename to open. A path to a file on disk.
    mode : {'r', 'w'}
        The mode in which to open the file, either 'r' for read or 'w' for write.
    force_overwrite : bool
        If opened in write mode, and a file by the name of `filename` already
        exists on disk, should we overwrite it?

    Attributes
    ----------
    positions : np.ndarray, shape=(n_frames, n_atoms, 3)
    topology : mdtraj.Topology
    closed : bool

    Notes
    -----
    When writing pdb files, mdtraj follows the PDB3.0 standard as closely as
    possible. During *reading* however, we try to be more lenient. For instance,
    we will parse common nonstandard atom names during reading, and convert them
    into the standard names. The replacement table used by mdtraj is at
    {mdtraj_source}/formats/pdb/data/pdbNames.xml.

    See Also
    --------
    mdtraj.load_pdb : High-level wrapper that returns a ``md.Trajectory``
    """
    distance_unit = 'angstroms'
    # Class-level caches, filled on first read by _loadNameReplacementTables().
    _residueNameReplacements = {}
    _atomNameReplacements = {}
    # Chain identifiers that write() cycles through; see set_chain_names().
    _chain_names = [chr(ord('A') + i) for i in range(26)]

    def __init__(self, filename, mode='r', force_overwrite=True):
        self._open = False
        self._file = None
        self._topology = None
        self._positions = None
        # Defined up front so the unitcell_* properties return None instead
        # of raising AttributeError on a file opened for writing.
        self._unitcell_lengths = None
        self._unitcell_angles = None
        self._mode = mode
        self._last_topology = None

        if mode == 'r':
            PDBTrajectoryFile._loadNameReplacementTables()

            if _is_url(filename):
                self._file = urlopen(filename)
                if filename.lower().endswith('.gz'):
                    if six.PY3:
                        self._file = gzip.GzipFile(fileobj=self._file)
                    else:
                        self._file = gzip.GzipFile(fileobj=six.StringIO(
                            self._file.read()))
                if six.PY3:
                    # Downloaded content is bytes on Python 3; decode it into
                    # an in-memory text buffer.
                    self._file = six.StringIO(self._file.read().decode('utf-8'))
            else:
                self._file = open_maybe_zipped(filename, 'r')

            self._read_models()
        elif mode == 'w':
            self._header_written = False
            self._footer_written = False
            self._file = open_maybe_zipped(filename, 'w', force_overwrite)
        else:
            raise ValueError("invalid mode: %s" % mode)

        self._open = True

    def write(self, positions, topology, modelIndex=None, unitcell_lengths=None, 
              unitcell_angles=None, bfactors=None):
        """Write a PDB file to disk

        Parameters
        ----------
        positions : array_like
            The list of atomic positions to write.
        topology : mdtraj.Topology
            The Topology defining the model to write.
        modelIndex : {int, None}
            If not None, the model will be surrounded by MODEL/ENDMDL records
            with this index
        unitcell_lengths : {tuple, None}
            Lengths of the three unit cell vectors, or None for a non-periodic system
        unitcell_angles : {tuple, None}
            Angles between the three unit cell vectors, or None for a non-periodic system
        bfactors : array_like, default=None, shape=(n_atoms,)
            Save bfactors with pdb file. Should contain a single number for
            each atom in the topology

        Raises
        ------
        ValueError
            If the file is not open for writing, the number of positions does
            not match the number of atoms, a position is NaN or infinite, or
            a bfactor lies outside (-10, 100).
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')
        if not self._header_written:
            self._write_header(unitcell_lengths, unitcell_angles)
            self._header_written = True

        if ilen(topology.atoms) != len(positions):
            raise ValueError('The number of positions must match the number of atoms')
        if np.any(np.isnan(positions)):
            raise ValueError('Particle position is NaN')
        if np.any(np.isinf(positions)):
            raise ValueError('Particle position is infinite')
        
        self._last_topology = topology  # Hack to save the topology of the last frame written, allows us to output CONECT entries in write_footer()

        if bfactors is None:
            bfactors = ['{0:5.2f}'.format(0.0)] * len(positions)
        else:
            # The bfactor column is five characters wide ('{0:5.2f}').
            if (np.max(bfactors) >= 100) or (np.min(bfactors) <= -10):
                raise ValueError("bfactors must be in (-10, 100)")

            bfactors = ['{0:5.2f}'.format(b) for b in bfactors]
        
        atomIndex = 1
        posIndex = 0
        if modelIndex is not None:
            print("MODEL     %4d" % modelIndex, file=self._file)
        for (chainIndex, chain) in enumerate(topology.chains):
            chainName = self._chain_names[chainIndex % len(self._chain_names)]
            residues = list(chain.residues)
            for (resIndex, res) in enumerate(residues):
                # Residue names are truncated to the 3-character column.
                if len(res.name) > 3:
                    resName = res.name[:3]
                else:
                    resName = res.name
                for atom in res.atoms:
                    # Short names of unknown or one-letter elements get a
                    # leading space; longer names are cut to 4 characters.
                    if len(atom.name) < 4 and atom.name[:1].isalpha() and (atom.element is None or len(atom.element.symbol) < 2):
                        atomName = ' '+atom.name
                    elif len(atom.name) > 4:
                        atomName = atom.name[:4]
                    else:
                        atomName = atom.name
                    coords = positions[posIndex]
                    if atom.element is not None:
                        symbol = atom.element.symbol
                    else:
                        symbol = ' '
                    # Serial and residue numbers wrap around so they fit
                    # their fixed-width columns.
                    line = "ATOM  %5d %-4s %3s %s%4d    %s%s%s  1.00 %s          %2s  " % (
                        atomIndex % 100000, atomName, resName, chainName,
                        (res.resSeq) % 10000, _format_83(coords[0]),
                        _format_83(coords[1]), _format_83(coords[2]),
                        bfactors[posIndex], symbol)
                    assert len(line) == 80, 'Fixed width overflow detected'
                    print(line, file=self._file)
                    posIndex += 1
                    atomIndex += 1
                if resIndex == len(residues)-1:
                    # The TER record closing a chain consumes a serial number
                    # too; _write_footer() accounts for this.
                    print("TER   %5d      %3s %s%4d" % (atomIndex, resName, chainName, res.resSeq), file=self._file)
                    atomIndex += 1

        if modelIndex is not None:
            print("ENDMDL", file=self._file)

    def _write_header(self, unitcell_lengths, unitcell_angles, write_metadata=True):
        """Write out the header for a PDB file.

        Nothing at all is written when both unit cell arguments are None.

        Parameters
        ----------
        unitcell_lengths : {tuple, None}
            The lengths of the three unitcell vectors, ``a``, ``b``, ``c``
        unitcell_angles : {tuple, None}
            The angles between the three unitcell vectors, ``alpha``,
            ``beta``, ``gamma``
        write_metadata : bool, default=True
            Whether to write a REMARK record naming the MDTraj version and
            today's date before the CRYST1 record.

        Raises
        ------
        ValueError
            If the file is not open for writing, if only one of the two unit
            cell arguments is given, or if either is not of length 3.
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        if unitcell_lengths is None and unitcell_angles is None:
            return
        if unitcell_lengths is not None and unitcell_angles is not None:
            if not len(unitcell_lengths) == 3:
                raise ValueError('unitcell_lengths must be length 3')
            if not len(unitcell_angles) == 3:
                raise ValueError('unitcell_angles must be length 3')
        else:
            # Message fixed: the two literals used to be joined without a
            # space and 'specified' was misspelled.
            raise ValueError('either unitcell_lengths and unitcell_angles '
                             'should both be specified, or neither')

        box = list(unitcell_lengths) + list(unitcell_angles)
        assert len(box) == 6

        if write_metadata:
            print("REMARK   1 CREATED WITH MDTraj %s, %s" % (version.version, str(date.today())), file=self._file)
        print("CRYST1%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f P 1           1 " % tuple(box), file=self._file)

    def _write_footer(self):
        """Write the CONECT records (if any) followed by the END record.

        CONECT records are produced from the bonds of the last topology
        passed to ``write()``. Only bonds that touch a residue outside
        ``standardResidues``, and SG-SG bonds between two CYS residues, are
        written.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'w'.
        """
        if not self._mode == 'w':
            raise ValueError('file not opened for writing')

        # Identify bonds that should be listed as CONECT records.
        standardResidues = ['ALA', 'ASN', 'CYS', 'GLU', 'HIS', 'LEU', 'MET', 'PRO', 'THR', 'TYR',
                            'ARG', 'ASP', 'GLN', 'GLY', 'ILE', 'LYS', 'PHE', 'SER', 'TRP', 'VAL',
                            'A', 'G', 'C', 'U', 'I', 'DA', 'DG', 'DC', 'DT', 'DI', 'HOH']
        conectBonds = []
        if self._last_topology is not None:
            for atom1, atom2 in self._last_topology.bonds:
                if atom1.residue.name not in standardResidues or atom2.residue.name not in standardResidues:
                    conectBonds.append((atom1, atom2))
                elif atom1.name == 'SG' and atom2.name == 'SG' and atom1.residue.name == 'CYS' and atom2.residue.name == 'CYS':
                    conectBonds.append((atom1, atom2))
        if len(conectBonds) > 0:

            # Work out the index used in the PDB file for each atom. This
            # mirrors write(): numbering starts at 1 and the TER record after
            # each chain consumes one serial number.

            atomIndex = {}
            nextAtomIndex = 0
            prevChain = None
            for chain in self._last_topology.chains:
                for atom in chain.atoms:
                    if atom.residue.chain != prevChain:
                        nextAtomIndex += 1
                        prevChain = atom.residue.chain
                    atomIndex[atom] = nextAtomIndex
                    nextAtomIndex += 1

            # Record which other atoms each atom is bonded to.

            atomBonds = {}
            for atom1, atom2 in conectBonds:
                index1 = atomIndex[atom1]
                index2 = atomIndex[atom2]
                atomBonds.setdefault(index1, []).append(index2)
                atomBonds.setdefault(index2, []).append(index1)

            # Write the CONECT records, at most four bonded atoms per line.
            # Every partner is written: the previous loop printed three
            # partners but discarded four, losing every fourth bond.

            for index1 in sorted(atomBonds):
                bonded = atomBonds[index1]
                for start in range(0, len(bonded), 4):
                    line = "CONECT%5d" % index1
                    for index2 in bonded[start:start + 4]:
                        line = "%s%5d" % (line, index2)
                    print(line, file=self._file)
        print("END", file=self._file)
        self._footer_written = True

    @classmethod
    def set_chain_names(cls, values):
        """Set the cycle of chain names used when writing PDB files

        When writing PDB files, PDBTrajectoryFile translates each chain's
        index into a name -- the name is what's written in the file. By
        default, chains are named with the letters A-Z.

        Parameters
        ----------
        values : list
            A list of characters (strings of length 1) that the PDB writer
            will cycle through to choose chain names.

        Raises
        ------
        TypeError
            If any entry of ``values`` is not a string of length 1.
        """
        for item in values:
            # Both conditions must hold for a valid name, so the negation has
            # to wrap the whole conjunction. Without the parentheses ``not``
            # binds only to isinstance() and multi-character strings slip
            # through unchecked.
            if not (isinstance(item, six.string_types) and len(item) == 1):
                raise TypeError('Names must be a single character string')
        cls._chain_names = values

    @property
    def positions(self):
        """The cartesian coordinates of all of the atoms in each frame. Available when a file is opened in mode='r'
        """
        return self._positions

    @property
    def topology(self):
        """The topology from this PDB file. Available when a file is opened in mode='r'
        """
        return self._topology

    @property
    def unitcell_lengths(self):
        "The unitcell lengths (3-tuple) in this PDB file. May be None"
        return self._unitcell_lengths

    @property
    def unitcell_angles(self):
        "The unitcell angles (3-tuple) in this PDB file. May be None"
        return self._unitcell_angles

    @property
    def closed(self):
        "Whether the file is closed"
        return not self._open

    def close(self):
        """Close the PDB file.

        For a file open in mode 'w' whose footer has not been written yet,
        the footer is written first. Calling ``close()`` on a file that is
        not open does nothing, so it is safe to call repeatedly and from
        ``__del__``.
        """
        # Only touch the file while it is actually open. If opening for
        # writing failed in __init__, self._file is still None and writing
        # the footer would print "END" to stdout.
        if self._open:
            try:
                if self._mode == 'w' and not self._footer_written:
                    self._write_footer()
            finally:
                # Release the handle even if writing the footer failed.
                self._file.close()
                self._open = False
        self._open = False

    def _read_models(self):
        """Parse the open PDB file into a topology, positions and unit cell.

        The topology is built from the chains, residues and atoms yielded by
        ``PdbStructure.iter_chains()``. Residue and atom names are mapped
        through the class-level replacement tables, and an element is guessed
        from the atom name when the parsed atom has none. Positions are
        collected from every model. Bonds come from
        ``create_standard_bonds()``, ``create_disulfide_bonds()`` and the
        CONECT records of the last model.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r', or if the models do not
            all contain the same number of atoms.
        """
        if not self._mode == 'r':
            raise ValueError('file not opened for reading')

        self._topology = Topology()

        pdb = PdbStructure(self._file, load_all_models=True)

        # Maps PDB serial number -> topology atom, used to resolve CONECT.
        atomByNumber = {}
        for chain in pdb.iter_chains():
            c = self._topology.add_chain()
            for residue in chain.iter_residues():
                resName = residue.get_name()
                if resName in PDBTrajectoryFile._residueNameReplacements:
                    resName = PDBTrajectoryFile._residueNameReplacements[resName]
                r = self._topology.add_residue(resName, c, residue.number)
                r.segment_id = residue.segment_id
                if resName in PDBTrajectoryFile._atomNameReplacements:
                    atomReplacements = PDBTrajectoryFile._atomNameReplacements[resName]
                else:
                    atomReplacements = {}
                for atom in residue.atoms:
                    atomName = atom.get_name()
                    if atomName in atomReplacements:
                        atomName = atomReplacements[atomName]
                    atomName = atomName.strip()
                    element = atom.element
                    if element is None:
                        element = self._guess_element(atomName, residue)

                    newAtom = self._topology.add_atom(atomName, element, r, serial=atom.serial_number)
                    atomByNumber[atom.serial_number] = newAtom

        # load all of the positions (from every model)
        _positions = []
        for model in pdb.iter_models(use_all_models=True):
            coords = []
            for chain in model.iter_chains():
                for residue in chain.iter_residues():
                    for atom in residue.atoms:
                        coords.append(atom.get_position())
            _positions.append(coords)

        if not all(len(f) == len(_positions[0]) for f in _positions):
            raise ValueError('PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(_positions)

        ## The atom positions read from the PDB file
        self._unitcell_lengths = pdb.get_unit_cell_lengths()
        self._unitcell_angles = pdb.get_unit_cell_angles()
        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Add bonds based on CONECT records.
        connectBonds = []
        for connect in pdb.models[-1].connects:
            i = connect[0]
            for j in connect[1:]:
                if i in atomByNumber and j in atomByNumber:
                    connectBonds.append((atomByNumber[i], atomByNumber[j]))
        if len(connectBonds) > 0:
            # Only add bonds that don't already exist (in either direction).
            existingBonds = set(self._topology.bonds)
            for bond in connectBonds:
                if bond not in existingBonds and (bond[1], bond[0]) not in existingBonds:
                    self._topology.add_bond(bond[0], bond[1])
                    existingBonds.add(bond)

    @staticmethod
    def _loadNameReplacementTables():
        """Load the list of atom and residue name replacements.

        The tables are read from ``data/pdbNames.xml`` next to this module
        and stored on the class. Nothing happens when the residue table
        already has entries.
        """
        if len(PDBTrajectoryFile._residueNameReplacements) == 0:
            tree = etree.parse(os.path.join(os.path.dirname(__file__), 'data', 'pdbNames.xml'))
            # Atom aliases shared by all residues, all protein residues and
            # all nucleic-acid residues respectively.
            allResidues = {}
            proteinResidues = {}
            nucleicAcidResidues = {}
            for residue in tree.getroot().findall('Residue'):
                name = residue.attrib['name']
                if name == 'All':
                    PDBTrajectoryFile._parseResidueAtoms(residue, allResidues)
                elif name == 'Protein':
                    PDBTrajectoryFile._parseResidueAtoms(residue, proteinResidues)
                elif name == 'Nucleic':
                    PDBTrajectoryFile._parseResidueAtoms(residue, nucleicAcidResidues)
            for atom in allResidues:
                proteinResidues[atom] = allResidues[atom]
                nucleicAcidResidues[atom] = allResidues[atom]
            for residue in tree.getroot().findall('Residue'):
                name = residue.attrib['name']
                for key in residue.attrib:
                    if key == 'name' or key.startswith('alt'):
                        PDBTrajectoryFile._residueNameReplacements[residue.attrib[key]] = name
                # Start from the shared table matching the residue type, then
                # layer the residue's own atom aliases on top.
                if 'type' not in residue.attrib:
                    atoms = copy(allResidues)
                elif residue.attrib['type'] == 'Protein':
                    atoms = copy(proteinResidues)
                elif residue.attrib['type'] == 'Nucleic':
                    atoms = copy(nucleicAcidResidues)
                else:
                    atoms = copy(allResidues)
                PDBTrajectoryFile._parseResidueAtoms(residue, atoms)
                PDBTrajectoryFile._atomNameReplacements[name] = atoms

    def _guess_element(self, atom_name, residue):
        """Try to guess the element from an atom name.

        Parameters
        ----------
        atom_name : str
            The atom name.
        residue : object
            The residue the atom belongs to. Its ``len()`` and ``name`` are
            used to tell ions from similarly named atoms (e.g. calcium from
            an alpha carbon).

        Returns
        -------
        element
            The guessed element, or None when the name is empty or no guess
            could be made.
        """
        # An empty name carries no information; without this guard the
        # fallback below would fail on ``atom_name[0]``.
        if not atom_name:
            return None

        upper = atom_name.upper()
        if upper.startswith('CL'):
            element = elem.chlorine
        elif upper.startswith('NA'):
            element = elem.sodium
        elif upper.startswith('MG'):
            element = elem.magnesium
        elif upper.startswith('BE'):
            element = elem.beryllium
        elif upper.startswith('LI'):
            element = elem.lithium
        elif upper.startswith('K'):
            element = elem.potassium
        elif upper.startswith('ZN'):
            element = elem.zinc
        elif len(residue) == 1 and upper.startswith('CA'):
            # Only treated as calcium when the residue has length 1.
            element = elem.calcium

        # TJL has edited this. There are a few issues here. First,
        # parsing for the element is non-trivial, so I do my best
        # below. Second, there is additional parsing code in
        # pdbstructure.py, and I am unsure why it doesn't get used
        # here...
        elif len(residue) > 1 and upper.startswith('CE'):
            element = elem.carbon  # (probably) not cerium (Ce)...
        elif len(residue) > 1 and upper.startswith('CD'):
            element = elem.carbon  # (probably) not cadmium (Cd)...
        elif residue.name in ['TRP', 'ARG', 'GLN', 'HIS'] and upper.startswith('NE'):
            element = elem.nitrogen  # (probably) not neon (Ne)...
        elif residue.name in ['ASN'] and upper.startswith('ND'):
            element = elem.nitrogen  # (probably) not neodymium (Nd)...
        elif residue.name == 'CYS' and upper.startswith('SG'):
            element = elem.sulfur  # (probably) not seaborgium (Sg)...
        else:
            # Fall back to the first character, then to a cleaned-up
            # two-character prefix, and finally give up.
            try:
                element = elem.get_by_symbol(atom_name[0])
            except KeyError:
                try:
                    symbol = atom_name[0:2].strip().rstrip("AB0123456789").lstrip("0123456789")
                    element = elem.get_by_symbol(symbol)
                except KeyError:
                    element = None

        return element

    @staticmethod
    def _parseResidueAtoms(residue, map):
        """Record every attribute value of each ``Atom`` child as an alias.

        Every attribute value of an ``Atom`` element (including its ``name``)
        becomes a key of ``map`` pointing at that atom's ``name``. ``map`` is
        updated in place.
        """
        for atom in residue.findall('Atom'):
            name = atom.attrib['name']
            for key in atom.attrib:
                map[atom.attrib[key]] = name

    def __del__(self):
        self.close()

    def __enter__(self):
        return self

    def __exit__(self, *exc_info):
        self.close()

    def __len__(self):
        "Number of frames in the file"
        if str(self._mode) != 'r':
            raise NotImplementedError('len() only available in mode="r" currently')
        if not self._open:
            raise ValueError('I/O operation on closed file')
        return len(self._positions)
Beispiel #29
0
    def _read_models(self):
        """Parse the open PDB file into a topology, positions and unit cell.

        Chains, residues and atoms from ``PdbStructure.iter_chains()`` are
        copied into a fresh ``Topology``; residue and atom names go through
        the class-level replacement tables and a missing element is guessed
        from the atom name. Positions are gathered from every model, then
        standard, disulfide and CONECT bonds are added.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r', or if the models do not
            all contain the same number of atoms.
        """
        if self._mode != 'r':
            raise ValueError('file not opened for reading')

        self._topology = Topology()
        structure = PdbStructure(self._file, load_all_models=True)

        residue_aliases = PDBTrajectoryFile._residueNameReplacements
        atom_alias_tables = PDBTrajectoryFile._atomNameReplacements

        # PDB serial number -> topology atom, needed to resolve CONECT records.
        atoms_by_serial = {}
        for pdb_chain in structure.iter_chains():
            top_chain = self._topology.add_chain()
            for residue in pdb_chain.iter_residues():
                raw_residue_name = residue.get_name()
                residue_name = residue_aliases.get(raw_residue_name, raw_residue_name)
                top_residue = self._topology.add_residue(residue_name, top_chain, residue.number)
                top_residue.segment_id = residue.segment_id
                atom_aliases = atom_alias_tables.get(residue_name, {})
                for pdb_atom in residue.atoms:
                    raw_atom_name = pdb_atom.get_name()
                    atom_name = atom_aliases.get(raw_atom_name, raw_atom_name).strip()
                    element = pdb_atom.element
                    if element is None:
                        element = self._guess_element(atom_name, residue)

                    atoms_by_serial[pdb_atom.serial_number] = self._topology.add_atom(
                        atom_name, element, top_residue, serial=pdb_atom.serial_number)

        # One coordinate list per model, atoms in chain/residue order.
        frames = [
            [pdb_atom.get_position()
             for model_chain in model.iter_chains()
             for model_residue in model_chain.iter_residues()
             for pdb_atom in model_residue.atoms]
            for model in structure.iter_models(use_all_models=True)
        ]

        if len(set(len(frame) for frame in frames)) > 1:
            raise ValueError('PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(frames)

        self._unitcell_lengths = structure.get_unit_cell_lengths()
        self._unitcell_angles = structure.get_unit_cell_angles()
        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Collect the CONECT bonds whose two serial numbers are both known.
        conect_pairs = []
        for record in structure.models[-1].connects:
            first_serial = record[0]
            for other_serial in record[1:]:
                if first_serial in atoms_by_serial and other_serial in atoms_by_serial:
                    conect_pairs.append((atoms_by_serial[first_serial],
                                         atoms_by_serial[other_serial]))
        if conect_pairs:
            # Skip bonds the topology already holds, in either direction.
            known_bonds = set(self._topology.bonds)
            for first, second in conect_pairs:
                if (first, second) in known_bonds or (second, first) in known_bonds:
                    continue
                self._topology.add_bond(first, second)
                known_bonds.add((first, second))
Beispiel #30
0
def load_mol2(filename):
    """Load a TRIPOS mol2 file from disk.

    Parameters
    ----------
    filename : str
        Path to the mol2 file on disk.

    Returns
    -------
    traj : md.Trajectory
        The resulting single-frame trajectory, as an md.Trajectory object,
        whose topology is built from the atom and bond tables of the file.

    Notes
    -----
    This function should work on GAFF and sybyl style MOL2 files, but has
    been primarily tested on GAFF mol2 files.
    This function does NOT accept multi-structure MOL2 files!!!
    The elements are guessed using GAFF atom types or via the atype string.

    Examples
    --------
    >>> traj = md.load_mol2('mysystem.mol2')
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology, Single, Double, Triple, Aromatic, Amide

    atoms, bonds = mol2_to_dataframes(filename)

    atoms_mdtraj = atoms[["name", "resName"]].copy()
    atoms_mdtraj["serial"] = atoms.index

    # Figure out the element symbol of every atom.

    # If this is a GAFF mol2, this line should work without issues
    atoms_mdtraj["element"] = atoms.atype.map(gaff_elements)
    # If this is a sybyl mol2, there should be NAN (null) values
    if atoms_mdtraj.element.isnull().any():
        from mdtraj.core.element import Element

        # If this is a sybyl mol2, I think this works generally.
        # Argument x is being passed as a list with only one element.
        def to_element(x):
            if isinstance(x, (list, tuple)):
                assert len(x) == 1
                x = x[0]

            if '.' in x:  # orbital-hybridizations in SYBL
                return x.split('.')[0]
            try:
                # check if we can convert the whole str to an Element,
                # if not, we only pass the first letter.
                Element.getBySymbol(x)
            except KeyError:
                if len(x) <= 1:
                    # Nothing left to shorten: retrying with x[0] would call
                    # to_element with the very same string forever (or fail
                    # on an empty string). Hand the symbol back unchanged and
                    # let Topology.from_dataframe decide what to do with it.
                    return x
                return to_element(x[0])
            return x
        atoms_mdtraj["element"] = atoms.atype.apply(to_element)

    # A mol2 file handled here holds one structure: put every atom in the
    # same residue number and chain.
    atoms_mdtraj["resSeq"] = np.ones(len(atoms), 'int')
    atoms_mdtraj["chainID"] = np.ones(len(atoms), 'int')

    bond_type_map = {
        '1': Single,
        '2': Double,
        '3': Triple,
        'am': Amide,
        'ar': Aromatic
    }
    if bonds is not None:
        bonds_mdtraj = bonds[["id0", "id1"]].values
        # NOTE(review): the offset is the smallest atom id that appears in any
        # bond. Presumably this is 1 for ordinary mol2 files; if the
        # lowest-numbered atom is unbonded the shift would differ -- confirm.
        offset = bonds_mdtraj.min()  # Should this just be 1???
        bonds_mdtraj -= offset
        # Create the bond augment information: column 0 is the bond type,
        # column 1 the bond "order". The order is not known from Mol2 files,
        # so that column keeps its initial 0.0.
        n_bonds = bonds_mdtraj.shape[0]
        bond_augment = np.zeros([n_bonds, 2], dtype=float)
        # Add bond type information
        bond_augment[:, 0] = [float(bond_type_map[str(bond_value)]) for bond_value in bonds["bond_type"].values]
        # Augment array, dtype is cast to minimal representation of float
        bonds_mdtraj = np.append(bonds_mdtraj, bond_augment, axis=-1)
    else:
        bonds_mdtraj = None

    top = Topology.from_dataframe(atoms_mdtraj, bonds_mdtraj)

    # Wrap the coordinates in an extra leading axis so they form one frame.
    xyzlist = np.array([atoms[["x", "y", "z"]].values])
    xyzlist /= 10.0  # Convert from angstrom to nanometer

    traj = Trajectory(xyzlist, top)

    return traj
Beispiel #31
0
    def _read_models(self):
        """Parse the PDB file into a topology, positions and unit cell.

        Populates ``self._topology``, ``self._positions`` (one entry per
        model), ``self._unitcell_lengths`` and ``self._unitcell_angles``.
        Bonds come from ``create_standard_bonds``, ``create_disulfide_bonds``
        and finally from the CONECT records of the first model.

        Raises
        ------
        ValueError
            If the file was not opened for reading, or if the models do not
            all contain the same number of atoms.
        """
        if self._mode != 'r':
            raise ValueError('file not opened for reading')

        self._topology = Topology()
        structure = PdbStructure(self._file, load_all_models=True)

        residue_renames = PDBTrajectoryFile._residueNameReplacements
        atom_renames = PDBTrajectoryFile._atomNameReplacements

        # Maps a PDB serial number to the topology atom created for it; used
        # further down to resolve CONECT records.
        serial_to_atom = {}
        for pdb_chain in structure.iter_chains():
            top_chain = self._topology.add_chain()
            for pdb_residue in pdb_chain.iter_residues():
                res_name = pdb_residue.get_name()
                res_name = residue_renames.get(res_name, res_name)
                top_residue = self._topology.add_residue(res_name, top_chain, pdb_residue.number)
                # Per-residue table of atom name substitutions (may be empty).
                renames = atom_renames.get(res_name, {})
                for pdb_atom in pdb_residue.atoms:
                    raw_name = pdb_atom.get_name()
                    atom_name = renames.get(raw_name, raw_name).strip()

                    element = pdb_atom.element
                    if element is None:
                        element = self._guess_element(atom_name, pdb_residue)

                    serial = pdb_atom.serial_number
                    serial_to_atom[serial] = self._topology.add_atom(
                        atom_name, element, top_residue, serial=serial)

        # Collect the coordinates of every atom in every model.
        frames = []
        for model in structure.iter_models(use_all_models=True):
            frames.append([
                pdb_atom.get_position()
                for pdb_chain in model.iter_chains()
                for pdb_residue in pdb_chain.iter_residues()
                for pdb_atom in pdb_residue.atoms
            ])

        if any(len(frame) != len(frames[0]) for frame in frames):
            raise ValueError('PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(frames)

        # Unit cell as reported by the PDB structure.
        self._unitcell_lengths = structure.get_unit_cell_lengths()
        self._unitcell_angles = structure.get_unit_cell_angles()

        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Translate CONECT records into pairs of topology atoms, dropping any
        # record that refers to a serial number we did not load.
        conect_pairs = []
        for record in structure.models[0].connects:
            first = record[0]
            if first not in serial_to_atom:
                continue
            for other in record[1:]:
                if other in serial_to_atom:
                    conect_pairs.append((serial_to_atom[first], serial_to_atom[other]))

        if conect_pairs:
            # Skip bonds that are already present in either orientation.
            known = set(self._topology.bonds)
            for atom_a, atom_b in conect_pairs:
                if (atom_a, atom_b) in known or (atom_b, atom_a) in known:
                    continue
                self._topology.add_bond(atom_a, atom_b)
                known.add((atom_a, atom_b))
Beispiel #32
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        Not used by this function.

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with corresponding
        Topology.

    Raises
    ------
    ValueError
        If the file lists a different number of particle types and positions.

    Notes
    -----
    This function requires the NetworkX python package.

    Atoms are added to the topology in the order they appear in the file, so
    that topology atom ``i``, position ``i`` and the atom indices used by the
    bond records all refer to the same particle.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology
    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box_attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    lx = float(box_attrib['lx'])
    ly = float(box_attrib['ly'])
    lz = float(box_attrib['lz'])
    try:
        xy = float(box_attrib['xy'])
        xz = float(box_attrib['xz'])
        yz = float(box_attrib['yz'])
    except KeyError:
        # Tilt factors are optional; if any is missing all three are zeroed.
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx,  xy*ly, xz*lz],
                                  [0.0, ly,    yz*lz],
                                  [0.0, 0.0,   lz   ]]])

    # In each text node the first line (the remainder of the line holding the
    # opening tag) is dropped; whitespace-only lines are ignored so that an
    # indented closing tag does not break parsing.
    positions = []
    for line in position.text.splitlines()[1:]:
        fields = line.split()
        if not fields:
            continue
        positions.append((float(fields[0]), float(fields[1]), float(fields[2])))

    types = []
    for line in atom_type.text.splitlines()[1:]:
        fields = line.split()
        if fields:
            types.append(str(fields[0]))
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type (first column of every bond record)
    bonds = []
    for line in bond.text.splitlines()[1:]:
        fields = line.split()
        if fields:
            bonds.append((int(fields[1]), int(fields[2])))
    chains = _find_chains(bonds)

    # Which bonded group each atom index belongs to; atoms absent from this
    # map are unbonded ("ions").
    chain_of_atom = {}
    for chain_id, chain in enumerate(chains):
        for atom in chain:
            chain_of_atom[atom] = chain_id

    # Add atoms in file order (each chain = 1 residue, each ion gets its own
    # chain and residue). The chain/residue of a bonded group is created the
    # first time one of its atoms is met.
    residue_of_chain = {}
    for idx, atom_name in enumerate(types):
        chain_id = chain_of_atom.get(idx)
        t_residue = None
        if chain_id is not None:
            t_residue = residue_of_chain.get(chain_id)
        if t_residue is None:
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
            if chain_id is not None:
                residue_of_chain[chain_id] = t_residue
        topology.add_atom(atom_name, 'U', t_residue)

    for atom1, atom2 in bonds:
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Beispiel #33
0
    def topology(self, topology_object):
        """Set the topology in the file

        The topology is converted to a nested dict of chains, residues, atoms
        and bonds, serialized as JSON and stored as the ``/topology`` array,
        replacing any ``/topology`` node already present.

        Parameters
        ----------
        topology_object : mdtraj.Topology
            A topology object. Anything that is not an mdtraj Topology is
            assumed to be an OpenMM topology and is converted with
            ``Topology.from_openmm``.

        Raises
        ------
        AttributeError
            If ``topology_object`` lacks one of the chain / residue / atom /
            bond attributes read here.
        """

        # we want to be able to handle the simtk.openmm Topology object
        # here too, so if it's not an mdtraj topology we'll just guess
        # that it's probably an openmm topology and convert
        if not isinstance(topology_object, Topology):
            topology_object = Topology.from_openmm(topology_object)

        try:
            topology_dict = {"chains": [], "bonds": []}

            for chain in topology_object.chains:
                chain_dict = {"residues": [], "index": int(chain.index)}
                for residue in chain.residues:
                    residue_dict = {
                        "index": int(residue.index),
                        "name": str(residue.name),
                        "atoms": [],
                        "resSeq": int(residue.resSeq),
                    }

                    for atom in residue.atoms:

                        # Atoms without a usable element are stored with an
                        # empty element symbol.
                        try:
                            element_symbol_string = str(atom.element.symbol)
                        except AttributeError:
                            element_symbol_string = ""

                        residue_dict["atoms"].append(
                            {"index": int(atom.index), "name": str(atom.name), "element": element_symbol_string}
                        )
                    chain_dict["residues"].append(residue_dict)
                topology_dict["chains"].append(chain_dict)

            for atom1, atom2 in topology_object.bonds:
                topology_dict["bonds"].append([int(atom1.index), int(atom2.index)])

        except AttributeError as e:
            raise AttributeError(
                "topology_object fails to implement the "
                "chains() -> residue() -> atoms() and bond() protocol. "
                "Specifically, we encountered the following %s" % e
            )

        # actually set the tables
        try:
            self._remove_node(where="/", name="topology")
        except self.tables.NoSuchNodeError:
            pass

        data = json.dumps(topology_dict)
        if not isinstance(data, bytes):
            data = data.encode("ascii")

        # Compare the major version numerically: a plain string comparison
        # would sort e.g. "10.0.0" before "3.0.0".
        major_version = int(self.tables.__version__.split(".")[0])
        if major_version >= 3:
            self._handle.create_array(where="/", name="topology", obj=[data])
        else:
            self._handle.createArray(where="/", name="topology", object=[data])
Beispiel #34
0
    def __init__(self, topology, use_chains=None):
        """Build a reduced CA/CB topology from a reference topology.

        Every residue of the selected chains contributes one ``CA`` atom and,
        unless it is named ``GLY``, one ``CB<code>`` atom bonded to that CA.
        Consecutive CA atoms of the same chain are bonded to each other.

        Parameters
        ----------
        topology : Topology
            Reference topology; a copy is kept in ``self._ref_topology``.
        use_chains : container of int, optional
            Positions (in ``topology._chains``) of the chains to include.
            All chains are used when omitted.

        Raises
        ------
        IndexError
            If a residue of a selected chain has no atom named ``CA``.
        """
        if use_chains is None:
            use_chains = range(len(topology._chains))

        self._ref_topology = topology.copy()

        # Both kinds of new atoms are created as carbon.
        carbon = md.core.element.get_by_symbol('C')

        # Build new topology
        newTopology = Topology()
        new_atm_idx = 0
        res_idx = 1
        prev_ca = None
        ca_idxs = []  # [reference CA atom index, new atom index] per residue
        self._sidechain_idxs = []
        self._sidechain_mass = []
        self._chain_indices = []  # reference chain position of every new atom
        for chain_count, chain in enumerate(topology._chains):
            if chain_count not in use_chains:
                continue
            newChain = newTopology.add_chain()
            for residue in chain._residues:
                newResidue = newTopology.add_residue(residue.name, newChain, res_idx)
                # map CA
                new_ca = newTopology.add_atom('CA', carbon, newResidue,
                                              serial=new_atm_idx)
                self._chain_indices.append(chain_count)
                # only bond atoms in the same chain.
                if prev_ca is not None and \
                        new_ca.residue.chain.index == prev_ca.residue.chain.index:
                    newTopology.add_bond(prev_ca, new_ca)
                prev_ca = new_ca

                ref_ca = [atm.index for atm in residue.atoms if atm.name == "CA"]
                if not ref_ca:
                    # Same exception type the failed lookup used to raise,
                    # but naming the offending residue and chain.
                    raise IndexError("residue %s in chain %s has no CA atom"
                                     % (residue, chain))
                ca_idxs.append([ref_ca[0], new_atm_idx])
                new_atm_idx += 1

                if residue.name == 'GLY':
                    # No CB atom is created: store empty placeholders so the
                    # side-chain lists stay aligned with the residues.
                    self._sidechain_idxs.append([])
                    self._sidechain_mass.append([])
                else:
                    # map CB
                    cb_name = "CB%s" % atom_types.residue_code[residue.name]
                    new_cb = newTopology.add_atom(cb_name, carbon, newResidue,
                                                  serial=new_atm_idx)
                    self._chain_indices.append(chain_count)

                    newTopology.add_bond(new_cb, new_ca)

                    # Non-hydrogen side-chain atoms of the reference residue
                    # that this CB atom stands for.
                    heavy_sidechain = [atm for atm in residue.atoms
                                       if atm.is_sidechain and atm.element.symbol != "H"]
                    self._sidechain_idxs.append(
                        [[atm.index for atm in heavy_sidechain], new_atm_idx])
                    self._sidechain_mass.append(
                        np.array([atm.element.mass for atm in heavy_sidechain]))
                    new_atm_idx += 1
                res_idx += 1

        self._ca_idxs = np.array(ca_idxs)
        self.topology = newTopology
        assert self.topology.n_atoms == len(self._chain_indices)