Beispiel #1
0
def load_gsd_topology(filename, frame=0):
    """Create an MDTraj.Topology from one frame of a GSD file.

    Parameters
    ----------
    filename : path-like
        Path of GSD trajectory file.
    frame : int, default 0
        Index of the GSD frame whose particles and bonds define the topology.

    Returns
    -------
    top : mdtraj.Topology
        Topology with a single chain holding a single residue named 'A'.
        Every particle becomes an atom named after its particle type, and
        every entry of the frame's bond group becomes a bond.

    Notes
    -----
    GSD files support systems with variable topologies.
    An MDTraj topology is static, so only the requested frame (frame 0 by
    default) is used to construct it.
    """
    import gsd.hoomd
    with gsd.hoomd.open(filename, 'rb') as gsdfile:
        # Index the trajectory once and reuse the result instead of looking
        # the same frame up separately for types, type ids and bonds.
        snapshot = gsdfile[frame]

        top = Topology()
        generic_chain = top.add_chain()
        generic_residue = top.add_residue('A', generic_chain)

        all_particle_types = snapshot.particles.types
        for particle_type_id in snapshot.particles.typeid:
            top.add_atom(all_particle_types[particle_type_id], virtual_site,
                         generic_residue)

        # Only the first two entries of each bond record are used; they are
        # passed to top.atom() as atom indices.
        for bond in snapshot.bonds.group:
            atom1, atom2 = bond[0], bond[1]
            top.add_bond(top.atom(atom1), top.atom(atom2))

    return top
Beispiel #2
0
    def __init__(self, topology):
        r"""Calpha representation mapping

        Maps an all-atom representation to just the C-alpha's of the backbone.

        After construction the instance holds a copy of the input topology
        (``_ref_topology``), the coarse-grained topology (``topology``) with
        one 'CA' atom per residue, and ``_ca_idxs``, an array whose rows are
        ``[all-atom CA index, coarse-grained atom index]``.

        Parameters
        ----------
        topology : mdtraj.Topology object
            All-atom topology. The number of atoms named "CA" must equal the
            number of residues.

        Raises
        ------
        AssertionError
            If the number of "CA" atoms differs from the number of residues.
        """

        n_calphas = sum(1 for atm in topology.atoms if atm.name == "CA")
        assert (
            n_calphas == topology.n_residues
        ), " number of C-alpha is not equal to number of residues! check for missing or non-standard amino acids."

        self._ref_topology = topology.copy()

        # Build new topology
        newTopology = Topology()
        prev_ca = None
        ca_idxs = []
        atm_idx = 0
        for chain in topology._chains:
            newChain = newTopology.add_chain()
            for residue in chain._residues:
                # Fall back to the residue index only when resSeq is really
                # missing; a plain `or` would also discard a valid resSeq of 0.
                resSeq = getattr(residue, 'resSeq', None)
                if resSeq is None:
                    resSeq = residue.index
                newResidue = newTopology.add_residue(residue.name, newChain,
                                                     resSeq)
                # map CA
                new_ca = newTopology.add_atom(
                    'CA',
                    md.core.element.get_by_symbol('C'),
                    newResidue,
                    serial=atm_idx)

                # Pair the first "CA" atom of the all-atom residue with the
                # index of the coarse-grained atom that replaces it.
                old_ca_idx = [
                    atm.index for atm in residue.atoms if atm.name == "CA"
                ][0]
                ca_idxs.append([old_ca_idx, atm_idx])

                # Only bond consecutive C-alphas that sit in the same chain.
                if (prev_ca is not None and
                        prev_ca.residue.chain.index == new_ca.residue.chain.index):
                    newTopology.add_bond(prev_ca, new_ca)
                prev_ca = new_ca
                atm_idx += 1

        self._ca_idxs = np.array(ca_idxs)
        self.topology = newTopology
Beispiel #3
0
    def topology(self):
        """Rebuild the topology stored in the file.

        The 'topology' node is read from the file root, decoded to text when
        necessary and parsed as JSON. Chains, residues and atoms are added
        in ascending order of their stored 'index' field, followed by the
        bonds, which are given as pairs of atom positions.

        Returns
        -------
        topology : mdtraj.Topology or None
            The reconstructed topology, or None when the file has no
            'topology' node.
        """
        try:
            raw = self._get_node(self._handle.root, name='topology')[0]
            if not isinstance(raw, string_types):
                raw = raw.decode()
            topology_dict = json.loads(raw)
        except self.tables.NoSuchNodeError:
            return None

        by_index = operator.itemgetter('index')
        top = Topology()

        for chain_entry in sorted(topology_dict['chains'], key=by_index):
            new_chain = top.add_chain()
            for residue_entry in sorted(chain_entry['residues'], key=by_index):
                if "resSeq" in residue_entry:
                    res_seq = residue_entry["resSeq"]
                else:
                    res_seq = None
                    warnings.warn(
                        'No resSeq information found in HDF file, defaulting to zero-based indices'
                    )
                # Files without segment information get an empty segment id.
                seg_id = residue_entry.get("segmentID", "")
                new_residue = top.add_residue(residue_entry['name'],
                                              new_chain,
                                              resSeq=res_seq,
                                              segment_id=seg_id)
                for atom_entry in sorted(residue_entry['atoms'], key=by_index):
                    # A missing or unknown element symbol yields a virtual
                    # site.
                    try:
                        element = elem.get_by_symbol(atom_entry['element'])
                    except KeyError:
                        element = elem.virtual
                    top.add_atom(atom_entry['name'], element, new_residue)

        # Bonds refer to atoms by their position in the rebuilt atom list.
        atom_list = list(top.atoms)
        for first, second in topology_dict['bonds']:
            top.add_bond(atom_list[first], atom_list[second])

        return top
Beispiel #4
0
def create_water_topology_on_disc(n):
    """Write a PDB file describing ``n`` three-atom H-O-H residues.

    All residues are placed in one chain and named 'r0', 'r1', ...; every
    atom sits at the origin. The file is created in the default temporary
    directory and is not deleted by this function.

    Parameters
    ----------
    n : int
        Number of residues to create.

    Returns
    -------
    topfile : str
        Path of the written '.pdb' file.
    """
    top = Topology()
    chain = top.add_chain()

    for i in range(n):
        res = top.add_residue('r%i' % i, chain)
        h1 = top.add_atom('H', hydrogen, res)
        o = top.add_atom('O', oxygen, res)
        h2 = top.add_atom('H', hydrogen, res)
        top.add_bond(h1, o)
        top.add_bond(h2, o)

    xyz = np.zeros((n * 3, 3))

    # tempfile.mktemp() is deprecated: it only returns a name, so another
    # process can create the file first. Reserve the file atomically and
    # close the handle before the PDB writer reopens the path.
    with tempfile.NamedTemporaryFile(suffix='.pdb', delete=False) as handle:
        topfile = handle.name
    Trajectory(xyz, top).save_pdb(topfile)
    return topfile
Beispiel #5
0
    def _read_models(self):
        """Parse the PDB file into a topology, positions and unit cell.

        Sets ``self._topology``, ``self._positions`` (one entry per MODEL),
        ``self._unitcell_lengths`` and ``self._unitcell_angles``. Bonds come
        from the topology's standard and disulfide bond builders plus any
        CONECT records of the last model that are not already present.

        Raises
        ------
        ValueError
            If the file was not opened in mode 'r', or if the models do not
            all contain the same number of atoms.
        """
        if self._mode != 'r':
            raise ValueError('file not opened for reading')

        self._topology = Topology()

        pdb = PdbStructure(self._file, load_all_models=True)

        residue_renames = PDBTrajectoryFile._residueNameReplacements
        atom_rename_tables = PDBTrajectoryFile._atomNameReplacements

        # Maps PDB serial numbers to topology atoms, for the CONECT records.
        atoms_by_serial = {}
        for pdb_chain in pdb.iter_chains():
            new_chain = self._topology.add_chain()
            for pdb_residue in pdb_chain.iter_residues():
                res_name = pdb_residue.get_name()
                if res_name in residue_renames:
                    res_name = residue_renames[res_name]
                new_residue = self._topology.add_residue(
                    res_name, new_chain, pdb_residue.number)
                new_residue.segment_id = pdb_residue.segment_id

                # Atom renames are looked up under the replaced residue name.
                if res_name in atom_rename_tables:
                    atom_renames = atom_rename_tables[res_name]
                else:
                    atom_renames = {}

                for pdb_atom in pdb_residue.atoms:
                    atom_name = pdb_atom.get_name()
                    if atom_name in atom_renames:
                        atom_name = atom_renames[atom_name]
                    atom_name = atom_name.strip()

                    element = pdb_atom.element
                    if element is None:
                        element = self._guess_element(atom_name, pdb_residue)

                    serial = pdb_atom.serial_number
                    atoms_by_serial[serial] = self._topology.add_atom(
                        atom_name, element, new_residue, serial=serial)

        # load all of the positions (from every model)
        frames = [
            [
                pdb_atom.get_position()
                for pdb_chain in model.iter_chains()
                for pdb_residue in pdb_chain.iter_residues()
                for pdb_atom in pdb_residue.atoms
            ]
            for model in pdb.iter_models(use_all_models=True)
        ]

        if any(len(frame) != len(frames[0]) for frame in frames):
            raise ValueError('PDB Error: All MODELs must contain the same number of ATOMs')

        self._positions = np.array(frames)

        # Unit cell, then bonds derived from the topology itself.
        self._unitcell_lengths = pdb.get_unit_cell_lengths()
        self._unitcell_angles = pdb.get_unit_cell_angles()
        self._topology.create_standard_bonds()
        self._topology.create_disulfide_bonds(self.positions[0])

        # Add bonds based on CONECT records.
        conect_pairs = []
        for record in pdb.models[-1].connects:
            origin = record[0]
            for partner in record[1:]:
                if origin in atoms_by_serial and partner in atoms_by_serial:
                    conect_pairs.append(
                        (atoms_by_serial[origin], atoms_by_serial[partner]))

        if conect_pairs:
            # Only add bonds that don't already exist, in either direction.
            known_bonds = set(self._topology.bonds)
            for pair in conect_pairs:
                reverse_pair = (pair[1], pair[0])
                if pair in known_bonds or reverse_pair in known_bonds:
                    continue
                self._topology.add_bond(pair[0], pair[1])
                known_bonds.add(pair)
Beispiel #6
0
def load_hoomdxml(filename, top=None):
    """Load a single conformation from an HOOMD-Blue XML file.

    For more information on this file format, see:
    http://codeblue.umich.edu/hoomd-blue/doc/page_xml_file_format.html
    Notably, all node names and attributes are in all lower case.
    HOOMD-Blue does not contain residue and chain information explicitly.
    For this reason, chains will be found by looping over all the bonds and
    finding what is bonded to what.
    Each chain consists of exactly one residue.

    Parameters
    ----------
    filename : string
        The path on disk to the XML file
    top : None
        This argument is ignored

    Returns
    -------
    trajectory : md.Trajectory
        The resulting trajectory, as an md.Trajectory object, with corresponding
        Topology.

    Raises
    ------
    ValueError
        If the number of particle types differs from the number of positions.

    Notes
    -----
    This function requires the NetworkX python package.
    The first line of each data element's text is skipped, as are blank or
    whitespace-only lines.
    """
    from mdtraj.core.trajectory import Trajectory
    from mdtraj.core.topology import Topology

    def _data_rows(text):
        # Split element text into per-line field lists. The first line is
        # dropped as before; empty rows are dropped too so that stray
        # blank lines do not raise IndexError.
        rows = (line.split() for line in text.splitlines()[1:])
        return [fields for fields in rows if fields]

    topology = Topology()
    tree = cElementTree.parse(filename)
    config = tree.getroot().find('configuration')
    position = config.find('position')
    bond = config.find('bond')
    atom_type = config.find('type')  # MDTraj calls this "name"

    box = config.find('box')
    # be generous for case of box attributes
    box.attrib = dict((key.lower(), val) for key, val in box.attrib.items())
    lx = float(box.attrib['lx'])
    ly = float(box.attrib['ly'])
    lz = float(box.attrib['lz'])
    try:
        xy = float(box.attrib['xy'])
        xz = float(box.attrib['xz'])
        yz = float(box.attrib['yz'])
    except (ValueError, KeyError):
        # Any missing or unparsable tilt factor resets all three to zero.
        xy = 0.0
        xz = 0.0
        yz = 0.0
    unitcell_vectors = np.array([[[lx, xy * ly, xz * lz], [0.0, ly, yz * lz],
                                  [0.0, 0.0, lz]]])

    # Split each line once instead of once per coordinate.
    positions = [(float(fields[0]), float(fields[1]), float(fields[2]))
                 for fields in _data_rows(position.text)]
    types = [str(fields[0]) for fields in _data_rows(atom_type.text)]
    if len(types) != len(positions):
        raise ValueError('Different number of types and positions in xml file')

    # ignore the bond type
    if bond is not None and bond.text:
        bonds = [(int(fields[1]), int(fields[2]))
                 for fields in _data_rows(bond.text)]
        chains = _find_chains(bonds)
    else:
        # No <bond> element, or one without any text: nothing is bonded.
        chains = []
        bonds = []

    # Relate the first index in the bonded-group to mdtraj.Residue
    bonded_to_residue = {}
    for i, atom_name in enumerate(types):
        bonded_group = _in_chain(chains, i)
        if bonded_group is None:
            # Unbonded particle: it gets a chain and residue of its own.
            t_chain = topology.add_chain()
            t_residue = topology.add_residue('A', t_chain)
        else:
            group_key = bonded_group[0]
            if group_key not in bonded_to_residue:
                t_chain = topology.add_chain()
                bonded_to_residue[group_key] = topology.add_residue(
                    'A', t_chain)
            t_residue = bonded_to_residue[group_key]
        topology.add_atom(atom_name, virtual_site, t_residue)

    for atom1, atom2 in bonds:
        topology.add_bond(topology.atom(atom1), topology.atom(atom2))

    traj = Trajectory(xyz=np.array(positions), topology=topology)
    traj.unitcell_vectors = unitcell_vectors

    return traj
Beispiel #7
0
    def _read(self):
        """Read a single frame.

        The frame starts with a line whose first field is the atom count.
        It may be followed by a line of six numbers (three cell lengths,
        three cell angles) and then one line per atom. In each atom line,
        field 1 is the atom name, fields 2-4 are the coordinates and fields
        from 6 onwards are the 1-based indices of bonded atoms. If
        ``self.topology`` is None, a single-chain, single-residue topology
        is built from the frame.

        Returns
        -------
        coords : np.ndarray, shape (n_atoms, 3), dtype float32
        cell_lengths : np.ndarray or None
        cell_angles : np.ndarray or None

        Raises
        ------
        _EOF
            If the file has no more data.
        """
        from mdtraj.core.topology import Topology
        from mdtraj.core.element import Element, virtual
        # Read in the number of atoms.
        line = self._fh.readline()
        if line == '':
            raise _EOF()

        self._n_atoms = int(line.split()[0])
        self._line_counter += 1

        coords = np.empty((self._n_atoms, 3), dtype=np.float32)
        bond_partners = [[] for _ in range(self._n_atoms)]
        atom_names = [''] * self._n_atoms
        s = self._fh.readline().split()
        self._line_counter += 1

        # See if we have box info on this line or not
        cell_lengths = cell_angles = None
        if len(s) == 6:
            try:
                # Convert all six fields before assigning anything, so a
                # line that only partly parses is never taken as box info.
                box = [float(field) for field in s]
            except ValueError:
                pass
            else:
                cell_lengths = np.asarray(box[:3])
                cell_angles = np.asarray(box[3:])
                # The box line is consumed: move on to the first atom line
                # AND re-split it. Without the split the first atom would
                # be filled from the box line's fields.
                s = self._fh.readline().split()
                self._line_counter += 1

        def _store_atom(index, fields):
            # Record name, bond partners and coordinates of one atom line.
            atom_names[index] = fields[1]
            bond_partners[index] = [int(x) for x in fields[6:]]
            coords[index, :] = [float(fields[pos]) for pos in (2, 3, 4)]

        # Every atom but the last is followed by a read of the next line;
        # the last one is not, so the file stays at the next frame's header.
        for i in range(self._n_atoms - 1):
            _store_atom(i, s)
            s = self._fh.readline().split()
            self._line_counter += 1
        _store_atom(self._n_atoms - 1, s)

        # Now see if we have to build a topology
        if self.topology is None:
            self.topology = top = Topology()
            chain = top.add_chain()  # only 1 chain
            res = top.add_residue('RES', chain, 1)  # only 1 residue
            for at in atom_names:
                # First get the element. Try for common 2-letter elements, then
                # use the first letter only (fall back to a virtual site if
                # the symbol is unknown)
                if at[:2].upper() in ('NA', 'CL', 'MG'):
                    elem = Element.getBySymbol(at[:2])
                else:
                    try:
                        elem = Element.getBySymbol(at[0])
                    except KeyError:
                        elem = virtual
                top.add_atom(at, elem, res)
            # Now add the bonds
            atoms = list(top.atoms)
            for i, bonds in enumerate(bond_partners):
                me = atoms[i]
                for b in bonds:
                    b -= 1  # the file uses 1-based atom indices
                    # Each bond is listed by both partners; add it only from
                    # the lower-indexed side.
                    if b < i:
                        continue
                    top.add_bond(me, atoms[b])

        self._frame_index += 1
        return coords, cell_lengths, cell_angles
Beispiel #8
0
    def __init__(self, topology, use_chains=None):
        """Build a C-alpha / C-beta coarse-grained mapping of ``topology``.

        Every residue of the selected chains contributes a 'CA' atom; every
        residue not named 'GLY' also contributes a 'CB<code>' atom bonded to
        its CA. Consecutive CA atoms of the same chain are bonded.

        Sets ``_ref_topology`` (copy of the input), ``topology`` (the new
        topology), ``_ca_idxs`` (rows of ``[all-atom CA index, new index]``),
        ``_sidechain_idxs`` and ``_sidechain_mass`` (one entry per residue,
        empty lists for GLY) and ``_chain_indices`` (source chain number of
        every new atom).

        Parameters
        ----------
        topology : mdtraj.Topology object
            All-atom topology to coarse-grain.
        use_chains : container of int, optional
            Positions of the chains to keep. Defaults to all chains.

        Raises
        ------
        IndexError
            If a residue has no atom named "CA" (the residue, chain and atom
            names are printed first).
        """
        if use_chains is None:
            use_chains = range(len(topology._chains))

        self._ref_topology = topology.copy()

        # Build new topology
        newTopology = Topology()
        new_atm_idx = 0
        res_idx = 1
        prev_ca = None
        ca_idxs = []
        self._sidechain_idxs = []
        self._sidechain_mass = []
        self._chain_indices = []
        for chain_count, chain in enumerate(topology._chains):
            if chain_count not in use_chains:
                continue
            newChain = newTopology.add_chain()
            for residue in chain._residues:
                # Residues are renumbered sequentially from 1 across all
                # kept chains; the original resSeq is not carried over.
                newResidue = newTopology.add_residue(
                    residue.name, newChain, res_idx)
                # map CA
                new_ca = newTopology.add_atom(
                    'CA',
                    md.core.element.get_by_symbol('C'),
                    newResidue,
                    serial=new_atm_idx)
                self._chain_indices.append(chain_count)

                # only bond atoms in the same chain.
                if (prev_ca is not None and
                        new_ca.residue.chain.index == prev_ca.residue.chain.index):
                    newTopology.add_bond(prev_ca, new_ca)
                prev_ca = new_ca

                try:
                    old_ca_idx = [
                        atm.index for atm in residue.atoms if atm.name == "CA"
                    ][0]
                except IndexError:
                    # No "CA" atom in this residue: print what the residue
                    # does contain, then let the error propagate.
                    print(residue)
                    print(chain)
                    for atm in residue.atoms:
                        print(atm.name)
                    raise
                ca_idxs.append([old_ca_idx, new_atm_idx])
                new_atm_idx += 1

                if residue.name == 'GLY':
                    # No CB bead is added for GLY.
                    self._sidechain_idxs.append([])
                    self._sidechain_mass.append([])
                else:
                    # map CB
                    cb_name = "CB%s" % atom_types.residue_code[residue.name]
                    new_cb = newTopology.add_atom(
                        cb_name,
                        md.core.element.get_by_symbol('C'),
                        newResidue,
                        serial=new_atm_idx)
                    self._chain_indices.append(chain_count)

                    newTopology.add_bond(new_cb, new_ca)

                    # Non-hydrogen side-chain atoms that this CB stands for.
                    heavy_sidechain = [
                        atm for atm in residue.atoms
                        if atm.is_sidechain and atm.element.symbol != "H"
                    ]
                    self._sidechain_idxs.append(
                        [[atm.index for atm in heavy_sidechain], new_atm_idx])
                    self._sidechain_mass.append(
                        np.array([atm.element.mass for atm in heavy_sidechain]))
                    new_atm_idx += 1
                res_idx += 1

        self._ca_idxs = np.array(ca_idxs)
        self.topology = newTopology
        assert self.topology.n_atoms == len(self._chain_indices)
Beispiel #9
0
def extract(item, atom_indices='all', copy_if_all=True, check=True):
    """Return a topology restricted to the selected atoms.

    Parameters
    ----------
    item : mdtraj.Topology
        Topology to extract from.
    atom_indices : 'all' or iterable of int, default 'all'
        Indices of the atoms to keep. With 'all' the whole item is returned.
    copy_if_all : bool, default True
        When every atom is selected, return a deep copy (True) or the very
        same object (False).
    check : bool, default True
        Whether to pass the arguments through ``digest_item`` and
        ``digest_atom_indices`` first.

    Returns
    -------
    tmp_item
        ``item`` or a deep copy of it when 'all' is selected. Otherwise a
        new topology holding only the selected atoms, the bonds between
        them, and the groups and chains that still contain atoms.
    """
    if check:

        digest_item(item, 'mdtraj.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    # Compare by value, not identity: `is 'all'` depends on string interning.
    # The isinstance guard keeps arrays from being compared with a string.
    if isinstance(atom_indices, str) and atom_indices == 'all':

        if copy_if_all:
            from copy import deepcopy
            tmp_item = deepcopy(item)
        else:
            tmp_item = item
    else:

        from mdtraj.core.topology import Topology
        from mdtraj.utils import ilen

        atom_indices_to_be_kept = set(atom_indices)
        newTopology = Topology()
        old_atom_to_new_atom = {}

        for chain in item._chains:
            newChain = newTopology.add_chain()
            for group in chain._groups:
                # Fall back to the group index only when resSeq is missing;
                # a plain `or` would also discard a valid resSeq of 0.
                resSeq = getattr(group, 'resSeq', None)
                if resSeq is None:
                    resSeq = group.index
                newResidue = newTopology.add_group(group.name, newChain,
                                                   resSeq, group.segment_id)
                for atom in group._atoms:
                    if atom.index in atom_indices_to_be_kept:
                        # OpenMM Topology objects don't have serial
                        # attributes, so default to None.
                        serial = getattr(atom, 'serial', None)
                        newAtom = newTopology.add_atom(atom.name,
                                                       atom.element,
                                                       newResidue,
                                                       serial=serial)
                        old_atom_to_new_atom[atom] = newAtom

        # `bonds` may be an iterable attribute or a method returning one.
        bondsiter = item.bonds
        if not hasattr(bondsiter, '__iter__'):
            bondsiter = bondsiter()

        for bond in bondsiter:
            try:
                atom1, atom2 = bond
                newTopology.add_bond(old_atom_to_new_atom[atom1],
                                     old_atom_to_new_atom[atom2],
                                     type=bond.type,
                                     order=bond.order)
            except KeyError:
                # we only put bonds into the new topology if both of their
                # partners were indexed and thus HAVE a new atom
                pass

        # Delete empty groups
        newTopology._groups = [
            r for r in newTopology._groups if len(r._atoms) > 0
        ]
        for chain in newTopology._chains:
            chain._groups = [r for r in chain._groups if len(r._atoms) > 0]

        # Delete empty chains
        newTopology._chains = [
            c for c in newTopology._chains if len(c._groups) > 0
        ]
        # Re-set the numAtoms and numResidues
        newTopology._numAtoms = ilen(newTopology.atoms)
        newTopology._numResidues = ilen(newTopology.groups)

        tmp_item = newTopology

    return tmp_item
Beispiel #10
0
    def _to_topology(self, atom_list, chain_types=None, residue_types=None):
        """Create a mdtraj.Topology from a Compound.

        Parameters
        ----------
        atom_list : iterable
            Particles to add as atoms. Each must provide ``ancestors()``,
            ``name`` and ``charge``.
        chain_types : type, list, set or tuple of types, optional
            An ancestor that is an instance of one of these starts a chain.
            If a Compound instance is passed, the ``Compound`` class itself
            is used.
        residue_types : type, list, set or tuple of types, optional
            Same as ``chain_types``, but for residues. The residue is named
            after the class of the matching ancestor.

        Returns
        -------
        top : mdtraj.Topology

        """
        from mdtraj.core.element import get_by_symbol
        from mdtraj.core.topology import Topology

        def _normalize(kinds):
            # Turn the accepted argument forms into something isinstance()
            # can take.
            if isinstance(kinds, Compound):
                kinds = [Compound]
            if isinstance(kinds, (list, set)):
                kinds = tuple(kinds)
            return kinds

        def _nearest(particle, kinds):
            # First ancestor that is an instance of `kinds`, or None.
            for ancestor in particle.ancestors():
                if kinds and isinstance(ancestor, kinds):
                    return ancestor
            return None

        chain_types = _normalize(chain_types)
        residue_types = _normalize(residue_types)

        top = Topology()
        atom_mapping = {}

        # Atoms without a matching ancestor land in these defaults.
        default_chain = top.add_chain()
        default_residue = top.add_residue('RES', default_chain)

        last_residue_compound = None
        last_chain_compound = None
        last_residue = None
        last_chain = None

        for atom in atom_list:
            # Chains
            chain_compound = _nearest(atom, chain_types)
            if chain_compound is None:
                last_chain = default_chain
                last_chain.compound = last_chain_compound
            elif chain_compound != last_chain_compound:
                last_chain_compound = chain_compound
                last_chain = top.add_chain()
                # Every new chain gets its own fallback residue.
                last_chain_default_residue = top.add_residue(
                    'RES', last_chain)
                last_chain.compound = last_chain_compound

            # Residues
            residue_compound = _nearest(atom, residue_types)
            if residue_compound is None:
                last_residue = (last_chain_default_residue
                                if last_chain != default_chain
                                else default_residue)
                last_residue.compound = last_residue_compound
            elif residue_compound != last_residue_compound:
                last_residue_compound = residue_compound
                last_residue = top.add_residue(
                    residue_compound.__class__.__name__, last_chain)
                last_residue.compound = last_residue_compound

            # Add the actual atoms. Names that are not element symbols
            # become "VS" entries.
            try:
                elem = get_by_symbol(atom.name)
            except KeyError:
                elem = get_by_symbol("VS")
            new_atom = top.add_atom(atom.name, elem, last_residue)
            new_atom.charge = atom.charge
            atom_mapping[atom] = new_atom

        # Remove empty default residues (and chains). Both lists are
        # collected before anything is removed.
        empty_chains = [chain for chain in top.chains if chain.n_atoms == 0]
        empty_residues = [res for res in top.residues if res.n_atoms == 0]
        for chain in empty_chains:
            top._chains.remove(chain)
        for res in empty_residues:
            for chain in top.chains:
                if res in chain._residues:
                    chain._residues.remove(res)

        for atom1, atom2 in self.bonds():
            # Ensure that both atoms are part of the compound. This becomes an
            # issue if you try to convert a sub-compound to a topology which is
            # bonded to a different subcompound.
            if atom1 in atom_mapping and atom2 in atom_mapping:
                top.add_bond(atom_mapping[atom1], atom_mapping[atom2])
        return top