Example #1
0
    def _guess_element(self, atom_name, residue):
        """Guess an atom's element from its name and the residue it sits in.

        Parameters
        ----------
        atom_name : str
            Atom name as read from the input; matching is case-insensitive.
        residue
            The residue holding the atom. Only ``len(residue)`` and
            ``residue.name`` are consulted.
            NOTE(review): ``len(residue)`` presumably counts atoms - confirm.

        Returns
        -------
        The matching element object from ``elem``, or None when nothing
        fits.
        """
        name = atom_name.upper()

        # Prefixes that decide the element regardless of residue. The order
        # is significant: earlier entries win.
        unconditional = (
            ('CL', 'chlorine'),
            ('NA', 'sodium'),
            ('MG', 'magnesium'),
            ('BE', 'beryllium'),
            ('LI', 'lithium'),
            ('K', 'potassium'),
            ('ZN', 'zinc'),
        )
        for prefix, attribute in unconditional:
            if name.startswith(prefix):
                return getattr(elem, attribute)

        size = len(residue)
        # 'CA' means calcium only when the residue has length one.
        if size == 1 and name.startswith('CA'):
            return elem.calcium
        # In larger residues CE/CD are treated as carbons (probably not
        # cerium or cadmium).
        if size > 1 and name.startswith(('CE', 'CD')):
            return elem.carbon

        residue_name = residue.name
        # NE in these residues is (probably) a nitrogen, not neon.
        if residue_name in ('TRP', 'ARG', 'GLN', 'HIS') and name.startswith('NE'):
            return elem.nitrogen
        if residue_name in ('ASN',) and name.startswith('ND'):
            return elem.nitrogen
        if residue_name == 'CYS' and name.startswith('SG'):
            return elem.sulfur

        # Generic fallback: first character, then a cleaned two-character
        # prefix.
        try:
            return elem.get_by_symbol(atom_name[0])
        except KeyError:
            pass

        candidate = atom_name[0:2].strip()
        candidate = candidate.rstrip("AB0123456789").lstrip("0123456789")
        try:
            return elem.get_by_symbol(candidate)
        except KeyError:
            return None
Example #2
0
    def from_openmm(cls, value):
        """Create a mdtraj topology from an OpenMM topology

        Parameters
        ----------
        value : simtk.openmm.app.Topology
            An OpenMM topology that you wish to convert to a
            mdtraj topology.

        Returns
        -------
        out : cls
            A new topology with one chain/residue/atom per entry of
            ``value`` and the same bonds.

        Raises
        ------
        TypeError
            If ``value`` is not an OpenMM ``Topology``.
        """
        app = import_('simtk.openmm.app')

        if not isinstance(value, app.Topology):
            raise TypeError('value must be an OpenMM Topology. '
                            'You supplied a %s' % type(value))

        out = cls()
        # Maps each source atom to the atom created for it, so bonds can be
        # translated afterwards.
        atom_mapping = {}

        for chain in value.chains():
            c = out.add_chain()
            for residue in chain.residues():
                # Pass the segment id by keyword: as a third positional
                # argument it would be bound to ``resSeq`` when add_residue
                # is declared as (name, chain, resSeq, segment_id).
                # NOTE(review): assumes the input residues expose
                # ``segment_id`` - confirm against the OpenMM version in use.
                r = out.add_residue(residue.name, c,
                                    segment_id=residue.segment_id)
                for atom in residue.atoms():
                    if atom.element is None:
                        # Atoms without an element become virtual sites.
                        element = elem.virtual
                    else:
                        element = elem.get_by_symbol(atom.element.symbol)
                    a = out.add_atom(atom.name, element, r)
                    atom_mapping[atom] = a

        for a1, a2 in value.bonds():
            out.add_bond(atom_mapping[a1], atom_mapping[a2])

        return out
Example #3
0
    def from_openmm(cls, value):
        """Build a topology of this class from an OpenMM topology.

        Parameters
        ----------
        value : simtk.openmm.app.Topology
            The OpenMM topology to convert.

        Returns
        -------
        A new ``cls`` instance mirroring the chains, residues, atoms and
        bonds of ``value``.

        Raises
        ------
        TypeError
            If ``value`` is not an OpenMM ``Topology``.
        """
        app = import_('simtk.openmm.app')

        if not isinstance(value, app.Topology):
            message = ('value must be an OpenMM Topology. You supplied a %s'
                       % type(value))
            raise TypeError(message)

        converted = cls()
        # Source atom -> newly created atom, used to translate the bonds.
        new_atom_for = {}

        for source_chain in value.chains():
            new_chain = converted.add_chain()
            for source_residue in source_chain.residues():
                new_residue = converted.add_residue(source_residue.name,
                                                    new_chain)
                for source_atom in source_residue.atoms():
                    source_element = source_atom.element
                    # Atoms without an element are mapped to a virtual site.
                    element = (elem.virtual if source_element is None
                               else elem.get_by_symbol(source_element.symbol))
                    new_atom_for[source_atom] = converted.add_atom(
                        source_atom.name, element, new_residue)

        for first, second in value.bonds():
            converted.add_bond(new_atom_for[first], new_atom_for[second])

        return converted
Example #4
0
def json_to_mdtraj_topology(json_string):
    """Rebuild an mdtraj topology from its JSON serialisation.

    Copied in part from the MDTraj.formats.hdf5 topology property.

    Parameters
    ----------
    json_string : str
        JSON document with a ``'chains'`` list (each chain holding
        ``'residues'``, each residue holding ``'atoms'``) and a ``'bonds'``
        list of atom-index pairs.

    Returns
    -------
    The reconstructed ``mdj.Topology``.
    """
    spec = json.loads(json_string)

    result = mdj.Topology()
    by_index = operator.itemgetter('index')

    for chain_spec in sorted(spec['chains'], key=by_index):
        new_chain = result.add_chain()
        for residue_spec in sorted(chain_spec['residues'], key=by_index):
            if "resSeq" in residue_spec:
                res_seq = residue_spec["resSeq"]
            else:
                res_seq = None
                warn('No resSeq information found in HDF file, defaulting to zero-based indices')
            # The segment id is optional and defaults to an empty string.
            seg_id = residue_spec.get("segmentID", "")
            new_residue = result.add_residue(residue_spec['name'], new_chain,
                                             resSeq=res_seq, segment_id=seg_id)
            for atom_spec in sorted(residue_spec['atoms'], key=by_index):
                # A missing or unknown element symbol yields a virtual site.
                try:
                    atom_element = elem.get_by_symbol(atom_spec['element'])
                except KeyError:
                    atom_element = elem.virtual
                result.add_atom(atom_spec['name'], atom_element, new_residue)

    # Bonds refer to atoms by their position in the final atom ordering.
    all_atoms = list(result.atoms)
    for i, j in spec['bonds']:
        result.add_bond(all_atoms[i], all_atoms[j])

    return result
Example #5
0
    def _guess_element(self, atom_name, residue):
        """Make a best-effort guess of the element for ``atom_name``.

        Parameters
        ----------
        atom_name : str
            Name of the atom; compared case-insensitively.
        residue
            Residue containing the atom; ``len(residue)`` and
            ``residue.name`` are the only things read from it.
            NOTE(review): ``len(residue)`` presumably is the atom count -
            confirm.

        Returns
        -------
        An element object from ``elem``, or None if no rule matched.
        """
        key = atom_name.upper()

        # Residue-independent prefixes, checked in this exact order.
        fixed_rules = [
            ('CL', 'chlorine'), ('NA', 'sodium'), ('MG', 'magnesium'),
            ('BE', 'beryllium'), ('LI', 'lithium'), ('K', 'potassium'),
            ('ZN', 'zinc'),
        ]
        for start, element_attr in fixed_rules:
            if key.startswith(start):
                return getattr(elem, element_attr)

        residue_len = len(residue)
        if residue_len == 1 and key.startswith('CA'):
            return elem.calcium  # lone 'CA' is taken to be a calcium ion

        # Element parsing is non-trivial; the rules below are heuristics
        # for names that collide with real element symbols.
        if residue_len > 1 and (key.startswith('CE') or key.startswith('CD')):
            return elem.carbon  # (probably) not cerium / cadmium

        parent = residue.name
        if parent in ['TRP', 'ARG', 'GLN', 'HIS'] and key.startswith('NE'):
            return elem.nitrogen  # (probably) not neon
        if parent in ['ASN'] and key.startswith('ND'):
            return elem.nitrogen  # (probably) not neodymium
        if parent == 'CYS' and key.startswith('SG'):
            return elem.sulfur  # (probably) not seaborgium

        # Last resort: try the first letter, then a trimmed 2-letter prefix.
        try:
            guess = elem.get_by_symbol(atom_name[0])
        except KeyError:
            trimmed = atom_name[0:2].strip().rstrip("AB0123456789")
            trimmed = trimmed.lstrip("0123456789")
            try:
                guess = elem.get_by_symbol(trimmed)
            except KeyError:
                guess = None
        return guess
Example #6
0
    def _read_topology(self):
        """Build a topology from the atom records of the open file.

        The second line of the file is read as the atom count; the
        following ``n_atoms`` lines are parsed as fixed-width (5 character)
        fields: residue number, residue name, atom name, atom number.

        Returns
        -------
        n_atoms : int or None
            Atom count from the second line (None if the file has fewer
            than two lines).
        topology : md.Topology
            Single-chain topology with standard bonds created.

        Raises
        ------
        ValueError
            If the file is closed or was not opened for reading.
        """
        if not self._open:
            raise ValueError('I/O operation on closed file')
        if not self._mode == 'r':
            raise ValueError('file not opened for reading')
        pdb.PDBTrajectoryFile._loadNameReplacementTables()

        n_atoms = None
        topology = md.Topology()
        chain = topology.add_chain()
        residue = None
        # Name of the current residue exactly as written in the file. The
        # stored residue.name may have been replaced by a canonical name,
        # so comparing the next line against residue.name would start a new
        # residue for every atom of a renamed residue.
        raw_resname = None
        atomReplacements = {}

        for ln, line in enumerate(self._file):
            if ln == 1:
                n_atoms = int(line.strip())
            elif ln > 1 and ln < n_atoms + 2:
                (thisresnum, thisresname, thisatomname, thisatomnum) = \
                    [line[i*5:i*5+5].strip() for i in range(4)]
                thisresnum, thisatomnum = map(int, (thisresnum, thisatomnum))
                if (residue is None or residue.resSeq != thisresnum
                        or raw_resname != thisresname):
                    # Only a name change under an unchanged residue number
                    # is unusual; a changed number is a normal new residue.
                    if residue is not None and residue.resSeq == thisresnum:
                        warnings.warn(
                            "WARNING: two consecutive residues with same number (%s, %s)"
                            % (thisresname, raw_resname))
                    raw_resname = thisresname
                    if thisresname in pdb.PDBTrajectoryFile._residueNameReplacements:
                        thisresname = pdb.PDBTrajectoryFile._residueNameReplacements[
                            thisresname]
                    residue = topology.add_residue(thisresname,
                                                   chain,
                                                   resSeq=thisresnum)
                    if thisresname in pdb.PDBTrajectoryFile._atomNameReplacements:
                        atomReplacements = pdb.PDBTrajectoryFile._atomNameReplacements[
                            thisresname]
                    else:
                        atomReplacements = {}

                # Guess the element symbol: keep the first character and
                # drop upper-case letters and digits from the remainder.
                thiselem = thisatomname
                if len(thiselem) > 1:
                    thiselem = thiselem[0] + sub('[A-Z0-9]', '', thiselem[1:])
                try:
                    element = elem.get_by_symbol(thiselem)
                except KeyError:
                    # Unknown symbols are stored as virtual sites.
                    element = elem.virtual
                if thisatomname in atomReplacements:
                    thisatomname = atomReplacements[thisatomname]

                topology.add_atom(thisatomname,
                                  element=element,
                                  residue=residue,
                                  serial=thisatomnum)
        topology.create_standard_bonds()
        return n_atoms, topology
Example #7
0
File: hdf5.py Project: bkmi/mdtraj
    def topology(self):
        """Read the topology stored in the file.

        Returns
        -------
        topology : mdtraj.Topology or None
            The reconstructed topology, or None when the file has no
            'topology' node.
        """
        try:
            payload = self._get_node(self._handle.root, name='topology')[0]
            if not isinstance(payload, string_types):
                payload = payload.decode()
            spec = json.loads(payload)
        except self.tables.NoSuchNodeError:
            return None

        result = Topology()
        by_index = operator.itemgetter('index')

        for chain_spec in sorted(spec['chains'], key=by_index):
            new_chain = result.add_chain()
            for residue_spec in sorted(chain_spec['residues'], key=by_index):
                if "resSeq" in residue_spec:
                    res_seq = residue_spec["resSeq"]
                else:
                    res_seq = None
                    warnings.warn(
                        'No resSeq information found in HDF file, defaulting to zero-based indices'
                    )
                # The segment id is optional; default to an empty string.
                seg_id = residue_spec.get("segmentID", "")
                new_residue = result.add_residue(residue_spec['name'],
                                                 new_chain,
                                                 resSeq=res_seq,
                                                 segment_id=seg_id)
                for atom_spec in sorted(residue_spec['atoms'], key=by_index):
                    # Missing/unknown element symbols become virtual sites.
                    try:
                        atom_element = elem.get_by_symbol(atom_spec['element'])
                    except KeyError:
                        atom_element = elem.virtual
                    result.add_atom(atom_spec['name'], atom_element,
                                    new_residue)

        # Bonds are stored as pairs of positions in the atom ordering.
        all_atoms = list(result.atoms)
        for i, j in spec['bonds']:
            result.add_bond(all_atoms[i], all_atoms[j])

        return result
Example #8
0
def _create_com_traj(xyz,
                     time,
                     unitcell_lengths,
                     unitcell_angles,
                     masses=None):
    """Wrap coordinates in a trajectory with one 'COM' atom per site.

    One single-atom residue named ``COM<i>`` is created for every entry of
    the first frame ``xyz[0]``; each atom is named 'COM' and given the
    element looked up for symbol 'C'. ``masses`` is accepted but not used
    here.

    Returns
    -------
    md.Trajectory built from ``xyz``, the generated topology, ``time`` and
    the unit cell lengths/angles.
    """
    topology = md.Topology()
    only_chain = topology.add_chain()

    for site_index, _ in enumerate(xyz[0]):
        residue = topology.add_residue('COM' + str(site_index), only_chain)
        topology.add_atom('COM', get_by_symbol('C'), residue=residue)

    trajectory = md.Trajectory(
        xyz,
        topology=topology,
        time=time,
        unitcell_lengths=unitcell_lengths,
        unitcell_angles=unitcell_angles,
    )
    return trajectory
Example #9
0
def butane_toy_model():
    """Assemble a four-bead butane-like toy system.

    Returns
    -------
    xyz : np.ndarray, shape (1, 4, 3)
        Starting coordinates of the four beads.
    top
        The topology taken from ``model.mapping.top``.
    model
        The ``mdb.models.Model`` with three harmonic bonds, two harmonic
        angles and one cosine dihedral added to its Hamiltonian.
    """
    # Dihedral reference angle and the dihedral angle of the starting
    # configuration (described by the original author as close to cis).
    phi0 = np.pi
    phi_ini = 2*np.pi*0.8

    bead_positions = [
        [1, -1, 0],
        [0, -1, 0],
        [0, 0, 0],
        [np.cos(phi_ini), 0, np.sin(phi_ini)],
    ]
    xyz = np.array([bead_positions], float)

    # Coordinates must be positive in gromacs
    xyz += np.array([5, 5, 5])

    # One single-atom GLY residue per bead.
    newtop = md.Topology()
    chain = newtop.add_chain()
    for bead in range(4):
        residue = newtop.add_residue("GLY", chain, bead)
        newtop.add_atom('CA', get_by_symbol('C'), residue, serial=bead)

    model = mdb.models.Model(newtop, bead_repr="CA")
    model.mapping.add_atoms(mass=10)
    top = model.mapping.top

    # Bonds between consecutive beads.
    for i in range(3):
        model.Hamiltonian._add_bond("HARMONIC_BOND", top.atom(i),
                                    top.atom(i + 1), 100, 1)

    # Angles over each consecutive triple.
    for i in range(2):
        model.Hamiltonian._add_angle("HARMONIC_ANGLE", top.atom(i),
                                     top.atom(i + 1), top.atom(i + 2),
                                     20, np.pi/2)

    # Single dihedral over all four beads.
    model.Hamiltonian._add_dihedral("COSINE_DIHEDRAL", top.atom(0),
                                    top.atom(1), top.atom(2), top.atom(3),
                                    0.1, phi0, 1)

    return xyz, top, model
Example #10
0
File: gro.py Project: xuw10/mdtraj
    def _read_topology(self):
        """Parse the atom records of the open file into a topology.

        The second line holds the atom count; the ``n_atoms`` lines after
        it are split into four 5-character fields (residue number, residue
        name, atom name, atom number). A new residue is started whenever
        the residue number changes.

        Returns
        -------
        n_atoms : int or None
            Atom count from the second line (None if there is no such line).
        topology : md.Topology
            Single-chain topology; atoms whose element cannot be resolved
            are added with ``element=None``.

        Raises
        ------
        ValueError
            If the file is closed or not opened in read mode.
        """
        if not self._open:
            raise ValueError('I/O operation on closed file')
        if self._mode != 'r':
            raise ValueError('file not opened for reading')
        pdb.PDBTrajectoryFile._loadNameReplacementTables()
        residue_aliases = pdb.PDBTrajectoryFile._residueNameReplacements
        atom_alias_tables = pdb.PDBTrajectoryFile._atomNameReplacements

        n_atoms = None
        topology = md.Topology()
        chain = topology.add_chain()
        current = None
        atom_aliases = {}

        for line_no, record in enumerate(self._file):
            if line_no == 1:
                n_atoms = int(record.strip())
                continue
            # Skip the title line and everything after the atom records.
            if line_no < 2 or line_no >= n_atoms + 2:
                continue

            res_field, res_name, atom_name, serial_field = (
                record[start:start + 5].strip() for start in (0, 5, 10, 15))
            res_num = int(res_field)
            serial = int(serial_field)

            if current is None or current.resSeq != res_num:
                if res_name in residue_aliases:
                    res_name = residue_aliases[res_name]
                current = topology.add_residue(res_name, chain,
                                               resSeq=res_num)
                atom_aliases = (atom_alias_tables[res_name]
                                if res_name in atom_alias_tables else {})

            # Element guess: first character plus whatever is left of the
            # rest after removing upper-case letters and digits.
            symbol = atom_name
            if len(symbol) > 1:
                symbol = symbol[0] + sub('[A-Z0-9]', '', symbol[1:])
            try:
                element = elem.get_by_symbol(symbol)
            except KeyError:
                element = None

            if atom_name in atom_aliases:
                atom_name = atom_aliases[atom_name]

            topology.add_atom(atom_name, element=element, residue=current,
                              serial=serial)

        return n_atoms, topology
Example #11
0
    def topology(self):
        """Load the topology saved in the file.

        Returns
        -------
        topology : mdtraj.Topology or None
            The rebuilt topology; None if the file has no 'topology' node.
        """
        try:
            serialized = self._get_node('/', name='topology')[0]
            if not isinstance(serialized, string_types):
                serialized = serialized.decode()
            description = json.loads(serialized)
        except self.tables.NoSuchNodeError:
            return None

        built = Topology()
        index_of = operator.itemgetter('index')

        for chain_entry in sorted(description['chains'], key=index_of):
            chain = built.add_chain()
            for residue_entry in sorted(chain_entry['residues'], key=index_of):
                if "resSeq" in residue_entry:
                    res_seq = residue_entry["resSeq"]
                else:
                    res_seq = None
                    warnings.warn('No resSeq information found in HDF file, defaulting to zero-based indices')
                # Optional field; an empty string is used when absent.
                segment = residue_entry.get("segmentID", "")
                residue = built.add_residue(residue_entry['name'], chain,
                                            resSeq=res_seq,
                                            segment_id=segment)
                for atom_entry in sorted(residue_entry['atoms'], key=index_of):
                    # Missing or unknown symbols fall back to a virtual site.
                    try:
                        atom_element = elem.get_by_symbol(atom_entry['element'])
                    except KeyError:
                        atom_element = elem.virtual
                    built.add_atom(atom_entry['name'], atom_element, residue)

        # Bond entries are pairs of positions in the atom ordering.
        ordered_atoms = list(built.atoms)
        for first, second in description['bonds']:
            built.add_bond(ordered_atoms[first], ordered_atoms[second])

        return built
Example #12
0
    def topology(self):
        """Load the topology saved in the file.

        Returns
        -------
        topology : mdtraj.Topology or None
            The rebuilt topology; None if the file has no "topology" node.
        """
        try:
            serialized = self._get_node("/", name="topology")[0]
            if not isinstance(serialized, string_types):
                serialized = serialized.decode()
            description = json.loads(serialized)
        except self.tables.NoSuchNodeError:
            return None

        built = Topology()
        index_of = operator.itemgetter("index")

        for chain_entry in sorted(description["chains"], key=index_of):
            chain = built.add_chain()
            for residue_entry in sorted(chain_entry["residues"], key=index_of):
                if "resSeq" in residue_entry:
                    res_seq = residue_entry["resSeq"]
                else:
                    res_seq = None
                    warnings.warn("No resSeq information found in HDF file, defaulting to zero-based indices")
                residue = built.add_residue(residue_entry["name"], chain,
                                            resSeq=res_seq)
                for atom_entry in sorted(residue_entry["atoms"], key=index_of):
                    # Missing or unknown symbols leave the element unset.
                    try:
                        atom_element = elem.get_by_symbol(atom_entry["element"])
                    except KeyError:
                        atom_element = None
                    built.add_atom(atom_entry["name"], atom_element, residue)

        # Bond entries are pairs of positions in the atom ordering.
        ordered_atoms = list(built.atoms)
        for first, second in description["bonds"]:
            built.add_bond(ordered_atoms[first], ordered_atoms[second])

        return built
Example #13
0
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    Parameters
    ----------
    AtomID, AtomNames : sequences
        Per-atom ids (used for ordering) and names (str or bytes).
    ChainID : sequence
        Per-atom chain ids; every entry must be an empty string.
    ResidueID, ResidueNames : sequences
        Per-atom residue ids and names (str or bytes).

    Returns
    -------
    topology : mdtraj.Topology
        Single-chain topology with standard bonds created.

    Raises
    ------
    NotImplementedError
        If any chain id is non-empty.
    """
    # Delayed import due to recursive imports in compatibility
    from mdtraj import Topology
    topology = Topology()

    # The chain ids are expected to be all empty strings, which appears to
    # be the case for this legacy format. Use the builtin all(): np.all()
    # applied to a generator tests the generator object itself, which is
    # always truthy, so the check could never fail. Values may be bytes or
    # text depending on how the file was read.
    if not all(chainid == (b'' if isinstance(chainid, bytes) else '')
               for chainid in ChainID):
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues, in order of residue id; the first name seen
    # for an id wins
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms, in order of atom id
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # Element guess: first character after any leading digits.
        element_symbol = atom_name.lstrip('0123456789')[0]

        try:
            element = elem.get_by_symbol(element_symbol)
        except KeyError:
            element = elem.virtual

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Example #14
0
def _topology_from_arrays(AtomID, AtomNames, ChainID, ResidueID, ResidueNames):
    """Build topology object from the arrays stored in the lh5 file

    Parameters
    ----------
    AtomID, AtomNames : sequences
        Per-atom ids (used for ordering) and names (str or bytes).
    ChainID : sequence
        Per-atom chain ids; every entry must be an empty string.
    ResidueID, ResidueNames : sequences
        Per-atom residue ids and names (str or bytes).

    Returns
    -------
    topology : mdtraj.Topology
        Single-chain topology with standard bonds created. Atoms whose
        element cannot be resolved are added with ``element=None``.

    Raises
    ------
    NotImplementedError
        If any chain id is non-empty.
    """
    # Delayed import due to recursive imports in compatibility
    from mdtraj import Topology
    topology = Topology()

    # The chain ids are expected to be all empty strings, which appears to
    # be the case for this legacy format. Use the builtin all(): np.all()
    # applied to a generator tests the generator object itself, which is
    # always truthy, so the check could never fail. Values may be bytes or
    # text depending on how the file was read.
    if not all(chainid == (b'' if isinstance(chainid, bytes) else '')
               for chainid in ChainID):
        raise NotImplementedError('Im not prepared to parse multiple chains')
    chain0 = topology.add_chain()

    # register the residues, in order of residue id; the first name seen
    # for an id wins
    registered_residues = {}
    for i in np.argsort(ResidueID):
        residue_name = ResidueNames[i]
        if not isinstance(residue_name, basestring):
            residue_name = residue_name.decode()
        if ResidueID[i] not in registered_residues:
            res = topology.add_residue(residue_name, chain0)
            registered_residues[ResidueID[i]] = res

    # register the atoms, in order of atom id
    for i in np.argsort(AtomID):
        atom_name = AtomNames[i]
        if not isinstance(atom_name, basestring):
            atom_name = atom_name.decode()
        # Element guess: first character after any leading digits.
        element_symbol = atom_name.lstrip('0123456789')[0]

        try:
            element = elem.get_by_symbol(element_symbol)
        except KeyError:
            element = None

        topology.add_atom(atom_name, element,
                          registered_residues[ResidueID[i]])

    topology.create_standard_bonds()
    return topology
Example #15
0
    def __init__(self, pdb_line):
        """Create a new pdb.Atom from an ATOM or HETATM line.

        Example line:
        ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
        00000000011111111112222222222333333333344444444445555555555666666666677777777778
        12345678901234567890123456789012345678901234567890123456789012345678901234567890

        ATOM line format description from
          http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

        COLUMNS        DATA TYPE       CONTENTS
        --------------------------------------------------------------------------------
         1 -  6        Record name     "ATOM  "
         7 - 11        Integer         Atom serial number.
        13 - 16        Atom            Atom name.
        17             Character       Alternate location indicator.
        18 - 20        Residue name    Residue name.
        22             Character       Chain identifier.
        23 - 26        Integer         Residue sequence number.
        27             AChar           Code for insertion of residues.
        31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
        39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
        47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
        55 - 60        Real(6.2)       Occupancy (Default = 1.0).
        61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
        73 - 76        LString(4)      Segment identifier, left-justified.
        77 - 78        LString(2)      Element symbol, right-justified.
        79 - 80        LString(2)      Charge on the atom.

        Raises
        ------
        ValueError
            If a fourth residue-name character is present in column 21 but
            the three standard columns are not full, or if the serial
            number, residue number or a coordinate cannot be parsed.
        """
        # We might modify first/final status during _finalize() methods
        self.is_first_atom_in_chain = False
        self.is_final_atom_in_chain = False
        self.is_first_residue_in_chain = False
        self.is_final_residue_in_chain = False
        # Start parsing fields from pdb line
        self.record_name = pdb_line[0:6].strip()
        self.serial_number = int(pdb_line[6:11])
        self.name_with_spaces = pdb_line[12:16]
        alternate_location_indicator = pdb_line[16]

        self.residue_name_with_spaces = pdb_line[17:20]
        # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
        # column 21
        possible_fourth_character = pdb_line[20:21]
        if possible_fourth_character != " ":
            # Fourth character should only be there if official 3 are already full
            if len(self.residue_name_with_spaces.strip()) != 3:
                raise ValueError('Misaligned residue name: %s' % pdb_line)
            self.residue_name_with_spaces += possible_fourth_character
        self.residue_name = self.residue_name_with_spaces.strip()

        self.chain_id = pdb_line[21]
        self.residue_number = int(pdb_line[22:26])
        self.insertion_code = pdb_line[26]
        # coordinates, occupancy, and temperature factor belong in Atom.Location object
        x = float(pdb_line[30:38])
        y = float(pdb_line[38:46])
        z = float(pdb_line[46:54])
        # Occupancy and temperature factor are optional; a blank or
        # truncated field makes float() raise ValueError, in which case the
        # documented default is used. Only ValueError is caught so that
        # unrelated errors (e.g. KeyboardInterrupt) are not swallowed.
        try:
            occupancy = float(pdb_line[54:60])
        except ValueError:
            occupancy = 1.0
        try:
            temperature_factor = float(pdb_line[60:66])
        except ValueError:
            temperature_factor = 0.0
        self.locations = {}
        loc = Atom.Location(alternate_location_indicator, np.array([x, y, z]),
                            occupancy, temperature_factor,
                            self.residue_name_with_spaces)
        self.locations[alternate_location_indicator] = loc
        self.default_location_id = alternate_location_indicator
        # segment id, element_symbol, and formal_charge are not always present
        self.segment_id = pdb_line[72:76].strip()
        self.element_symbol = pdb_line[76:78].strip()
        try:
            self.formal_charge = int(pdb_line[78:80])
        except ValueError:
            self.formal_charge = None
        # figure out atom element
        try:
            # First try to find a sensible element symbol from columns 76-77
            self.element = element.get_by_symbol(self.element_symbol)
        except KeyError:
            # otherwise, deduce element from first two characters of atom name
            # remove digits found in some hydrogen atom names
            symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
            try:
                # Some molecular dynamics PDB files, such as gromacs with the
                # ffamber force field, use non-standard 4-character hydrogen
                # atom names beginning with "H". Treat those as hydrogen, on
                # the expectation that elements such as holmium (Ho) and
                # mercury (Hg) have atom names shorter than four characters.
                if len(self.name) == 4 and self.name[0:1] == "H":
                    self.element = element.hydrogen
                else:
                    self.element = element.get_by_symbol(symbol)
            except KeyError:
                # No usable symbol could be derived; leave the element unset.
                self.element = None
0
    def _to_topology(self, atom_list, chain_types=None, residue_types=None):
        """Create a mdtraj.Topology from a Compound.

        Each atom is placed in a chain and a residue chosen by walking its
        ancestors: the first ancestor that is an instance of
        ``chain_types`` / ``residue_types`` decides the chain / residue.
        Atoms with no matching ancestor go into a default chain and a
        default residue named 'RES'. Chains and residues that end up with
        no atoms are removed again before the bonds are added.

        Parameters
        ----------
        atom_list :
            Iterable of the atoms to include. Each item must provide
            ``ancestors()``, ``name`` and ``charge``, as used below.
        chain_types :
            Type or collection of types; passed to ``isinstance`` to decide
            which ancestors start a chain. Lists and sets are converted to
            tuples.
        residue_types :
            Same as ``chain_types`` but for residues.

        Returns
        -------
        top : mdtraj.Topology

        """
        from mdtraj.core.element import get_by_symbol
        from mdtraj.core.topology import Topology

        # NOTE(review): when a Compound *instance* is passed, the value is
        # replaced by the Compound *class* itself, so every Compound
        # ancestor matches. Confirm whether type(chain_types) was intended.
        if isinstance(chain_types, Compound):
            chain_types = [Compound]
        if isinstance(chain_types, (list, set)):
            # isinstance() needs a type or a tuple of types.
            chain_types = tuple(chain_types)

        # NOTE(review): same normalisation (and same question) as above.
        if isinstance(residue_types, Compound):
            residue_types = [Compound]
        if isinstance(residue_types, (list, set)):
            residue_types = tuple(residue_types)
        top = Topology()
        # Source atom -> topology atom, used to translate bonds at the end.
        atom_mapping = {}

        # Fallback containers for atoms without a matching ancestor.
        default_chain = top.add_chain()
        default_residue = top.add_residue('RES', default_chain)

        # State carried from one atom to the next, so that consecutive atoms
        # sharing an ancestor reuse the same chain/residue.
        last_residue_compound = None
        last_chain_compound = None
        last_residue = None
        last_chain = None

        for atom in atom_list:
            # Chains
            for parent in atom.ancestors():
                if chain_types and isinstance(parent, chain_types):
                    if parent != last_chain_compound:
                        # A new chain ancestor: open a chain and its own
                        # default 'RES' residue.
                        last_chain_compound = parent
                        last_chain = top.add_chain()
                        last_chain_default_residue = top.add_residue(
                            'RES', last_chain)
                        last_chain.compound = last_chain_compound
                    break
            else:
                # No ancestor matched (loop finished without break).
                last_chain = default_chain
                last_chain.compound = last_chain_compound

            # Residues
            for parent in atom.ancestors():
                if residue_types and isinstance(parent, residue_types):
                    if parent != last_residue_compound:
                        # The residue is named after the ancestor's class.
                        last_residue_compound = parent
                        last_residue = top.add_residue(
                            parent.__class__.__name__, last_chain)
                        last_residue.compound = last_residue_compound
                    break
            else:
                # No residue ancestor: use the default residue of whichever
                # chain the atom was assigned to.
                if last_chain != default_chain:
                    last_residue = last_chain_default_residue
                else:
                    last_residue = default_residue
                last_residue.compound = last_residue_compound

            # Add the actual atoms
            try:
                elem = get_by_symbol(atom.name)
            except KeyError:
                # Atom name is not an element symbol: look up "VS" instead.
                elem = get_by_symbol("VS")
            at = top.add_atom(atom.name, elem, last_residue)
            at.charge = atom.charge
            atom_mapping[atom] = at

        # Remove empty default residues.
        chains_to_remove = [
            chain for chain in top.chains if chain.n_atoms == 0
        ]
        residues_to_remove = [res for res in top.residues if res.n_atoms == 0]
        for chain in chains_to_remove:
            top._chains.remove(chain)
        for res in residues_to_remove:
            # The owning chain is not looked up; removal is attempted on
            # every remaining chain.
            for chain in top.chains:
                try:
                    chain._residues.remove(res)
                except ValueError:  # Already gone.
                    pass

        for atom1, atom2 in self.bonds():
            # Ensure that both atoms are part of the compound. This becomes an
            # issue if you try to convert a sub-compound to a topology which is
            # bonded to a different subcompound.
            if all(a in atom_mapping.keys() for a in [atom1, atom2]):
                top.add_bond(atom_mapping[atom1], atom_mapping[atom2])
        return top
Example #17
0
    def from_dataframe(cls, atoms, bonds=None):
        """Create a mdtraj topology from a pandas data frame

        Parameters
        ----------
        atoms : pandas.DataFrame
            The atoms in the topology, represented as a data frame. This data
            frame should have columns "serial" (atom index), "name" (atom name),
            "element" (atom's element), "resSeq" (index of the residue)
            "resName" (name of the residue), "chainID" (index of the chain),
            and optionally "segmentID", following the same conventions
            as wwPDB 3.0 format. The frame's index must be 0..len(atoms)-1.
            The frame passed in is not modified.
        bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
            The bonds in the topology, represented as an n_bonds x 2 array
            of the indices of the atoms involved in each bond. Specifying
            bonds here is optional. To create standard protein bonds, you can
            use `create_standard_bonds` to "fill in" the bonds on your newly
            created Topology object

        Returns
        -------
        out : instance of ``cls``
            A new topology holding one chain per unique "chainID" and one
            residue per unique "resSeq" within each chain.

        Raises
        ------
        TypeError
            If `atoms` is not a pandas.DataFrame or `bonds` is not a
            numpy.ndarray.
        ValueError
            If a required column is missing, the index is not 0..n-1, or the
            atoms of one residue do not share a single residue name.

        See Also
        --------
        create_standard_bonds
        """
        pd = import_('pandas')

        if bonds is None:
            bonds = np.zeros((0, 2), dtype=int)

        # Check the argument types before touching any DataFrame attribute, so
        # that a wrong type is reported as the TypeError below rather than as
        # an AttributeError from `atoms.columns`.
        if not isinstance(atoms, pd.DataFrame):
            raise TypeError('atoms must be an instance of pandas.DataFrame. '
                            'You supplied a %s' % type(atoms))
        if not isinstance(bonds, np.ndarray):
            raise TypeError('bonds must be an instance of numpy.ndarray. '
                            'You supplied a %s' % type(bonds))

        for col in ["name", "element", "resSeq",
                    "resName", "chainID", "serial"]:
            if col not in atoms.columns:
                raise ValueError('dataframe must have column %s' % col)

        if "segmentID" not in atoms.columns:
            # assign() returns a new frame, leaving the caller's data untouched.
            atoms = atoms.assign(segmentID="")

        if not np.all(np.arange(len(atoms)) == atoms.index):
            raise ValueError('atoms must be uniquely numbered '
                             'starting from zero.')

        out = cls()
        # Pre-size the atom list: atoms are written by index below because
        # grouping by chain/residue may visit them out of order.
        out._atoms = [None] * len(atoms)

        for ci in np.unique(atoms['chainID']):
            chain_atoms = atoms[atoms['chainID'] == ci]
            c = out.add_chain()

            for ri in np.unique(chain_atoms['resSeq']):
                residue_atoms = chain_atoms[chain_atoms['resSeq'] == ri]
                rnames = residue_atoms['resName']
                residue_name = np.array(rnames)[0]
                segids = residue_atoms['segmentID']
                segment_id = np.array(segids)[0]
                if not np.all(rnames == residue_name):
                    raise ValueError('All of the atoms with residue index %d '
                                     'do not share the same residue name' % ri)
                r = out.add_residue(residue_name, c, ri, segment_id)

                for atom_index, atom in residue_atoms.iterrows():
                    atom_index = int(atom_index)  # Fixes bizarre hashing issue on Py3K.  See #545
                    a = Atom(atom['name'], elem.get_by_symbol(atom['element']),
                             atom_index, r, serial=atom['serial'])
                    out._atoms[atom_index] = a
                    r._atoms.append(a)

        for ai1, ai2 in bonds:
            out.add_bond(out.atom(ai1), out.atom(ai2))

        out._numAtoms = out.n_atoms
        return out
Example #18
0
def load_gro(filename):
    """Load a fixed-column coordinate file and re-parse topology and velocities.

    The file is first loaded with ``md.load``; it is then read again line by
    line to build a fresh single-chain topology from the first frame and to
    collect any velocity columns that follow the coordinates.  Every parsed
    position is asserted to match what ``md.load`` produced.

    Parameters
    ----------
    filename : str
        Path of the file to read.

    Returns
    -------
    traj
        The object returned by ``md.load`` with its ``top`` replaced by the
        re-parsed topology.
    velxyz : np.ndarray or None
        Array shaped like ``traj.xyz`` holding the velocities, or ``None``
        when every velocity is zero (including when none were present).
    """
    filename = os.path.abspath(filename)
    traj = md.load(filename)
    velxyz = np.zeros_like(traj.xyz)

    top = md.Topology()
    chain = top.add_chain()
    residue = None
    # Column boundaries of the numeric fields; worked out from the first
    # atom record encountered and reused for every later record.
    coord_indices = None

    with open(filename) as handle:
        records = iter(handle)
        for frame in count():
            title = next(records, None)
            if title is None:
                # End of file: every frame must have been consumed.
                assert frame == len(traj)
                break
            _before, _sep, _time_text = title.partition('t=')
            n_atoms = int(next(records))
            assert n_atoms == traj.n_atoms

            for atom_idx in range(n_atoms):
                record = next(records)

                # The topology is built once, from the first frame only.
                if frame == 0:
                    resnum = int(record[0:5])
                    resname = record[5:10].strip()
                    atomname = record[10:15].strip()
                    atomnum = int(record[15:20])

                    starts_new_residue = (residue is None
                                          or resnum != residue.resSeq)
                    if starts_new_residue:
                        residue = top.add_residue(resname, chain,
                                                  resSeq=resnum)

                    # Keep the first character plus any lowercase letters
                    # that follow it as the candidate element symbol.
                    elem_symbol = (
                        atomname[0] + sub('[A-Z0-9]', '', atomname[1:])
                        if len(atomname) > 1 else atomname)

                    try:
                        element = elem.get_by_symbol(elem_symbol)
                    except KeyError:
                        element = elem.virtual
                    top.add_atom(atomname, element=element, residue=residue,
                                 serial=atomnum)

                if coord_indices is None:
                    # The distance between the first two decimal points gives
                    # the field width; up to six fields start at column 20.
                    dot_columns = (col for col, char
                                   in enumerate(record[20:], start=20)
                                   if char == '.')
                    first_dot = next(dot_columns)
                    second_dot = next(dot_columns)
                    width = abs(first_dot - second_dot)
                    boundaries = range(20, 20 + width * 7, width)
                    coord_indices = list(pairwise(boundaries))

                numbers = [float(record[lo:hi]) for lo, hi in coord_indices
                           if hi <= len(record)]
                x, y, z, *vel = numbers
                assert np.isclose(traj.xyz[frame, atom_idx, :],
                                  (x, y, z)).all()
                if vel:
                    velxyz[frame, atom_idx, :] = vel

            # The line after the atom records (box vectors) is read and dropped.
            next(records)

    traj.top = top
    if np.all(velxyz == 0.0):
        velxyz = None
    return traj, velxyz
Example #19
0
    def _to_topology(self, atom_list, chain_types=None, residue_types=None):
        """Build a Topology from atoms, grouping them by ancestor type.

        Parameters
        ----------
        atom_list : iterable
            Atoms to add, in order.  Each must provide ``name``, ``charge``
            and ``ancestors()``.
        chain_types : optional
            Type, or list/set/tuple of types, whose instances among an atom's
            ancestors start a chain.  Atoms without such an ancestor go into
            a default chain.
        residue_types : optional
            Same as `chain_types`, but for residues.  Atoms without such an
            ancestor go into a default 'RES' residue of their chain.

        Returns
        -------
        top : mdtraj.Topology
            Topology with empty chains/residues removed and with a bond for
            every pair in ``self.bonds()`` whose atoms were both added.

        """

        def as_type_tuple(types):
            # A Compound instance is swapped for the Compound class itself;
            # lists and sets become tuples so isinstance() accepts them.
            if isinstance(types, Compound):
                types = [Compound]
            if isinstance(types, (list, set)):
                types = tuple(types)
            return types

        def closest_ancestor(atom, types):
            # First ancestor that is an instance of `types`, else None.
            for ancestor in atom.ancestors():
                if types and isinstance(ancestor, types):
                    return ancestor
            return None

        chain_types = as_type_tuple(chain_types)
        residue_types = as_type_tuple(residue_types)

        top = Topology()
        atom_mapping = {}

        default_chain = top.add_chain()
        default_residue = top.add_residue('RES', default_chain)

        residue_compound = None
        chain_compound = None
        current_residue = None
        current_chain = None

        for atom in atom_list:
            # Chains
            chain_parent = closest_ancestor(atom, chain_types)
            if chain_parent is None:
                current_chain = default_chain
                current_chain.compound = chain_compound
            elif chain_parent != chain_compound:
                chain_compound = chain_parent
                current_chain = top.add_chain()
                # Every new chain gets its own fallback residue for atoms
                # that have no residue-type ancestor.
                chain_fallback_residue = top.add_residue('RES', current_chain)
                current_chain.compound = chain_compound

            # Residues
            residue_parent = closest_ancestor(atom, residue_types)
            if residue_parent is None:
                if current_chain != default_chain:
                    current_residue = chain_fallback_residue
                else:
                    current_residue = default_residue
                current_residue.compound = residue_compound
            elif residue_parent != residue_compound:
                residue_compound = residue_parent
                current_residue = top.add_residue(
                    residue_parent.__class__.__name__, current_chain)
                current_residue.compound = residue_compound

            # Add the actual atoms; unknown names fall back to the "VS" symbol.
            try:
                element = get_by_symbol(atom.name)
            except KeyError:
                element = get_by_symbol("VS")
            new_atom = top.add_atom(atom.name, element, current_residue)
            new_atom.charge = atom.charge
            atom_mapping[atom] = new_atom

        # Remove empty default residues (and chains).  Both lists are taken
        # before anything is removed.
        empty_chains = [c for c in top.chains if c.n_atoms == 0]
        empty_residues = [r for r in top.residues if r.n_atoms == 0]
        for empty_chain in empty_chains:
            top._chains.remove(empty_chain)
        for empty_residue in empty_residues:
            for remaining_chain in top.chains:
                try:
                    remaining_chain._residues.remove(empty_residue)
                except ValueError:  # Already gone.
                    pass

        for first, second in self.bonds():
            # Skip bonds that reach outside the converted atoms, e.g. when a
            # sub-compound is bonded to a different sub-compound.
            if first in atom_mapping and second in atom_mapping:
                top.add_bond(atom_mapping[first], atom_mapping[second])
        return top
Example #20
0
    def from_dataframe(cls, atoms, bonds=None):
        """Create a mdtraj topology from a pandas data frame

        Parameters
        ----------
        atoms : pandas.DataFrame
            The atoms in the topology, represented as a data frame. This data
            frame should have columns "serial" (atom index), "name" (atom name),
            "element" (atom's element), "resSeq" (index of the residue)
            "resName" (name of the residue), "chainID" (index of the chain),
            and optionally "segmentID", following the same conventions
            as wwPDB 3.0 format. The frame's index must be 0..len(atoms)-1.
            The frame passed in is not modified.
        bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
            The bonds in the topology, represented as an n_bonds x 2 array
            of the indices of the atoms involved in each bond. Specifying
            bonds here is optional. To create standard protein bonds, you can
            use `create_standard_bonds` to "fill in" the bonds on your newly
            created Topology object

        Returns
        -------
        out : instance of ``cls``
            A new topology holding one chain per unique "chainID" and one
            residue per unique "resSeq" within each chain.

        Raises
        ------
        TypeError
            If `atoms` is not a pandas.DataFrame or `bonds` is not a
            numpy.ndarray.
        ValueError
            If a required column is missing, the index is not 0..n-1, or the
            atoms of one residue do not share a single residue name.

        See Also
        --------
        create_standard_bonds
        """
        pd = import_('pandas')

        if bonds is None:
            bonds = np.zeros((0, 2), dtype=int)

        # Type checks come first: inspecting `atoms.columns` on something that
        # is not a DataFrame would otherwise fail with an AttributeError and
        # the TypeError below would never be reached.
        if not isinstance(atoms, pd.DataFrame):
            raise TypeError('atoms must be an instance of pandas.DataFrame. '
                            'You supplied a %s' % type(atoms))
        if not isinstance(bonds, np.ndarray):
            raise TypeError('bonds must be an instance of numpy.ndarray. '
                            'You supplied a %s' % type(bonds))

        for col in [
                "name", "element", "resSeq", "resName", "chainID", "serial"
        ]:
            if col not in atoms.columns:
                raise ValueError('dataframe must have column %s' % col)

        if "segmentID" not in atoms.columns:
            # Work on a new frame so the caller's DataFrame keeps its columns.
            atoms = atoms.assign(segmentID="")

        if not np.all(np.arange(len(atoms)) == atoms.index):
            raise ValueError('atoms must be uniquely numbered '
                             'starting from zero.')

        out = cls()
        # Atoms are stored by index, since the chain/residue grouping below
        # does not necessarily visit them in index order.
        out._atoms = [None] * len(atoms)

        for ci in np.unique(atoms['chainID']):
            chain_atoms = atoms[atoms['chainID'] == ci]
            c = out.add_chain()

            for ri in np.unique(chain_atoms['resSeq']):
                residue_atoms = chain_atoms[chain_atoms['resSeq'] == ri]
                rnames = residue_atoms['resName']
                residue_name = np.array(rnames)[0]
                segids = residue_atoms['segmentID']
                segment_id = np.array(segids)[0]
                if not np.all(rnames == residue_name):
                    raise ValueError('All of the atoms with residue index %d '
                                     'do not share the same residue name' % ri)
                r = out.add_residue(residue_name, c, ri, segment_id)

                for atom_index, atom in residue_atoms.iterrows():
                    # Fixes bizarre hashing issue on Py3K.  See #545
                    atom_index = int(atom_index)
                    a = Atom(atom['name'],
                             elem.get_by_symbol(atom['element']),
                             atom_index,
                             r,
                             serial=atom['serial'])
                    out._atoms[atom_index] = a
                    r._atoms.append(a)

        for ai1, ai2 in bonds:
            out.add_bond(out.atom(ai1), out.atom(ai2))

        out._numAtoms = out.n_atoms
        return out
Example #21
0
def to_mdtraj_Topology(item, atom_indices='all', check=True):
    """Convert a topology item into an ``mdtraj.Topology``.

    Atoms are added in the row order of ``item.atoms_dataframe``.  A new
    chain is started whenever "chain_index" changes from the previous row,
    and a new residue whenever "group_index" changes.  Bonds are taken from
    the "atom1_index"/"atom2_index" columns of ``item.bonds_dataframe`` and
    index into that same row order.

    Parameters
    ----------
    item
        Object exposing ``atoms_dataframe`` and ``bonds_dataframe``.
    atom_indices : optional
        Passed through ``digest_atom_indices`` when `check` is true.
        NOTE(review): the result is not used to subset the atoms, so the
        whole item is always converted -- confirm whether that is intended.
    check : bool, optional
        When true, the arguments are run through ``digest_item`` and
        ``digest_atom_indices`` before the conversion.

    Returns
    -------
    mdtraj.Topology
        The converted topology.

    Raises
    ------
    LibraryNotFound
        If mdtraj cannot be imported.
    """
    if check:

        digest_item(item, 'molsysmt.Topology')
        atom_indices = digest_atom_indices(atom_indices)

    # Only a failed import means the library is missing; any other error
    # raised while importing should surface unchanged.
    try:
        from mdtraj import Topology
        from mdtraj.core import element
    except ImportError as err:
        raise LibraryNotFound('mdtraj') from err

    atoms_df = item.atoms_dataframe
    bonds_df = item.bonds_dataframe

    atom_name_array = atoms_df["atom_name"].to_numpy()
    atom_type_array = atoms_df["atom_type"].to_numpy()
    group_index_array = atoms_df["group_index"].to_numpy()
    group_name_array = atoms_df["group_name"].to_numpy()
    group_id_array = atoms_df["group_id"].to_numpy()
    chain_index_array = atoms_df["chain_index"].to_numpy()

    bonds_atom1 = bonds_df["atom1_index"].to_numpy()
    bonds_atom2 = bonds_df["atom2_index"].to_numpy()

    tmp_item = Topology()

    former_group_index = -1
    former_chain_index = -1

    list_new_atoms = []

    rows = zip(atom_name_array, atom_type_array, group_index_array,
               group_name_array, group_id_array, chain_index_array)
    for atom_name, atom_type, group_index, group_name, group_id, chain_index in rows:

        # Both flags are evaluated against the previous row before either
        # "former" index is updated.
        new_group = (former_group_index != group_index)
        new_chain = (former_chain_index != chain_index)

        if new_chain:
            chain = tmp_item.add_chain()
            former_chain_index = chain_index

        if new_group:
            residue = tmp_item.add_residue(group_name,
                                           chain,
                                           resSeq=str(group_id))
            former_group_index = group_index

        elem = element.get_by_symbol(atom_type)
        atom = tmp_item.add_atom(atom_name, elem, residue)

        list_new_atoms.append(atom)

    for atom_1, atom_2 in zip(bonds_atom1, bonds_atom2):

        # Bond type and bond order are not transferred yet.
        tmp_item.add_bond(list_new_atoms[atom_1], list_new_atoms[atom_2])

    return tmp_item
Example #22
0
def json_to_mdtraj_topology(json_string):
    """Convert a JSON string topology to an mdtraj.Topology object.

    This can be used to write out different topology formats from
    mdtraj.

    Chains, residues and atoms are each added in ascending order of their
    'index' entry.  A residue without a "resSeq" entry is added with
    ``resSeq=None`` and triggers a warning; a missing "segmentID" defaults
    to the empty string.  An atom whose element symbol cannot be looked up
    is given the virtual element.

    Parameters
    ----------
    json_string : str
        JSON string of the topology.

    Returns
    -------
    topology : mdtraj.Topology object

    Warnings
    --------
    The topology format for wepy is subject to change in the future!

    Notes
    -----
    Copied in part from MDTraj.formats.hdf5 topology property.

    """

    by_index = operator.itemgetter('index')

    topology_dict = json.loads(json_string)
    topology = mdj.Topology()

    for chain_entry in sorted(topology_dict['chains'], key=by_index):
        chain = topology.add_chain()

        for residue_entry in sorted(chain_entry['residues'], key=by_index):
            if "resSeq" in residue_entry:
                res_seq = residue_entry["resSeq"]
            else:
                res_seq = None
                warn(
                    'No resSeq information found in HDF file, defaulting to zero-based indices'
                )
            seg_id = residue_entry.get("segmentID", "")

            residue = topology.add_residue(residue_entry['name'],
                                           chain,
                                           resSeq=res_seq,
                                           segment_id=seg_id)

            for atom_entry in sorted(residue_entry['atoms'], key=by_index):
                # A missing or unknown element symbol maps to the virtual
                # element instead of failing.
                try:
                    atom_element = elem.get_by_symbol(atom_entry['element'])
                except KeyError:
                    atom_element = elem.virtual
                topology.add_atom(atom_entry['name'], atom_element, residue)

    # Bond entries index into the atoms in the order the topology yields them.
    all_atoms = list(topology.atoms)
    for first, second in topology_dict['bonds']:
        topology.add_bond(all_atoms[first], all_atoms[second])

    return topology
Example #23
0
    def __init__(self, pdb_line, pdbstructure=None):
        """Create a new pdb.Atom from an ATOM or HETATM line.

        Example line:
        ATOM   2209  CB  TYR A 299       6.167  22.607  20.046  1.00  8.12           C
        00000000011111111112222222222333333333344444444445555555555666666666677777777778
        12345678901234567890123456789012345678901234567890123456789012345678901234567890

        ATOM line format description from
          http://deposit.rcsb.org/adit/docs/pdb_atom_format.html:

        COLUMNS        DATA TYPE       CONTENTS
        --------------------------------------------------------------------------------
         1 -  6        Record name     "ATOM  "
         7 - 11        Integer         Atom serial number.
        13 - 16        Atom            Atom name.
        17             Character       Alternate location indicator.
        18 - 20        Residue name    Residue name.
        22             Character       Chain identifier.
        23 - 26        Integer         Residue sequence number.
        27             AChar           Code for insertion of residues.
        31 - 38        Real(8.3)       Orthogonal coordinates for X in Angstroms.
        39 - 46        Real(8.3)       Orthogonal coordinates for Y in Angstroms.
        47 - 54        Real(8.3)       Orthogonal coordinates for Z in Angstroms.
        55 - 60        Real(6.2)       Occupancy (Default = 1.0).
        61 - 66        Real(6.2)       Temperature factor (Default = 0.0).
        73 - 76        LString(4)      Segment identifier, left-justified.
        77 - 78        LString(2)      Element symbol, right-justified.
        79 - 80        LString(2)      Charge on the atom.

        Parameters
        ----------
        pdb_line : str
            The ATOM/HETATM record to parse.
        pdbstructure : optional
            Parsing context shared between atoms.  When given, its
            ``_atom_numbers_are_hex`` / ``_residue_numbers_are_hex`` flags
            are read and may be switched on, its ``_next_atom_number`` /
            ``_next_residue_number`` counters are used as a fallback and
            updated, and its current model/chain/residue are consulted when
            the residue number cannot be parsed.  When ``None``, the line is
            parsed on its own.

        Raises
        ------
        ValueError
            If the residue name is misaligned, if a coordinate is not a
            number, or if the serial or residue number is neither decimal
            nor hexadecimal and no `pdbstructure` is available to supply a
            fallback value.
        """
        # We might modify first/final status during _finalize() methods
        self.is_first_atom_in_chain = False
        self.is_final_atom_in_chain = False
        self.is_first_residue_in_chain = False
        self.is_final_residue_in_chain = False
        # Start parsing fields from pdb line
        self.record_name = pdb_line[0:6].strip()
        # VMD sometimes uses hex for atoms greater than 9,999
        serial_field = pdb_line[6:11]
        if pdbstructure is not None and pdbstructure._atom_numbers_are_hex:
            self.serial_number = int(serial_field, 16)
        else:
            try:
                self.serial_number = int(serial_field)
            except ValueError:
                try:
                    self.serial_number = int(serial_field, 16)
                except ValueError:
                    if pdbstructure is None:
                        # No context to take a fallback number from.
                        raise
                    # Just give it the next number in sequence.
                    self.serial_number = pdbstructure._next_atom_number
                else:
                    # Remember the hex convention for the following atoms,
                    # but only when there is a structure to remember it on.
                    if pdbstructure is not None:
                        pdbstructure._atom_numbers_are_hex = True
        self.name_with_spaces = pdb_line[12:16]
        alternate_location_indicator = pdb_line[16]

        self.residue_name_with_spaces = pdb_line[17:20]
        # In some MD codes, notably ffamber in gromacs, residue name has a fourth character in
        # column 21
        possible_fourth_character = pdb_line[20:21]
        if possible_fourth_character != " ":
            # Fourth character should only be there if official 3 are already full
            if len(self.residue_name_with_spaces.strip()) != 3:
                raise ValueError('Misaligned residue name: %s' % pdb_line)
            self.residue_name_with_spaces += possible_fourth_character
        self.residue_name = self.residue_name_with_spaces.strip()

        self.chain_id = pdb_line[21]
        residue_field = pdb_line[22:26]
        if pdbstructure is not None and pdbstructure._residue_numbers_are_hex:
            self.residue_number = int(residue_field, 16)
        else:
            try:
                self.residue_number = int(residue_field)
            except ValueError:
                try:
                    self.residue_number = int(residue_field, 16)
                except ValueError:
                    if pdbstructure is None:
                        # No context to infer the residue number from.
                        raise
                    # When VMD runs out of hex values it starts filling in the residue ID field with ****
                    # Look at the most recent atoms to figure out whether this is a new residue or not.
                    model = pdbstructure._current_model
                    chain = None if model is None else model._current_chain
                    currentRes = None if chain is None else chain._current_residue
                    if currentRes is None:
                        # This is the first residue in the model.
                        self.residue_number = pdbstructure._next_residue_number
                    elif currentRes.name_with_spaces != self.residue_name_with_spaces:
                        # The residue name has changed.
                        self.residue_number = pdbstructure._next_residue_number
                    elif self.name_with_spaces in currentRes.atoms_by_name:
                        # There is already an atom with this name.
                        self.residue_number = pdbstructure._next_residue_number
                    else:
                        self.residue_number = currentRes.number
                else:
                    if pdbstructure is not None:
                        pdbstructure._residue_numbers_are_hex = True
        self.insertion_code = pdb_line[26]
        # coordinates, occupancy, and temperature factor belong in Atom.Location object
        x = float(pdb_line[30:38])
        y = float(pdb_line[38:46])
        z = float(pdb_line[46:54])
        try:
            occupancy = float(pdb_line[54:60])
        except ValueError:
            occupancy = 1.0
        try:
            temperature_factor = float(pdb_line[60:66])
        except ValueError:
            temperature_factor = 0.0
        self.locations = {}
        loc = Atom.Location(alternate_location_indicator, np.array([x, y, z]),
                            occupancy, temperature_factor,
                            self.residue_name_with_spaces)
        self.locations[alternate_location_indicator] = loc
        self.default_location_id = alternate_location_indicator
        # segment id, element_symbol, and formal_charge are not always present
        self.segment_id = pdb_line[72:76].strip()
        self.element_symbol = pdb_line[76:78].strip()
        try:
            self.formal_charge = int(pdb_line[78:80])
        except ValueError:
            self.formal_charge = None
        # figure out atom element
        try:
            # First try to find a sensible element symbol from columns 76-77
            self.element = element.get_by_symbol(self.element_symbol)
        except KeyError:
            # otherwise, deduce element from first two characters of atom name
            # remove digits found in some hydrogen atom names
            symbol = self.name_with_spaces[0:2].strip().lstrip("0123456789")
            try:
                # Some molecular dynamics PDB files, such as gromacs with ffamber force
                # field, include 4-character hydrogen atom names beginning with "H".
                # Hopefully elements like holmium (Ho) and mercury (Hg) will have fewer than four
                # characters in the atom name, so a 4-character name starting with "H"
                # is treated as hydrogen.
                # NOTE(review): `self.name` is not assigned in this method; presumably
                # it is a class-level property derived from `name_with_spaces` -- confirm.
                if len(self.name) == 4 and self.name[0:1] == "H":
                    self.element = element.hydrogen
                else:
                    self.element = element.get_by_symbol(symbol)
            except KeyError:
                # OK, I give up
                self.element = None
        if pdbstructure is not None:
            pdbstructure._next_atom_number = self.serial_number + 1
            pdbstructure._next_residue_number = self.residue_number + 1
Example #24
0
def topology_from_numpy(atoms, bonds=None):
    """Create a mdtraj topology from numpy arrays

    Parameters
    ----------
    atoms : np.ndarray
        The atoms in the topology, represented as a structured array with
        one record per atom. It should have fields "serial" (atom index),
        "name" (atom name), "element" (atom's element), "resSeq" (index of
        the residue), "resName" (name of the residue), "chainID" (index of
        the chain), and optionally "segmentID", following the same
        conventions as wwPDB 3.0 format. Text fields may hold either bytes
        or str values. The array is not modified.
    bonds : np.ndarray, shape=(n_bonds, 2), dtype=int, optional
        The bonds in the topology, represented as an n_bonds x 2 array
        of the indices of the atoms involved in each bond. Specifying
        bonds here is optional. To create standard protein bonds, you can
        use `create_standard_bonds` to "fill in" the bonds on your newly
        created Topology object

    Returns
    -------
    out : mdtraj.Topology
        Topology with one chain per unique "chainID" and one residue per
        unique "resSeq" within each chain; atom indices follow the record
        order of `atoms`.

    Raises
    ------
    ValueError
        If a required field is missing (including when `atoms` has no
        named fields at all), or if the atoms of one residue do not share
        a single residue name.

    See Also
    --------
    create_standard_bonds
    """
    if bonds is None:
        bonds = np.zeros((0, 2), dtype=int)

    # dtype.names is None for arrays without named fields; treat that as
    # "no fields" so the caller gets the ValueError below, not a TypeError.
    field_names = atoms.dtype.names or ()
    for col in ["name", "element", "resSeq", "resName", "chainID", "serial"]:
        if col not in field_names:
            raise ValueError('dataframe must have column %s' % col)

    # A structured array cannot grow a new field by assignment, so a missing
    # "segmentID" is handled by substituting "" when residues are created.
    has_segment_ids = "segmentID" in field_names

    from mdtraj.core.topology import Atom
    from mdtraj.core import element as elem
    out = mdtraj.Topology()

    # TODO: `bonds` is deliberately not type-checked so that array-likes
    # such as h5py data sets can be passed.

    def _text(value):
        # Accept both bytes fields ('S' dtypes) and str fields ('U' dtypes).
        if isinstance(value, bytes):
            return value.decode('ascii')
        return str(value)

    out._atoms = [None] * len(atoms)

    # Position of every record in `atoms`; sliced alongside the records so
    # each atom keeps its original index after grouping.
    positions = np.arange(len(atoms))

    for ci in np.unique(atoms['chainID']):
        in_chain = atoms['chainID'] == ci
        chain_atoms = atoms[in_chain]
        chain_positions = positions[in_chain]
        c = out.add_chain()

        for ri in np.unique(chain_atoms['resSeq']):
            in_residue = chain_atoms['resSeq'] == ri
            residue_atoms = chain_atoms[in_residue]
            indices = chain_positions[in_residue]

            rnames = residue_atoms['resName']
            residue_name = rnames[0]
            if not np.all(rnames == residue_name):
                raise ValueError('All of the atoms with residue index %d '
                                 'do not share the same residue name' % ri)

            if has_segment_ids:
                segment_id = _text(residue_atoms['segmentID'][0])
            else:
                segment_id = ""
            r = out.add_residue(_text(residue_name), c, ri, segment_id)

            for index, atom in zip(indices, residue_atoms):
                index = int(index)
                a = Atom(_text(atom['name']),
                         elem.get_by_symbol(_text(atom['element'])),
                         index,
                         r,
                         serial=atom['serial'])
                out._atoms[index] = a
                r._atoms.append(a)

    for ai1, ai2 in bonds:
        out.add_bond(out.atom(ai1), out.atom(ai2))

    out._numAtoms = out.n_atoms
    return out
Example #25
0
def load_prmtop(filename):
    """Load an AMBER prmtop topology file from disk.

    Parameters
    ----------
    filename : str
        Path to the prmtop file on disk.

    Returns
    -------
    top : md.Topology
        The resulting topology, as an md.Topology object.

    Raises
    ------
    Exception
        If a negative atom pointer is found in the bond sections.

    Notes
    -----
    Deprecated fields in the prmtop file are not loaded. This includes the
    BOX dimensions, which should be stored in trajectory files instead of the
    prmtop for systems with periodic boundary conditions. Because '.binpos'
    files do not store box dimensions, this means that unitcell information
    will be lost if you use .binpos + .prmtop files with MDTraj.

    ``%VERSION`` and ``%COMMENT`` lines are skipped. The data of a section
    whose ``%FORMAT`` specifier is not matched by ``FORMAT_RE_PATTERN`` is
    ignored instead of aborting the whole load.

    Examples
    --------
    >>> topology = md.load_prmtop('mysystem.prmtop')
    >>> # or
    >>> trajectory = md.load('trajectory.mdcrd', top='system.prmtop')
    """
    top = topology.Topology()

    flags = []        # section names, in file order
    raw_format = {}   # section name -> (format, count, type, length, precision)
    raw_data = {}     # section name -> list of stripped fixed-width items
    # True while the current section has a %FORMAT we could not parse; its
    # data lines are then skipped rather than mis-parsed.
    ignoring = False

    with open(filename, 'r') as f:
        for line in f:
            if line.startswith(('%VERSION', '%COMMENT')):
                # Neither line carries topology data; in particular a
                # %COMMENT must not be chopped up as section data.
                continue

            elif line.startswith('%FLAG'):
                _, flag = line.rstrip().split(None, 1)
                flags.append(flag)
                raw_data[flag] = []
                ignoring = False

            elif line.startswith('%FORMAT'):
                fmt = line.rstrip()
                # Keep only the text between the parentheses.
                fmt = fmt[fmt.index('(') + 1:fmt.index(')')]
                m = FORMAT_RE_PATTERN.search(fmt)
                if m is None:
                    # Unrecognised specifier: skip this section's data.
                    ignoring = True
                else:
                    raw_format[flags[-1]] = (fmt, m.group(1), m.group(2),
                                             m.group(3), m.group(4))

            elif flags and flags[-1] == 'TITLE' and not raw_data['TITLE']:
                # The title is free text; store the line whole.
                raw_data['TITLE'] = line.rstrip()

            elif not ignoring:
                flag = flags[-1]
                item_length = int(raw_format[flag][3])
                line = line.rstrip()
                # Slice the line into fixed-width fields.
                for index in range(0, len(line), item_length):
                    item = line[index:index + item_length]
                    if item:
                        raw_data[flag].append(item.strip())

    # Add atoms to the topology

    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    previous_residue = None
    c = top.add_chain()

    n_atoms = int(_get_pointer_value('NATOM', raw_data))

    # Build a dictionary telling us which atom belongs to which residue.
    residue_pointer_dict = {}
    res_pointers = raw_data['RESIDUE_POINTER']
    first_atom = [int(p) - 1 for p in res_pointers]  # minus 1 necessary
    first_atom.append(n_atoms)  # sentinel closing the last residue
    res = 0
    for i in range(n_atoms):
        while first_atom[res + 1] <= i:
            res += 1
        residue_pointer_dict[i] = res

    # Add each residue/atom to the topology object.
    for index in range(n_atoms):

        res_number = residue_pointer_dict[index]
        if res_number != previous_residue:
            # First atom of a new residue: create the residue and pick the
            # atom-name replacement table that applies to it.
            previous_residue = res_number

            res_name = raw_data['RESIDUE_LABEL'][res_number].strip()
            if res_name in pdb.PDBTrajectoryFile._residueNameReplacements:
                res_name = pdb.PDBTrajectoryFile._residueNameReplacements[
                    res_name]
            r = top.add_residue(res_name, c)

            if res_name in pdb.PDBTrajectoryFile._atomNameReplacements:
                atom_replacements = pdb.PDBTrajectoryFile._atomNameReplacements[
                    res_name]
            else:
                atom_replacements = {}

        atom_name = raw_data['ATOM_NAME'][index].strip()
        if atom_name in atom_replacements:
            atom_name = atom_replacements[atom_name]

        # Get the element from the prmtop file if available
        if 'ATOMIC_NUMBER' in raw_data:
            try:
                element = elem.Element.getByAtomicNumber(
                    int(raw_data['ATOMIC_NUMBER'][index]))
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name. Two-letter
            # prefixes are tested first so that e.g. 'CL' is not read as
            # carbon by the single-letter fallback.
            upper = atom_name.upper()
            if upper.startswith('CL'):
                element = elem.chlorine
            elif upper.startswith('NA'):
                element = elem.sodium
            elif upper.startswith('MG'):
                element = elem.magnesium
            elif upper.startswith('ZN'):
                element = elem.zinc
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except KeyError:
                    element = None

        top.add_atom(atom_name, element, r)

    # Add bonds to the topology
    bond_pointers = raw_data["BONDS_INC_HYDROGEN"] + raw_data[
        "BONDS_WITHOUT_HYDROGEN"]
    atoms = list(top.atoms)

    # Entries are consumed in records of three; only the first two entries
    # of each record (the two atom pointers) are used here.
    bond_list = []
    for ii in range(0, len(bond_pointers), 3):
        if int(bond_pointers[ii]) < 0 or int(bond_pointers[ii + 1]) < 0:
            raise Exception("Found negative bonded atom pointers %s" %
                            ((bond_pointers[ii], bond_pointers[ii + 1]), ))

        else:
            # Pointers are integer-divided by 3 to obtain atom indices.
            bond_list.append(
                (int(bond_pointers[ii]) // 3, int(bond_pointers[ii + 1]) // 3))

    for bond in bond_list:
        top.add_bond(atoms[bond[0]], atoms[bond[1]])

    return top
Example #26
0
def load_prmtop(filename):
    """Read an AMBER prmtop file and build the corresponding topology.

    Parameters
    ----------
    filename : str
        Location of the prmtop file on disk.

    Returns
    -------
    top : md.Topology
        Topology assembled from the atoms, residues and bonds in the file.

    Notes
    -----
    Deprecated prmtop fields are not read. That covers the BOX dimensions,
    which belong in the trajectory file rather than the prmtop for periodic
    systems. Since '.binpos' files carry no box dimensions, combining
    .binpos + .prmtop files with MDTraj loses the unitcell information.

    Examples
    --------
    >>> topology = md.load_prmtop('mysystem.prmtop')
    >>> # or
    >>> trajectory = md.load('trajectory.mdcrd', top='system.prmtop')
    """
    top = topology.Topology()

    flags = []
    raw_format = {}
    raw_data = {}
    ignoring = False

    with open(filename, 'r') as handle:
        for raw_line in handle:
            if not raw_line.startswith('%'):
                # Data line: split it into fixed-width fields unless the
                # current section is being skipped.
                if ignoring:
                    continue
                section = flags[-1]
                _spec, _count, _kind, width, _precision = raw_format[section]
                step = int(width)
                text = raw_line.rstrip()
                fields = (text[start:start + step]
                          for start in range(0, len(text), step))
                raw_data[section].extend(
                    field.strip() for field in fields if field)
                continue

            # Header line. %COMMENT and any other unrecognised '%' line
            # falls through every branch and is dropped.
            if raw_line.startswith('%VERSION'):
                # Parsed for parity with the file layout; not used further.
                _tag, _version = raw_line.rstrip().split(None, 1)

            elif raw_line.startswith('%FLAG'):
                _tag, section = raw_line.rstrip().split(None, 1)
                flags.append(section)
                raw_data[section] = []
                # Title sections are free text and never parsed as data.
                ignoring = section in ('TITLE', 'CTITLE')

            elif raw_line.startswith('%FORMAT'):
                header = raw_line.rstrip()
                spec = header[header.index('(') + 1:header.index(')')]
                match = FORMAT_RE_PATTERN.search(spec)
                if match is None:
                    # Unknown specifier: skip this section's data lines.
                    ignoring = True
                else:
                    raw_format[flags[-1]] = (spec, match.group(1),
                                             match.group(2), match.group(3),
                                             match.group(4))

    # ---- atoms and residues ----

    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    chain = top.add_chain()

    n_atoms = int(_get_pointer_value('NATOM', raw_data))

    # residue_of_atom[i] is the residue number that atom i belongs to.
    residue_starts = [int(p) - 1 for p in raw_data['RESIDUE_POINTER']]
    residue_starts.append(n_atoms)  # sentinel closing the last residue
    residue_of_atom = []
    current = 0
    for atom_index in range(n_atoms):
        while residue_starts[current + 1] <= atom_index:
            current += 1
        residue_of_atom.append(current)

    residue_names = pdb.PDBTrajectoryFile._residueNameReplacements
    atom_names_by_residue = pdb.PDBTrajectoryFile._atomNameReplacements
    has_atomic_numbers = 'ATOMIC_NUMBER' in raw_data
    # (atom-name prefix, attribute of ``elem``), tested in this order.
    prefix_guesses = (('CL', 'chlorine'), ('NA', 'sodium'),
                      ('MG', 'magnesium'), ('ZN', 'zinc'))

    last_residue = None
    for atom_index, res_number in enumerate(residue_of_atom):

        if res_number != last_residue:
            # Entering a new residue: create it and select its atom-name
            # replacement table.
            last_residue = res_number

            res_name = raw_data['RESIDUE_LABEL'][res_number].strip()
            res_name = (residue_names[res_name]
                        if res_name in residue_names else res_name)
            residue = top.add_residue(res_name, chain)

            atom_replacements = (atom_names_by_residue[res_name]
                                 if res_name in atom_names_by_residue else {})

        atom_name = raw_data['ATOM_NAME'][atom_index].strip()
        if atom_name in atom_replacements:
            atom_name = atom_replacements[atom_name]

        if has_atomic_numbers:
            # The file states the element explicitly.
            try:
                element = elem.Element.getByAtomicNumber(
                    int(raw_data['ATOMIC_NUMBER'][atom_index]))
            except KeyError:
                element = elem.virtual
        else:
            # Otherwise guess the element from the atom name.
            upper = atom_name.upper()
            for prefix, attribute in prefix_guesses:
                if upper.startswith(prefix):
                    element = getattr(elem, attribute)
                    break
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except KeyError:
                    element = elem.virtual

        top.add_atom(atom_name, element, residue)

    # ---- bonds ----

    bond_pointers = (raw_data["BONDS_INC_HYDROGEN"]
                     + raw_data["BONDS_WITHOUT_HYDROGEN"])
    atoms = list(top.atoms)

    # Validate and convert every record before adding any bond.
    bond_list = []
    for offset in range(0, len(bond_pointers), 3):
        first = int(bond_pointers[offset])
        if first >= 0:
            second = int(bond_pointers[offset + 1])
            if second >= 0:
                bond_list.append((first // 3, second // 3))
                continue
        raise Exception("Found negative bonded atom pointers %s"
                        % ((bond_pointers[offset],
                            bond_pointers[offset + 1]),))

    for left, right in bond_list:
        top.add_bond(atoms[left], atoms[right])

    return top
Example #27
0
def _new_json_to_mdtraj_topology(json_string):
    """Build an mdtraj Topology from a JSON topology string.

    TODO what is the difference between this and the other one?

    Chains and residues are created in the order they appear in the JSON.
    Within each residue, atoms are added in ascending order of their
    'index' field. Bonds are resolved through those 'index' values.

    Parameters
    ----------
    json_string : str
        JSON document with a 'chains' list and a 'bonds' list. Each chain
        holds a 'residues' list; each residue holds 'name', 'resSeq',
        'segmentID' and an 'atoms' list; each atom holds 'index', 'name'
        and 'element'. Each bond is a pair of atom 'index' values.

    Returns
    -------
    topology : mdtraj.Topology
        The reconstructed topology.

    Raises
    ------
    KeyError
        If a required key is missing, or a bond refers to an atom index
        that does not occur in any residue.

    """

    topology_dict = json.loads(json_string)

    # start a new topology
    topology = mdj.Topology()

    # maps the atom index stored in the JSON to the atom created for it,
    # so bonds can be resolved afterwards
    atoms = {}

    # loop through all the chains
    for chain_dict in topology_dict['chains']:

        chain = topology.add_chain()

        # then go through the residues here
        for residue_dict in chain_dict['residues']:

            residue = topology.add_residue(
                residue_dict['name'],
                chain,
                resSeq=residue_dict['resSeq'],
                segment_id=residue_dict['segmentID'])

            for atom_dict in sorted(residue_dict['atoms'],
                                    key=operator.itemgetter('index')):
                try:
                    element = elem.get_by_symbol(atom_dict['element'])
                except KeyError:
                    # unknown symbol: fall back to the virtual element
                    element = elem.virtual
                # relies on add_atom returning the atom it created
                atoms[atom_dict['index']] = topology.add_atom(
                    atom_dict['name'], element, residue)

    for index1, index2 in topology_dict['bonds']:
        topology.add_bond(atoms[index1], atoms[index2])

    return topology