Ejemplo n.º 1
0
def load_prmtop(filename):
    """Load an AMBER prmtop topology file from disk.

    Parameters
    ----------
    filename : str
        Path to the prmtop file on disk.

    Returns
    -------
    top : md.Topology
        The resulting topology, as an md.Topology object.

    Raises
    ------
    ValueError
        If a ``%FORMAT`` line has no parenthesised descriptor or the
        descriptor is not recognised by ``FORMAT_RE_PATTERN``.
    Exception
        If a bond entry contains a negative atom pointer.

    Notes
    -----
    Deprecated fields in the prmtop file are not loaded. This includes the
    BOX dimensions, which should be stored in trajectory files instead of the
    prmtop for systems with periodic boundary conditions. Because '.binpos'
    files do not store box dimensions, this means that unitcell information
    will be lost if you use .binpos + .prmtop files with MDTraj.

    ``%VERSION`` and ``%COMMENT`` lines are skipped; all atoms are added to a
    single chain.

    Examples
    --------
    >>> topology = md.load_prmtop('mysystem.prmtop')
    >>> # or
    >>> trajectory = md.load('trajectory.mdcrd', top='system.prmtop')
    """
    top = topology.Topology()

    flags = []        # section names, in file order
    raw_format = {}   # section name -> parsed %FORMAT descriptor
    raw_data = {}     # section name -> list of stripped string items

    with open(filename, 'r') as f:
        for line in f:
            if line.startswith(('%VERSION', '%COMMENT')):
                # Neither carries topology data. In particular a %COMMENT
                # line must not fall through to the fixed-width data parser,
                # where it would be chopped into bogus items.
                continue

            elif line.startswith('%FLAG'):
                _tag, flag = line.rstrip().split(None, 1)
                flags.append(flag)
                raw_data[flag] = []

            elif line.startswith('%FORMAT'):
                format_line = line.rstrip()
                # str.index raises ValueError if a parenthesis is missing.
                descriptor = format_line[format_line.index('(') + 1:
                                         format_line.index(')')]
                match = FORMAT_RE_PATTERN.search(descriptor)
                if match is None:
                    raise ValueError(
                        'Unrecognized %%FORMAT descriptor %r in %s'
                        % (descriptor, filename))
                raw_format[flags[-1]] = (descriptor, match.group(1),
                                         match.group(2), match.group(3),
                                         match.group(4))

            elif flags \
                 and 'TITLE'==flags[-1] \
                 and not raw_data['TITLE']:
                # The title is free text, stored as a single string rather
                # than being split into fixed-width items.
                raw_data['TITLE'] = line.rstrip()

            else:
                data_line = line.rstrip()
                if not data_line:
                    # Blank lines contribute no items; skipping them here
                    # also tolerates blank lines ahead of the first %FLAG.
                    continue
                flag = flags[-1]
                # Field 3 of the descriptor tuple is the per-item width.
                item_length = int(raw_format[flag][3])
                for start in range(0, len(data_line), item_length):
                    raw_data[flag].append(
                        data_line[start:start + item_length].strip())

    # Add atoms to the topology

    pdb.PDBTrajectoryFile._loadNameReplacementTables()
    c = top.add_chain()

    n_atoms = int(_get_pointer_value('NATOM', raw_data))

    # Map every atom index to the index of the residue that contains it.
    # RESIDUE_POINTER holds the 1-based index of each residue's first atom;
    # n_atoms is appended as a sentinel closing the last residue.
    first_atom = [int(p) - 1 for p in raw_data['RESIDUE_POINTER']]
    first_atom.append(n_atoms)
    atom_to_residue = []
    res = 0
    for i in range(n_atoms):
        while first_atom[res + 1] <= i:
            res += 1
        atom_to_residue.append(res)

    atomic_numbers = raw_data.get('ATOMIC_NUMBER')

    # add each residue/atom to the topology object
    previous_residue = None
    for index in range(n_atoms):

        res_number = atom_to_residue[index]
        if res_number != previous_residue:
            # First atom of a new residue: create the residue and select the
            # atom-name replacement table that applies to it.
            previous_residue = res_number

            res_name = raw_data['RESIDUE_LABEL'][res_number].strip()
            if res_name in pdb.PDBTrajectoryFile._residueNameReplacements:
                res_name = pdb.PDBTrajectoryFile._residueNameReplacements[
                    res_name]
            r = top.add_residue(res_name, c)

            if res_name in pdb.PDBTrajectoryFile._atomNameReplacements:
                atom_replacements = pdb.PDBTrajectoryFile._atomNameReplacements[
                    res_name]
            else:
                atom_replacements = {}

        atom_name = raw_data['ATOM_NAME'][index].strip()
        if atom_name in atom_replacements:
            atom_name = atom_replacements[atom_name]

        if atomic_numbers is not None:
            # Get the element from the prmtop file if available
            try:
                element = elem.Element.getByAtomicNumber(
                    int(atomic_numbers[index]))
            except KeyError:
                element = None
        else:
            # Try to guess the element from the atom name: a few two-letter
            # prefixes first, then the first character as an element symbol.
            upper = atom_name.upper()
            if upper.startswith('CL'):
                element = elem.chlorine
            elif upper.startswith('NA'):
                element = elem.sodium
            elif upper.startswith('MG'):
                element = elem.magnesium
            elif upper.startswith('ZN'):
                element = elem.zinc
            elif not atom_name:
                # Nothing to guess from; avoid indexing an empty string.
                element = None
            else:
                try:
                    element = elem.get_by_symbol(atom_name[0])
                except KeyError:
                    element = None

        top.add_atom(atom_name, element, r)

    # Add bonds to the topology. Each bond is a triplet (atom pointer, atom
    # pointer, bond type index); only the two atom pointers are used.
    bond_pointers = raw_data["BONDS_INC_HYDROGEN"] + raw_data[
        "BONDS_WITHOUT_HYDROGEN"]
    atoms = list(top.atoms)

    bond_list = []
    for ii in range(0, len(bond_pointers), 3):
        first = int(bond_pointers[ii])
        second = int(bond_pointers[ii + 1])
        if first < 0 or second < 0:
            raise Exception("Found negative bonded atom pointers %s" %
                            ((bond_pointers[ii], bond_pointers[ii + 1]), ))
        # AMBER stores these pointers as coordinate-array offsets
        # (3 x atom index), hence the integer division by 3.
        bond_list.append((first // 3, second // 3))

    for first, second in bond_list:
        top.add_bond(atoms[first], atoms[second])

    return top
Ejemplo n.º 2
0
def load_psf(fname):
    """Load a CHARMM or XPLOR PSF file from disk

    Parameters
    ----------
    fname : str
        Path to the PSF file on disk

    Returns
    -------
    top : md.Topology
        The resulting topology as an md.Topology object

    Notes
    -----
    Only the atom (NATOM) and bond (NBOND) sections are used; reading stops
    once the NBOND section has been parsed. A new chain is started in the
    topology every time the segment ID changes from one atom to the next.

    Raises
    ------
    PSFError if any parsing errors occur, including a missing NATOM or NBOND
    section, a truncated atom record, or a bond that references an atom index
    outside ``1..natom``.

    Examples
    --------
    >>> topology = md.load_psf('mysystem.psf')
    >>> # or
    >>> trajectory = md.load('trajectory.dcd', top='system.psf')
    """

    top = topology.Topology()

    with open(fname, 'r') as f:
        line = f.readline()
        if not line.startswith('PSF'):
            raise PSFError('Unrecognized PSF file.')

        # Skip the line that follows the header, then collect the sections
        # into a dict keyed by section name.
        f.readline()
        psfsections = dict()
        while True:
            try:
                sec, ptr, data = _parse_psf_section(f)
            except _PSFEOF:
                break
            psfsections[sec] = (ptr, data)
            # We only have to parse up to the NBOND section
            if sec == 'NBOND':
                break

    # Report missing sections as a parse error instead of a bare KeyError.
    for required in ('NATOM', 'NBOND'):
        if required not in psfsections:
            raise PSFError('PSF file has no %s section' % required)

    pdb.PDBTrajectoryFile._loadNameReplacementTables()

    natom = _convert(psfsections['NATOM'][0], int, 'natom')
    atom_lines = psfsections['NATOM'][1]

    prev_residue = (None, None, None)
    last_chain = None
    for i in range(natom):
        try:
            words = atom_lines[i].split()
        except IndexError:
            raise PSFError('Expected %d atoms but the NATOM section ends '
                           'after %d' % (natom, i))
        if len(words) < 8:
            raise PSFError('Atom record %d has %d fields; at least 8 are '
                           'required' % (i + 1, len(words)))

        atid = _convert(words[0], int, 'atom index')
        if atid != i + 1:
            raise PSFError('Nonsequential atom indices detected!')
        segid = words[1]
        resid = _convert(words[2], int, 'residue number')
        rname = words[3]
        name = words[4]
        # words[5] (atom type) and words[6] (partial charge) are not used.
        mass = _convert(words[7], float, 'atomic mass')

        if last_chain != segid:
            c = top.add_chain()
            last_chain = segid

        # The residue key uses the raw residue name from the file.
        curr_residue = (resid, rname, segid)
        if prev_residue != curr_residue:
            prev_residue = curr_residue
            # Resolve the standardized residue name once per residue and
            # remember it, so every atom of the residue (not just the first)
            # uses the same key into the atom-name replacement tables.
            try:
                std_rname = pdb.PDBTrajectoryFile._residueNameReplacements[
                    rname]
            except KeyError:
                std_rname = rname
            r = top.add_residue(std_rname, c, resid)

        try:
            name = pdb.PDBTrajectoryFile._atomNameReplacements[std_rname][name]
        except KeyError:
            pass

        # Try to guess the element from the atom name for some of the common
        # ions using the names that CHARMM assigns to ions. If it's not one of
        # these 'weird' ion names, look up the element by mass. If the mass is
        # 0, assume a lone pair
        upper = name.upper()
        if upper.startswith('CLA'):
            element = elem.chlorine
        elif upper.startswith('SOD'):
            element = elem.sodium
        elif upper.startswith('POT'):
            element = elem.potassium
        elif upper == 'CAL':
            element = elem.calcium
        elif mass == 0:
            element = None
        else:
            element = elem.Element.getByMass(mass * u.dalton)
        top.add_atom(name, element, r)

    # Add bonds to the topology
    atoms = list(top.atoms)
    bond_data = psfsections['NBOND'][1]
    nbond = _convert(psfsections['NBOND'][0], int, 'number of bonds')
    if len(bond_data) != nbond * 2:
        raise PSFError('Got %d indexes for %d bonds' % (len(bond_data), nbond))

    n_top_atoms = len(atoms)
    for i in range(nbond):
        first = bond_data[2 * i]
        second = bond_data[2 * i + 1]
        # Bond indexes are 1-based. Reject anything out of range: an index
        # of 0 or below would otherwise wrap around through negative list
        # indexing and silently bond the wrong atoms.
        if not (1 <= first <= n_top_atoms and 1 <= second <= n_top_atoms):
            raise PSFError('Bond %d references atom indexes (%s, %s) outside '
                           'the range 1..%d'
                           % (i + 1, first, second, n_top_atoms))
        top.add_bond(atoms[first - 1], atoms[second - 1])

    return top