Ejemplo n.º 1
0
 def __init__(self, name=''):
     self.atoms = AtomList()
     self.bonds = TrackedList()
     self.name = name
     self.head = None
     self.tail = None
     self.connections = []
     self._atomnames = set()
     self.type = UNKNOWN
     self.first_patch = None
     self.last_patch = None
     self.groups = []
Ejemplo n.º 2
0
 def __init__(self, name=''):
     self.atoms = AtomList()
     self.bonds = TrackedList()
     self.name = name
     self.head = None
     self.tail = None
     self.connections = []
     self.type = UNKNOWN
     self.first_patch = None
     self.last_patch = None
     self.groups = []
     self.overload_level = 0
     self._map = dict()
Ejemplo n.º 3
0
 def __init__(self, name=''):
     self.atoms = AtomList()
     self.bonds = TrackedList()
     self.lonepairs = list() # TODO: Should this be a TrackedList?
     self.name = name
     self.head = None
     self.tail = None
     self.connections = []
     self.type = UNKNOWN
     self.first_patch = None
     self.last_patch = None
     self.groups = []
     self.override_level = 0
     self._map = dict()
     self._impr = []
Ejemplo n.º 4
0
 def __init__(self, name=""):
     self.atoms = AtomList()
     self.bonds = TrackedList()
     self.name = name
     self.head = None
     self.tail = None
     self.connections = []
     self._atomnames = set()
     self.type = UNKNOWN
     self.first_patch = None
     self.last_patch = None
     self.groups = []
Ejemplo n.º 5
0
 def __init__(self, name=''):
     self.atoms = AtomList()
     self.bonds = TrackedList()
     self.name = name
     self.head = None
     self.tail = None
     self.connections = []
     self.type = UNKNOWN
     self.first_patch = None
     self.last_patch = None
     self.groups = []
     self.overload_level = 0
     self._map = dict()
Ejemplo n.º 6
0
class ResidueTemplate(object):
    """
    This is a residue template, which contains a listing of the atoms in the
    residue template as well as a mapping of which atoms are bonded to other
    atoms.

    Parameters
    ----------
    name : str, optional
        If provided, this is the name of the residue

    Attributes
    ----------
    atoms : :class:`AtomList`
        List of atoms in this residue
    bonds : :class:`TrackedList`
        List of the bonds between the atoms in this residue
    coordinates : np.ndarray(natom, 3)
        The partial atomic coordinates
    connections : list of :class:`Atom`
        A list of all atoms that should form connections with atoms of another
        residue *besides* the head and tail atoms
    head : :class:`Atom` or None
        The atom that is connected to the residue that comes before this one
    tail : :class:`Atom` or None
        The atom that is connected to the *next* residue after this one
    first_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose tail is added to the head
        atom of this residue when this residue is the first in a chain
    last_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose head is added to the tail
        atom of this residue when this residue is the last in a chain
    groups : list of list(:class:`Atom`)
        If set, each group is a list of Atom instances making up each group
    overload_level : integer
        For use with OpenMM ResidueTemplates. If OpenMM ForceField is given multiple
        identically-matching residue templates with the same names it choses
        (overloads with) the one with the highest overload_level
        (overloadLevel in OpenMM). Default is 0.
    """
    def __init__(self, name=''):
        self.atoms = AtomList()
        self.bonds = TrackedList()
        self.name = name
        self.head = None
        self.tail = None
        self.connections = []
        self.type = UNKNOWN
        self.first_patch = None
        self.last_patch = None
        self.groups = []
        self.overload_level = 0
        self._map = dict()

    def __repr__(self):
        if self.head is not None:
            head = self.head.name
        else:
            head = 'None'
        if self.tail is not None:
            tail = self.tail.name
        else:
            tail = 'None'
        return '<%s %s: %d atoms; %d bonds; head=%s; tail=%s>' % (
            type(self).__name__, self.name, len(self.atoms), len(
                self.bonds), head, tail)

    @property
    def map(self):
        return self._map

    def add_atom(self, atom):
        """ Adds an atom to this residue template

        Parameters
        ----------
        atom : :class:`Atom`
            The atom to add to this residue

        Raises
        ------
        ValueError if ``atom`` has the same name as another atom in this
        residue already
        """
        if atom.name in self._map:
            raise ValueError('Residue already has atom named %s' % atom.name)
        atom.residue = self
        self.atoms.append(atom)
        self._map[atom.name] = atom

    def add_bond(self, atom1, atom2, order=1.0):
        """ Adds a bond between the two provided atoms in the residue

        Parameters
        ----------
        atom1 : :class:`Atom` or int or str
            One of the atoms in the bond. It must be in the ``atoms`` list of
            this ResidueTemplate. It can also be the atom index (index from 0)
            of the atom in the bond.
        atom2 : :class:`Atom` or int or str
            The other atom in the bond. It must be in the ``atoms`` list of this
            ResidueTemplate. It can also be the atom index (index from 0) of the
            atom in the bond.
        order : float
            The bond order of this bond. Bonds are classified as follows:
                1.0 -- single bond
                2.0 -- double bond
                3.0 -- triple bond
                1.5 -- aromatic bond
                1.25 -- amide bond
            Default is 1.0

        Raises
        ------
        IndexError if atom1 or atom2 are integers that are out of range of the
        number of atoms already in this template

        RuntimeError if atom1 or atom2 are :class:`Atom` instances but they are
        *not* in the atoms list of this ResidueTemplate

        Notes
        -----
        If atom1 and atom2 are already bonded, this routine does nothing. If
        atom1 or atom2 are strings, then they will match the first instance of
        the atom name that is the same as the atom name passed.
        """
        if not isinstance(atom1, Atom):
            atom1 = self[atom1]
        if not isinstance(atom2, Atom):
            atom2 = self[atom2]
        if atom1.list is not self.atoms or atom2.list is not self.atoms:
            raise RuntimeError('Both atoms must belong to template.atoms')
        # Do not add the same bond twice
        if atom1 not in atom2.bond_partners:
            self.bonds.append(Bond(atom1, atom2, order=order))

    @classmethod
    def from_residue(cls, residue):
        """
        This constructor creates a ResidueTemplate from a particular Residue
        object

        Parameters
        ----------
        residue : :class:`Residue`
            The residue from which to create a template
        """
        inst = cls(name=residue.name)
        for atom in residue:
            inst.add_atom(_copy.copy(atom))
        for atom in residue:
            for bond in atom.bonds:
                try:
                    i1 = residue.atoms.index(bond.atom1)
                    i2 = residue.atoms.index(bond.atom2)
                except ValueError:
                    if bond.atom1 in residue:
                        oatom = bond.atom2
                        idx = residue.atoms.index(bond.atom1)
                    else:
                        oatom = bond.atom1
                        idx = residue.atoms.index(bond.atom2)
                    if oatom.residue.idx == residue.idx - 1:
                        inst.head = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx + 1:
                        inst.tail = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx:
                        # Don't know WHAT to do with it
                        warnings.warn('Cannot determine head/tail for '
                                      'unordered residues.')
                    else:
                        # Disulfide or something... not head or tail
                        inst.connections.append(inst.atoms[idx])
                else:
                    inst.add_bond(i1, i2)
        return inst

    @property
    def coordinates(self):
        """ Atomic coordinates, in Angstroms, of all atoms in the template """
        try:
            return self._crd
        except AttributeError:
            self._crd = np.array([[a.xx, a.xy, a.xz] for a in self])
        return self._crd

    # Make ResidueTemplate look like a container of atoms, also indexable by the
    # atom name
    def __len__(self):
        return len(self.atoms)

    def __iter__(self):
        return iter(self.atoms)

    def __contains__(self, atom):
        if isinstance(atom, Atom):
            return atom in self.atoms
        if isinstance(atom, str):
            return atom in self._map
        raise AssertionError('Should not be here!')

    def __copy__(self):
        other = type(self)(name=self.name)

        for atom in self.atoms:
            other.add_atom(_copy.copy(atom))
        for bond in self.bonds:
            other.add_bond(bond.atom1.idx, bond.atom2.idx)
        other.type = self.type

        if self.head is not None:
            other.head = other.atoms[self.head.idx]
        if self.tail is not None:
            other.tail = other.atoms[self.tail.idx]
        for connection in self.connections:
            other.connections.append(other.atoms[connection.idx])
        other.first_patch = self.first_patch
        other.last_patch = self.last_patch

        return other

    def __getitem__(self, idx):
        if isinstance(idx, str):
            for atom in self.atoms:
                if atom.name == idx:
                    return atom
            raise IndexError('Atom %s not found in %s' % (idx, self.name))
        elif isinstance(idx, (list, tuple)):
            return [self[key] for key in idx]
        else:
            return self.atoms[idx]

    def fix_charges(self, to=None, precision=4):
        """
        Adjusts the partial charge of all atoms in the residue to match the
        requested target charge. The default target charge is the closest
        integer

        Parameters
        ----------
        to : float, optional
            The desired net charge of this residue template. Default is the
            closest integer charge
        precision : int, optional
            The number of decimal places that each charge should be rounded to.
            Default is 4

        Returns
        -------
        self : :class:`ResidueTemplate`
            The current residue template whose charges are being modified

        Notes
        -----
        This method modifies the atomic charges of this residue template
        in-place. Any residual charge (which is accumulated roundoff beyond the
        requested precision) is added to the first atom of the residue. This
        will typically be 10^-precision in magnitude, and should almost never be
        higher than 2*10^-precision. As long as a reasonable precision is chosen
        (no fewer than 3 or 4 decimal places), this will have only a negligible
        impact on a force field.

        If provided, "to" will be rounded to the ``precision``'th decimal place
        to make sure that the sum of the charges come out as close as possible
        to the target charge while still obeying the requested precision.

        Raises
        ------
        ValueError
            If you try to call fix_charges on a residue template with no atoms
        """
        if not self.atoms:
            raise ValueError('Cannot fix charges on an empty residue')
        net_charge = sum(a.charge for a in self.atoms)
        if to is None:
            to = round(net_charge)
        else:
            # We need to make sure
            to = round(to, precision)
        if net_charge == to:
            return self

        smear = (to - net_charge) / len(self)
        for atom in self:
            atom.charge = round(atom.charge + smear, precision)

        # Dump the extra tiny bit (O(10^-precision)) on the first atom
        self.atoms[0].charge += to - sum(atom.charge for atom in self.atoms)

        return self

    def to_dataframe(self):
        """ Create a pandas dataframe from the atom information

        Returns
        -------
        df : :class:`pandas.DataFrame`
            The pandas DataFrame with all of the atomic properties

        Notes
        -----
        The DataFrame will be over all atoms. The columns will be the attributes
        of the atom (as well as its containing residue). Some columns will
        *always* exist. Others will only exist if those attributes have been set
        on the Atom instances (see the :class:`Atom` docs for possible
        attributes and their meaning). The columns that will always be present
        are:

            - number : int
            - name : str
            - type : str
            - atomic_number : int
            - charge : float
            - mass : float
            - nb_idx : int
            - solvent_radius : float
            - screen : float
            - occupancy : float
            - bfactor : float
            - altloc : str
            - tree : str
            - join : int
            - irotat : int
            - rmin : float
            - epsilon : float
            - rmin_14 : float
            - epsilon_14 : float

        The following attributes are optionally present if they were present in
        the original file defining the structure:

            - xx : float (x-coordinate position)
            - xy : float (y-coordinate position)
            - xz : float (z-coordinate position)
            - vx : float (x-coordinate velocity)
            - vy : float (y-coordinate velocity)
            - vz : float (z-coordinate velocity)
        """
        import pandas as pd
        ret = pd.DataFrame()

        ret['number'] = [atom.number for atom in self.atoms]
        ret['name'] = [atom.name for atom in self.atoms]
        ret['type'] = [atom.type for atom in self.atoms]
        ret['atomic_number'] = [atom.atomic_number for atom in self.atoms]
        ret['charge'] = [atom.charge for atom in self.atoms]
        ret['mass'] = [atom.mass for atom in self.atoms]
        ret['nb_idx'] = [atom.nb_idx for atom in self.atoms]
        ret['solvent_radius'] = [atom.solvent_radius for atom in self.atoms]
        ret['screen'] = [atom.screen for atom in self.atoms]
        ret['occupancy'] = [atom.occupancy for atom in self.atoms]
        ret['bfactor'] = [atom.bfactor for atom in self.atoms]
        ret['altloc'] = [atom.altloc for atom in self.atoms]
        ret['tree'] = [atom.tree for atom in self.atoms]
        ret['join'] = [atom.join for atom in self.atoms]
        ret['irotat'] = [atom.irotat for atom in self.atoms]
        ret['rmin'] = [atom.rmin for atom in self.atoms]
        ret['epsilon'] = [atom.epsilon for atom in self.atoms]
        ret['rmin_14'] = [atom.rmin_14 for atom in self.atoms]
        ret['epsilon_14'] = [atom.epsilon_14 for atom in self.atoms]
        ret['resname'] = [atom.residue.name for atom in self.atoms]

        # Now for optional attributes
        # Coordinates
        try:
            coords = pd.DataFrame([[atom.xx, atom.xy, atom.xz]
                                   for atom in self.atoms],
                                  columns=['xx', 'xy', 'xz'])
        except AttributeError:
            pass
        else:
            ret = ret.join(coords)
        # Velocities
        try:
            vels = pd.DataFrame([[atom.vx, atom.vy, atom.vz]
                                 for atom in self.atoms],
                                columns=['vx', 'vy', 'vz'])
        except AttributeError:
            pass
        else:
            ret = ret.join(vels)
        return ret

    def to_structure(self):
        """
        Generates a Structure instance with a single residue from this
        ResidueTemplate

        Returns
        -------
        struct : :class:`parmed.structure.Structure`
            The Structure with all of the bonds and connectivity of this
            template
        """
        struct = Structure()
        for atom in self:
            struct.add_atom(_copy.copy(atom), self.name, 0)
        for bond in self.bonds:
            struct.bonds.append(
                Bond(struct.atoms[bond.atom1.idx],
                     struct.atoms[bond.atom2.idx]))
        return struct

    def save(self, fname, format=None, overwrite=False, **kwargs):
        """
        Saves the current ResidueTemplate in the requested file format.
        Supported formats can be specified explicitly or determined by file-name
        extension. The following formats are supported, with the recognized
        suffix shown in parentheses:

            - MOL2 (.mol2)
            - MOL3 (.mol3)
            - OFF (.lib/.off)
            - PDB (.pdb)
            - PQR (.pqr)

        Parameters
        ----------
        fname : str
            Name of the file to save. If ``format`` is ``None`` (see below), the
            file type will be determined based on the filename extension. If the
            type cannot be determined, a ValueError is raised.
        format : str, optional
            The case-insensitive keyword specifying what type of file ``fname``
            should be saved as. If ``None`` (default), the file type will be
            determined from filename extension of ``fname``
        overwrite : bool, optional
            If True, allow the target file to be overwritten. Otherwise, an
            IOError is raised if the file exists. Default is False
        kwargs : keyword-arguments
            Remaining arguments are passed on to the file writing routines that
            are called by this function

        Raises
        ------
        ValueError if either filename extension or ``format`` are not recognized
        TypeError if the structure cannot be converted to the desired format for
        whatever reason
        """
        from parmed.amber.offlib import AmberOFFLibrary
        from parmed.formats.mol2 import Mol2File
        extmap = {
            '.mol2': 'MOL2',
            '.mol3': 'MOL3',
            '.off': 'OFFLIB',
            '.lib': 'OFFLIB',
            '.pdb': 'PDB',
            '.pqr': 'PQR',
        }
        if format is not None:
            format = format.upper()
        else:
            base, ext = os.path.splitext(fname)
            if ext in ('.bz2', '.gz'):
                ext = os.path.splitext(base)[1]
            if ext in extmap:
                format = extmap[ext]
            else:
                raise ValueError('Could not determine file type of %s' % fname)
        if format == 'MOL2':
            Mol2File.write(self, fname, mol3=False, **kwargs)
        elif format == 'MOL3':
            Mol2File.write(self, fname, mol3=True, **kwargs)
        elif format in ('OFFLIB', 'OFF'):
            AmberOFFLibrary.write({self.name: self}, fname, **kwargs)
        elif format in ('PDB', 'PQR'):
            self.to_structure().save(fname,
                                     format=format,
                                     overwrite=overwrite,
                                     **kwargs)
        else:
            raise ValueError('Unrecognized format for ResidueTemplate save')
Ejemplo n.º 7
0
class ResidueTemplate(object):
    """
    This is a residue template, which contains a listing of the atoms in the
    residue template as well as a mapping of which atoms are bonded to other
    atoms.

    Parameters
    ----------
    name : str, optional
        If provided, this is the name of the residue

    Attributes
    ----------
    atoms : :class:`AtomList`
        List of atoms in this residue
    bonds : :class:`TrackedList`
        List of the bonds between the atoms in this residue
    coordinates : np.ndarray(natom, 3)
        The partial atomic coordinates
    connections : list of :class:`Atom`
        A list of all atoms that should form connections with atoms of another
        residue *besides* the head and tail atoms
    head : :class:`Atom` or None
        The atom that is connected to the residue that comes before this one
    tail : :class:`Atom` or None
        The atom that is connected to the *next* residue after this one
    first_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose tail is added to the head
        atom of this residue when this residue is the first in a chain
    last_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose head is added to the tail
        atom of this residue when this residue is the last in a chain
    groups : list of list(:class:`Atom`)
        If set, each group is a list of Atom instances making up each group
    """

    def __init__(self, name=""):
        self.atoms = AtomList()
        self.bonds = TrackedList()
        self.name = name
        self.head = None
        self.tail = None
        self.connections = []
        self._atomnames = set()
        self.type = UNKNOWN
        self.first_patch = None
        self.last_patch = None
        self.groups = []

    def __repr__(self):
        if self.head is not None:
            head = self.head.name
        else:
            head = "None"
        if self.tail is not None:
            tail = self.tail.name
        else:
            tail = "None"
        return "<%s %s: %d atoms; %d bonds; head=%s; tail=%s>" % (
            type(self).__name__,
            self.name,
            len(self.atoms),
            len(self.bonds),
            head,
            tail,
        )

    def add_atom(self, atom):
        """ Adds an atom to this residue template

        Parameters
        ----------
        atom : :class:`Atom`
            The atom to add to this residue

        Raises
        ------
        ValueError if ``atom`` has the same name as another atom in this
        residue already
        """
        if atom.name in self._atomnames:
            raise ValueError("Residue already has atom named %s" % atom.name)
        atom.residue = self
        self.atoms.append(atom)
        self._atomnames.add(atom.name)

    def add_bond(self, atom1, atom2):
        """ Adds a bond between the two provided atoms in the residue

        Parameters
        ----------
        atom1 : :class:`Atom` or int or str
            One of the atoms in the bond. It must be in the ``atoms`` list of
            this ResidueTemplate. It can also be the atom index (index from 0)
            of the atom in the bond.
        atom2 : :class:`Atom` or int or str
            The other atom in the bond. It must be in the ``atoms`` list of this
            ResidueTemplate. It can also be the atom index (index from 0) of the
            atom in the bond.

        Raises
        ------
        IndexError if atom1 or atom2 are integers that are out of range of the
        number of atoms already in this template

        RuntimeError if atom1 or atom2 are :class:`Atom` instances but they are
        *not* in the atoms list of this ResidueTemplate

        Notes
        -----
        If atom1 and atom2 are already bonded, this routine does nothing. If
        atom1 or atom2 are strings, then they will match the first instance of
        the atom name that is the same as the atom name passed.
        """
        if not isinstance(atom1, Atom):
            atom1 = self[atom1]
        if not isinstance(atom2, Atom):
            atom2 = self[atom2]
        if atom1.list is not self.atoms or atom2.list is not self.atoms:
            raise RuntimeError("Both atoms must belong to template.atoms")
        # Do not add the same bond twice
        if atom1 not in atom2.bond_partners:
            self.bonds.append(Bond(atom1, atom2))

    @classmethod
    def from_residue(cls, residue):
        """
        This constructor creates a ResidueTemplate from a particular Residue
        object

        Parameters
        ----------
        residue : :class:`Residue`
            The residue from which to create a template
        """
        inst = cls(name=residue.name)
        for atom in residue:
            inst.add_atom(_copy.copy(atom))
        for atom in residue:
            for bond in atom.bonds:
                try:
                    i1 = residue.atoms.index(bond.atom1)
                    i2 = residue.atoms.index(bond.atom2)
                except ValueError:
                    if bond.atom1 in residue:
                        oatom = bond.atom2
                        idx = residue.atoms.index(bond.atom1)
                    else:
                        oatom = bond.atom1
                        idx = residue.atoms.index(bond.atom2)
                    if oatom.residue.idx == residue.idx - 1:
                        inst.head = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx + 1:
                        inst.tail = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx:
                        # Don't know WHAT to do with it
                        warnings.warn("Cannot determine head/tail for " "unordered residues.")
                    else:
                        # Disulfide or something... not head or tail
                        inst.connections.append(inst.atoms[idx])
                else:
                    inst.add_bond(i1, i2)
        return inst

    @property
    def coordinates(self):
        """ Atomic coordinates, in Angstroms, of all atoms in the template """
        try:
            return self._crd
        except AttributeError:
            self._crd = np.array([[a.xx, a.xy, a.xz] for a in self])
        return self._crd

    # Make ResidueTemplate look like a container of atoms, also indexable by the
    # atom name
    def __len__(self):
        return len(self.atoms)

    def __iter__(self):
        return iter(self.atoms)

    def __contains__(self, atom):
        if isinstance(atom, Atom):
            return atom in self.atoms
        if isinstance(atom, str):
            for a in self.atoms:
                if a.name == atom:
                    return True
        return False

    def __copy__(self):
        other = type(self)(name=self.name)

        for atom in self.atoms:
            other.add_atom(_copy.copy(atom))
        for bond in self.bonds:
            other.add_bond(bond.atom1.idx, bond.atom2.idx)
        other.type = self.type

        if self.head is not None:
            other.head = other.atoms[self.head.idx]
        if self.tail is not None:
            other.tail = other.atoms[self.tail.idx]
        for connection in self.connections:
            other.connections.append(other.atoms[connection.idx])
        other.first_patch = self.first_patch
        other.last_patch = self.last_patch

        return other

    def __getitem__(self, idx):
        if isinstance(idx, str):
            for atom in self.atoms:
                if atom.name == idx:
                    return atom
            raise IndexError("Atom %s not found in %s" % (idx, self.name))
        elif isinstance(idx, (list, tuple)):
            return [self[key] for key in idx]
        else:
            return self.atoms[idx]

    def fix_charges(self, to=None, precision=4):
        """
        Adjusts the partial charge of all atoms in the residue to match the
        requested target charge. The default target charge is the closest
        integer

        Parameters
        ----------
        to : float, optional
            The desired net charge of this residue template. Default is the
            closest integer charge
        precision : int, optional
            The number of decimal places that each charge should be rounded to.
            Default is 4

        Returns
        -------
        self : :class:`ResidueTemplate`
            The current residue template whose charges are being modified

        Notes
        -----
        This method modifies the atomic charges of this residue template
        in-place. Any residual charge (which is accumulated roundoff beyond the
        requested precision) is added to the first atom of the residue. This
        will typically be 10^-precision in magnitude, and should almost never be
        higher than 2*10^-precision. As long as a reasonable precision is chosen
        (no fewer than 3 or 4 decimal places), this will have only a negligible
        impact on a force field.

        If provided, "to" will be rounded to the ``precision``'th decimal place
        to make sure that the sum of the charges come out as close as possible
        to the target charge while still obeying the requested precision.

        Raises
        ------
        ValueError
            If you try to call fix_charges on a residue template with no atoms
        """
        if not self.atoms:
            raise ValueError("Cannot fix charges on an empty residue")
        net_charge = sum(a.charge for a in self.atoms)
        if to is None:
            to = round(net_charge)
        else:
            # We need to make sure
            to = round(to, precision)
        if net_charge == to:
            return self

        smear = (to - net_charge) / len(self)
        for atom in self:
            atom.charge = round(atom.charge + smear, precision)

        # Dump the extra tiny bit (O(10^-precision)) on the first atom
        self.atoms[0].charge += to - sum(atom.charge for atom in self.atoms)

        return self

    def to_dataframe(self):
        """ Create a pandas dataframe from the atom information

        Returns
        -------
        df : :class:`pandas.DataFrame`
            The pandas DataFrame with all of the atomic properties

        Notes
        -----
        The DataFrame will be over all atoms. The columns will be the attributes
        of the atom (as well as its containing residue). Some columns will
        *always* exist. Others will only exist if those attributes have been set
        on the Atom instances (see the :class:`Atom` docs for possible
        attributes and their meaning). The columns that will always be present
        are:

            - number : int
            - name : str
            - type : str
            - atomic_number : int
            - charge : float
            - mass : float
            - nb_idx : int
            - radii : float
            - screen : float
            - occupancy : float
            - bfactor : float
            - altloc : str
            - tree : str
            - join : int
            - irotat : int
            - rmin : float
            - epsilon : float
            - rmin_14 : float
            - epsilon_14 : float

        The following attributes are optionally present if they were present in
        the original file defining the structure:

            - xx : float (x-coordinate position)
            - xy : float (y-coordinate position)
            - xz : float (z-coordinate position)
            - vx : float (x-coordinate velocity)
            - vy : float (y-coordinate velocity)
            - vz : float (z-coordinate velocity)
        """
        if pd is None:
            raise ImportError("pandas is not available; cannot create a pandas " "DataFrame from this Structure")
        ret = pd.DataFrame()

        ret["number"] = [atom.number for atom in self.atoms]
        ret["name"] = [atom.name for atom in self.atoms]
        ret["type"] = [atom.type for atom in self.atoms]
        ret["atomic_number"] = [atom.atomic_number for atom in self.atoms]
        ret["charge"] = [atom.charge for atom in self.atoms]
        ret["mass"] = [atom.mass for atom in self.atoms]
        ret["nb_idx"] = [atom.nb_idx for atom in self.atoms]
        ret["radii"] = [atom.radii for atom in self.atoms]
        ret["screen"] = [atom.screen for atom in self.atoms]
        ret["occupancy"] = [atom.occupancy for atom in self.atoms]
        ret["bfactor"] = [atom.bfactor for atom in self.atoms]
        ret["altloc"] = [atom.altloc for atom in self.atoms]
        ret["tree"] = [atom.tree for atom in self.atoms]
        ret["join"] = [atom.join for atom in self.atoms]
        ret["irotat"] = [atom.irotat for atom in self.atoms]
        ret["rmin"] = [atom.rmin for atom in self.atoms]
        ret["epsilon"] = [atom.epsilon for atom in self.atoms]
        ret["rmin_14"] = [atom.rmin_14 for atom in self.atoms]
        ret["epsilon_14"] = [atom.epsilon_14 for atom in self.atoms]
        ret["resname"] = [atom.residue.name for atom in self.atoms]

        # Now for optional attributes
        # Coordinates
        try:
            coords = pd.DataFrame([[atom.xx, atom.xy, atom.xz] for atom in self.atoms], columns=["xx", "xy", "xz"])
        except AttributeError:
            pass
        else:
            ret = ret.join(coords)
        # Velocities
        try:
            vels = pd.DataFrame([[atom.vx, atom.vy, atom.vz] for atom in self.atoms], columns=["vx", "vy", "vz"])
        except AttributeError:
            pass
        else:
            ret = ret.join(vels)
        return ret

    def to_structure(self):
        """
        Generates a Structure instance with a single residue from this
        ResidueTemplate

        Returns
        -------
        struct : :class:`parmed.structure.Structure`
            The Structure with all of the bonds and connectivity of this
            template
        """
        struct = Structure()
        for atom in self:
            struct.add_atom(_copy.copy(atom), self.name, 0)
        for bond in self.bonds:
            struct.bonds.append(Bond(struct.atoms[bond.atom1.idx], struct.atoms[bond.atom2.idx]))
        return struct

    def save(self, fname, format=None, overwrite=False, **kwargs):
        """
        Saves the current ResidueTemplate in the requested file format.
        Supported formats can be specified explicitly or determined by file-name
        extension. The following formats are supported, with the recognized
        suffix shown in parentheses:

            - MOL2 (.mol2)
            - MOL3 (.mol3)
            - OFF (.lib/.off)
            - PDB (.pdb)
            - PQR (.pqr)

        Parameters
        ----------
        fname : str
            Name of the file to save. If ``format`` is ``None`` (see below), the
            file type will be determined based on the filename extension. If the
            type cannot be determined, a ValueError is raised.
        format : str, optional
            The case-insensitive keyword specifying what type of file ``fname``
            should be saved as. If ``None`` (default), the file type will be
            determined from filename extension of ``fname``
        overwrite : bool, optional
            If True, allow the target file to be overwritten. Otherwise, an
            IOError is raised if the file exists. Default is False
        kwargs : keyword-arguments
            Remaining arguments are passed on to the file writing routines that
            are called by this function

        Raises
        ------
        ValueError if either filename extension or ``format`` are not recognized
        TypeError if the structure cannot be converted to the desired format for
        whatever reason
        """
        from parmed.amber.offlib import AmberOFFLibrary
        from parmed.formats.mol2 import Mol2File

        extmap = {".mol2": "MOL2", ".mol3": "MOL3", ".off": "OFFLIB", ".lib": "OFFLIB", ".pdb": "PDB", ".pqr": "PQR"}
        if format is not None:
            format = format.upper()
        else:
            base, ext = os.path.splitext(fname)
            if ext in (".bz2", ".gz"):
                ext = os.path.splitext(base)[1]
            if ext in extmap:
                format = extmap[ext]
            else:
                raise ValueError("Could not determine file type of %s" % fname)
        if format == "MOL2":
            Mol2File.write(self, fname, mol3=False, **kwargs)
        elif format == "MOL3":
            Mol2File.write(self, fname, mol3=True, **kwargs)
        elif format in ("OFFLIB", "OFF"):
            AmberOFFLibrary.write({self.name: self}, fname, **kwargs)
        elif format in ("PDB", "PQR"):
            self.to_structure().save(fname, format=format, overwrite=overwrite, **kwargs)
        else:
            raise ValueError("Unrecognized format for ResidueTemplate save")
Ejemplo n.º 8
0
class ResidueTemplate(object):
    """
    This is a residue template, which contains a listing of the atoms in the
    residue template as well as a mapping of which atoms are bonded to other
    atoms.

    Parameters
    ----------
    name : str, optional
        If provided, this is the name of the residue

    Attributes
    ----------
    atoms : :class:`AtomList`
        List of atoms in this residue
    bonds : :class:`TrackedList`
        List of the bonds between the atoms in this residue
    coordinates : np.ndarray(natom, 3)
        The partial atomic coordinates
    connections : list of :class:`Atom`
        A list of all atoms that should form connections with atoms of another
        residue *besides* the head and tail atoms
    head : :class:`Atom` or None
        The atom that is connected to the residue that comes before this one
    tail : :class:`Atom` or None
        The atom that is connected to the *next* residue after this one
    first_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose tail is added to the head
        atom of this residue when this residue is the first in a chain
    last_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose head is added to the tail
        atom of this residue when this residue is the last in a chain
    groups : list of list(:class:`Atom`)
        If set, each group is a list of Atom instances making up each group
    override_level : integer
        For use with OpenMM ResidueTemplates. If OpenMM ForceField is given multiple
        identically-matching residue templates with the same names it choses
        (overrides with) the one with the highest override_level
        (overrideLevel in OpenMM). Default is 0.
    """

    def __init__(self, name=''):
        self.atoms = AtomList()
        self.bonds = TrackedList()
        self.name = name
        self.head = None
        self.tail = None
        self.connections = []
        self.type = UNKNOWN
        self.first_patch = None
        self.last_patch = None
        self.groups = []
        self.override_level = 0
        self._map = dict()
        self._impr = []

    def __repr__(self):
        if self.head is not None:
            head = self.head.name
        else:
            head = 'None'
        if self.tail is not None:
            tail = self.tail.name
        else:
            tail = 'None'
        return '<%s %s: %d atoms; %d bonds; head=%s; tail=%s>' % (
                    type(self).__name__, self.name, len(self.atoms),
                    len(self.bonds), head, tail)

    @property
    def map(self):
        return self._map

    def add_atom(self, atom):
        """ Adds an atom to this residue template

        Parameters
        ----------
        atom : :class:`Atom`
            The atom to add to this residue

        Raises
        ------
        ValueError if ``atom`` has the same name as another atom in this
        residue already
        """
        if atom.name in self._map:
            raise ValueError('Residue already has atom named %s' % atom.name)
        atom.residue = self
        self.atoms.append(atom)
        self._map[atom.name] = atom

    def delete_atom(self, atom):
        """ Delete an atom from this residue template, along with corresponding bonds.

        Parameters
        ----------
        atom : :class:`Atom` or str
            The atom or atom name to be deleted

        """
        if type(atom) is str:
            atom_name = atom
        else:
            atom_name = atom.name

        if atom_name not in self._map:
            raise KeyError("Could not find atom '%s' in ResidueTemplate, which contains atoms: %s" % (atom_name, list(self._map.keys())))

        atom = self._map[atom_name]

        # Adjust head and tail if needed
        if self.head == atom:
            self.head = None
        if self.tail == atom:
            self.tail = None

        # Remove all bonds involving this atom
        for bond in list(self.bonds):
            if (bond.atom1 == atom) or (bond.atom2 == atom):
                self.delete_bond(bond)

        # Remove all impropers involving this atom
        for impr in list(self._impr):
            if atom in impr:
                self._impr.remove(impr)

        # Disconnect the atom from the residue so that it does not trigger atom.residue.delete_atom(atom)
        atom.residue = None

        # Remove the atom from the ResidueTemplate
        del self._map[atom_name]
        self.atoms.remove(atom)

    def add_bond(self, atom1, atom2, order=1.0):
        """ Adds a bond between the two provided atoms in the residue

        Parameters
        ----------
        atom1 : :class:`Atom` or int or str
            One of the atoms in the bond. It must be in the ``atoms`` list of
            this ResidueTemplate. It can also be the atom index (index from 0)
            of the atom in the bond.
        atom2 : :class:`Atom` or int or str
            The other atom in the bond. It must be in the ``atoms`` list of this
            ResidueTemplate. It can also be the atom index (index from 0) of the
            atom in the bond.
        order : float
            The bond order of this bond. Bonds are classified as follows:
                1.0 -- single bond
                2.0 -- double bond
                3.0 -- triple bond
                1.5 -- aromatic bond
                1.25 -- amide bond
            Default is 1.0

        Raises
        ------
        IndexError if atom1 or atom2 are integers that are out of range of the
        number of atoms already in this template

        RuntimeError if atom1 or atom2 are :class:`Atom` instances but they are
        *not* in the atoms list of this ResidueTemplate

        Notes
        -----
        If atom1 and atom2 are already bonded, this routine does nothing. If
        atom1 or atom2 are strings, then they will match the first instance of
        the atom name that is the same as the atom name passed.
        """
        if not isinstance(atom1, Atom):
            atom1 = self[atom1]
        if not isinstance(atom2, Atom):
            atom2 = self[atom2]
        if atom1.list is not self.atoms or atom2.list is not self.atoms:
            raise RuntimeError('Both atoms must belong to template.atoms')
        # Do not add the same bond twice
        if atom1 not in atom2.bond_partners:
            self.bonds.append(Bond(atom1, atom2, order=order))

    def delete_bond(self, bond):
        """ Delete a bond from this residue template.

        Parameters
        ----------
        bond : :class:`Bond`
            The bond to be deleted

        """
        if bond in self.bonds:
            bond.delete()
            self.bonds.remove(bond)
        else:
            raise ValueError('The specified bond {} does not belong to this residue {}'.format(bond, self))

    @classmethod
    def from_residue(cls, residue):
        """
        This constructor creates a ResidueTemplate from a particular Residue
        object
        Parameters
        ----------
        residue : :class:`Residue`
            The residue from which to create a template
        """
        inst = cls(name=residue.name)
        for atom in residue:
            inst.add_atom(_copy.copy(atom))
        for atom in residue:
            for bond in atom.bonds:
                try:
                    i1 = residue.atoms.index(bond.atom1)
                    i2 = residue.atoms.index(bond.atom2)
                except ValueError:
                    if bond.atom1 in residue:
                        oatom = bond.atom2
                        idx = residue.atoms.index(bond.atom1)
                    else:
                        oatom = bond.atom1
                        idx = residue.atoms.index(bond.atom2)
                    if oatom.residue.idx == residue.idx - 1:
                        inst.head = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx + 1:
                        inst.tail = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx:
                        # Don't know WHAT to do with it
                        warnings.warn('Cannot determine head/tail for '
                                      'unordered residues.')
                    else:
                        # Disulfide or something... not head or tail
                        inst.connections.append(inst.atoms[idx])
                else:
                    inst.add_bond(i1, i2)
        return inst

    @property
    def coordinates(self):
        """ Atomic coordinates, in Angstroms, of all atoms in the template """
        try:
            return self._crd
        except AttributeError:
            self._crd = np.array([[a.xx, a.xy, a.xz] for a in self])
        return self._crd

    @property
    def empirical_chemical_formula(self):
        """ Return the empirical chemical formula (in Hill notation) as a string (e.g. 'H2O', 'C6H12'), omitting EPs """
        # Count number of appearances of each element
        element_count = defaultdict(int)
        for atom in self.atoms:
            element_count[atom.element_name] += 1
        # Pop EPs if they are present, since they are not chemical
        if 'EP' in element_count:
            element_count.pop('EP')
        # Render to string using Hill notation
        # https://en.wikipedia.org/wiki/Chemical_formula#Hill_system
        def format_and_pop_element(element_count, element):
            count = element_count.pop(element)
            if count == 1:
                return element
            return element + str(count)

        chemical_formula = ''
        # If carbon is present, first list C, then H (if present)
        if 'C' in element_count:
            chemical_formula += format_and_pop_element(element_count, 'C')
            if 'H' in element_count:
                chemical_formula += format_and_pop_element(element_count, 'H')
        # Remaining elements are listed alphabetically
        alphabetical_elements = sorted(element_count.keys())
        for element in alphabetical_elements:
            chemical_formula += format_and_pop_element(element_count, element)

        return chemical_formula

    @property
    def net_charge(self):
        return sum([a.charge for a in self])

    # Make ResidueTemplate look like a container of atoms, also indexable by the
    # atom name
    def __len__(self):
        return len(self.atoms)

    def __iter__(self):
        return iter(self.atoms)

    def __contains__(self, atom):
        if isinstance(atom, Atom):
            return atom in self.atoms
        if isinstance(atom, str):
            return atom in self._map
        raise AssertionError('Should not be here!')

    def __copy__(self):
        other = type(self)(name=self.name)

        for atom in self.atoms:
            other.add_atom(_copy.copy(atom))
        for bond in self.bonds:
            other.add_bond(bond.atom1.idx, bond.atom2.idx)
        other.type = self.type

        if self.head is not None:
            other.head = other.atoms[self.head.idx]
        if self.tail is not None:
            other.tail = other.atoms[self.tail.idx]
        for connection in self.connections:
            other.connections.append(other.atoms[connection.idx])
        other.first_patch = self.first_patch
        other.last_patch = self.last_patch

        return other

    def __getitem__(self, idx):
        if isinstance(idx, str):
            for atom in self.atoms:
                if atom.name == idx:
                    return atom
            raise IndexError('Atom %s not found in %s' % (idx, self.name))
        elif isinstance(idx, (list, tuple)):
            return [self[key] for key in  idx]
        else:
            return self.atoms[idx]

    def fix_charges(self, to=None, precision=4):
        """
        Adjusts the partial charge of all atoms in the residue to match the
        requested target charge. The default target charge is the closest
        integer

        Parameters
        ----------
        to : float, optional
            The desired net charge of this residue template. Default is the
            closest integer charge
        precision : int, optional
            The number of decimal places that each charge should be rounded to.
            Default is 4

        Returns
        -------
        self : :class:`ResidueTemplate`
            The current residue template whose charges are being modified

        Notes
        -----
        This method modifies the atomic charges of this residue template
        in-place. Any residual charge (which is accumulated roundoff beyond the
        requested precision) is added to the first atom of the residue. This
        will typically be 10^-precision in magnitude, and should almost never be
        higher than 2*10^-precision. As long as a reasonable precision is chosen
        (no fewer than 3 or 4 decimal places), this will have only a negligible
        impact on a force field.

        If provided, "to" will be rounded to the ``precision``'th decimal place
        to make sure that the sum of the charges come out as close as possible
        to the target charge while still obeying the requested precision.

        Raises
        ------
        ValueError
            If you try to call fix_charges on a residue template with no atoms
        """
        if not self.atoms:
            raise ValueError('Cannot fix charges on an empty residue')
        net_charge = self.net_charge
        if to is None:
            to = round(net_charge)
        else:
            # We need to make sure
            to = round(to, precision)
        if net_charge == to:
            return self

        smear = (to - net_charge) / len(self)
        for atom in self:
            atom.charge = round(atom.charge + smear, precision)

        # Dump the extra tiny bit (O(10^-precision)) on the first atom
        self.atoms[0].charge += to - sum(atom.charge for atom in self.atoms)

        return self

    def apply_patch(self, patch, precision=4):
        """
        Apply the specified PatchTemplate to the ResidueTemplate.

        This only handles patches that affect a single residue.

        An exception is thrown if patch is incompatible because
        * The patch specifies that an atom is to be deleted that doesn't exist in the residue
        * A bond specified as being added in the patch does not have both atom names present after adding/deleting atoms from the patch
        * The new net charge is not integral to the specified precision
        * The residue is not modified in any way (no atoms or bonds added/changed/deleted)

        Parameters
        ----------

        patch : PatchTemplate
            The patch to apply to this residue

        precision : int, optional
            Each valid patch should be produce a net charge that is integral to
            this many decimal places.
            Default is 4

        Returns
        -------

        residue : ResidueTemplate
            A new ResidueTemplate corresponding to the patched residue is returned.
            The original remains unmodified.

        """
        # Create a copy
        # TODO: Once ResidueTemplate.from_residue() actually copies all info, use that instead?
        residue = _copy.copy(self)
        # Record whether we've actually modified the residue.
        modifications_made = False
        # Delete atoms
        for atom_name in patch.delete_atoms:
            try:
                residue.delete_atom(atom_name)
                modifications_made = True
            except KeyError as e:
                raise IncompatiblePatchError(str(e))
        # Add or replace atoms
        for atom in patch.atoms:
            if atom.name in residue:
                # Overwrite type and charge
                residue[atom.name].type = atom.type
                residue[atom.name].charge = atom.charge
            else:
                residue.add_atom(Atom(name=atom.name, type=atom.type, charge=atom.charge))
            modifications_made = True
        # Add bonds
        for (atom1_name, atom2_name, order) in patch.add_bonds:
            try:
                # Remove dangling bonds
                for name in [atom1_name, atom2_name]:
                    if residue.head and (name == residue.head.name):
                        residue.head = None
                    if residue.tail and (name == residue.tail.name):
                        residue.tail = None
                # Add bond
                residue.add_bond(atom1_name, atom2_name, order)
                modifications_made = True
            except IndexError as e:
                raise IncompatiblePatchError('Bond %s-%s could not be added to patched residue: atoms are %s' % (atom1_name, atom2_name, list(residue._map.keys())))
        # Delete impropers
        for impr in patch.delete_impropers:
            try:
                residue._impr.remove(impr)
                # removal of impropers doesn't do anything as far as OpenMM is concerned, so don't note this as a modification having been made
            except ValueError as e:
                raise IncompatiblePatchError('Improper %s was not found in residue to be patched.' % impr)
        # Check that the net charge is integral.
        net_charge = residue.net_charge
        is_integral = (round(net_charge, precision) - round(net_charge)) == 0.0
        if not is_integral:
            raise IncompatiblePatchError('Patch is not compatible with residue due to non-integral charge (charge was %f).' % net_charge)
        # Ensure residue is connected
        import networkx as nx
        G = residue.to_networkx()
        if not nx.is_connected(G):
            components = [ c for c in nx.connected_components(G) ]
            raise IncompatiblePatchError('Patched residue bond graph is not a connected graph: %s' % str(components))
        # Make sure the patch has actually modified the residue
        if not modifications_made:
            raise IncompatiblePatchError('Patch did not modify residue.')

        return residue

    def to_networkx(self):
        """ Create a NetworkX graph of atoms and bonds

        Returns
        -------
        G : :class:`networkx.Graph`
            A NetworkX Graph representing the molecule

        """
        import networkx
        G = networkx.Graph()
        for atom in self.atoms:
            G.add_node(atom.name, charge=atom.charge, type=atom.type)
        for bond in self.bonds:
            G.add_edge(bond.atom1.name, bond.atom2.name)
        return G

    def to_dataframe(self):
        """ Create a pandas dataframe from the atom information

        Returns
        -------
        df : :class:`pandas.DataFrame`
            The pandas DataFrame with all of the atomic properties

        Notes
        -----
        The DataFrame will be over all atoms. The columns will be the attributes
        of the atom (as well as its containing residue). Some columns will
        *always* exist. Others will only exist if those attributes have been set
        on the Atom instances (see the :class:`Atom` docs for possible
        attributes and their meaning). The columns that will always be present
        are:

            - number : int
            - name : str
            - type : str
            - atomic_number : int
            - charge : float
            - mass : float
            - nb_idx : int
            - solvent_radius : float
            - screen : float
            - occupancy : float
            - bfactor : float
            - altloc : str
            - tree : str
            - join : int
            - irotat : int
            - rmin : float
            - epsilon : float
            - rmin_14 : float
            - epsilon_14 : float

        The following attributes are optionally present if they were present in
        the original file defining the structure:

            - xx : float (x-coordinate position)
            - xy : float (y-coordinate position)
            - xz : float (z-coordinate position)
            - vx : float (x-coordinate velocity)
            - vy : float (y-coordinate velocity)
            - vz : float (z-coordinate velocity)
        """
        import pandas as pd
        ret = pd.DataFrame()

        ret['number'] = [atom.number for atom in self.atoms]
        ret['name'] = [atom.name for atom in self.atoms]
        ret['type'] = [atom.type for atom in self.atoms]
        ret['atomic_number'] = [atom.atomic_number for atom in self.atoms]
        ret['charge'] = [atom.charge for atom in self.atoms]
        ret['mass'] = [atom.mass for atom in self.atoms]
        ret['nb_idx'] = [atom.nb_idx for atom in self.atoms]
        ret['solvent_radius'] = [atom.solvent_radius for atom in self.atoms]
        ret['screen'] = [atom.screen for atom in self.atoms]
        ret['occupancy'] = [atom.occupancy for atom in self.atoms]
        ret['bfactor'] = [atom.bfactor for atom in self.atoms]
        ret['altloc'] = [atom.altloc for atom in self.atoms]
        ret['tree'] = [atom.tree for atom in self.atoms]
        ret['join'] = [atom.join for atom in self.atoms]
        ret['irotat'] = [atom.irotat for atom in self.atoms]
        ret['rmin'] = [atom.rmin for atom in self.atoms]
        ret['epsilon'] = [atom.epsilon for atom in self.atoms]
        ret['rmin_14'] = [atom.rmin_14 for atom in self.atoms]
        ret['epsilon_14'] = [atom.epsilon_14 for atom in self.atoms]
        ret['resname'] = [atom.residue.name for atom in self.atoms]

        # Now for optional attributes
        # Coordinates
        try:
            coords = pd.DataFrame(
                    [[atom.xx, atom.xy, atom.xz] for atom in self.atoms],
                    columns=['xx', 'xy', 'xz']
            )
        except AttributeError:
            pass
        else:
            ret = ret.join(coords)
        # Velocities
        try:
            vels = pd.DataFrame(
                    [[atom.vx, atom.vy, atom.vz] for atom in self.atoms],
                    columns=['vx', 'vy', 'vz']
            )
        except AttributeError:
            pass
        else:
            ret = ret.join(vels)
        return ret

    def to_structure(self):
        """
        Generates a Structure instance with a single residue from this
        ResidueTemplate

        Returns
        -------
        struct : :class:`parmed.structure.Structure`
            The Structure with all of the bonds and connectivity of this
            template
        """
        struct = Structure()
        for atom in self:
            struct.add_atom(_copy.copy(atom), self.name, 0)
        for bond in self.bonds:
            struct.bonds.append(Bond(struct.atoms[bond.atom1.idx],
                                     struct.atoms[bond.atom2.idx])
            )
        return struct

    def save(self, fname, format=None, overwrite=False, **kwargs):
        """
        Saves the current ResidueTemplate in the requested file format.
        Supported formats can be specified explicitly or determined by file-name
        extension. The following formats are supported, with the recognized
        suffix shown in parentheses:

            - MOL2 (.mol2)
            - MOL3 (.mol3)
            - OFF (.lib/.off)
            - PDB (.pdb)
            - PQR (.pqr)

        Parameters
        ----------
        fname : str
            Name of the file to save. If ``format`` is ``None`` (see below), the
            file type will be determined based on the filename extension. If the
            type cannot be determined, a ValueError is raised.
        format : str, optional
            The case-insensitive keyword specifying what type of file ``fname``
            should be saved as. If ``None`` (default), the file type will be
            determined from filename extension of ``fname``
        overwrite : bool, optional
            If True, allow the target file to be overwritten. Otherwise, an
            IOError is raised if the file exists. Default is False
        kwargs : keyword-arguments
            Remaining arguments are passed on to the file writing routines that
            are called by this function

        Raises
        ------
        ValueError if either filename extension or ``format`` are not recognized
        TypeError if the structure cannot be converted to the desired format for
        whatever reason
        """
        from parmed.amber.offlib import AmberOFFLibrary
        from parmed.formats.mol2 import Mol2File
        extmap = {
                '.mol2' : 'MOL2',
                '.mol3' : 'MOL3',
                '.off' : 'OFFLIB',
                '.lib' : 'OFFLIB',
                '.pdb' : 'PDB',
                '.pqr' : 'PQR',
        }
        if format is not None:
            format = format.upper()
        else:
            base, ext = os.path.splitext(fname)
            if ext in ('.bz2', '.gz'):
                ext = os.path.splitext(base)[1]
            if ext in extmap:
                format = extmap[ext]
            else:
                raise ValueError('Could not determine file type of %s' % fname)
        if format == 'MOL2':
            Mol2File.write(self, fname, mol3=False, **kwargs)
        elif format == 'MOL3':
            Mol2File.write(self, fname, mol3=True, **kwargs)
        elif format in ('OFFLIB', 'OFF'):
            AmberOFFLibrary.write({self.name : self}, fname, **kwargs)
        elif format in ('PDB', 'PQR'):
            self.to_structure().save(fname, format=format, overwrite=overwrite,
                                     **kwargs)
        else:
            raise ValueError('Unrecognized format for ResidueTemplate save')
Ejemplo n.º 9
0
class ResidueTemplate(object):
    """
    This is a residue template, which contains a listing of the atoms in the
    residue template as well as a mapping of which atoms are bonded to other
    atoms.

    Parameters
    ----------
    name : str, optional
        If provided, this is the name of the residue

    Attributes
    ----------
    atoms : :class:`AtomList`
        List of atoms in this residue
    bonds : :class:`TrackedList`
        List of the bonds between the atoms in this residue
    coordinates : np.ndarray(natom, 3)
        The partial atomic coordinates
    connections : list of :class:`Atom`
        A list of all atoms that should form connections with atoms of another
        residue *besides* the head and tail atoms
    head : :class:`Atom` or None
        The atom that is connected to the residue that comes before this one
    tail : :class:`Atom` or None
        The atom that is connected to the *next* residue after this one
    first_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose tail is added to the head
        atom of this residue when this residue is the first in a chain
    last_patch : :class:`ResidueTemplate` or None
        If it is not None, this is the patch whose head is added to the tail
        atom of this residue when this residue is the last in a chain
    groups : list of list(:class:`Atom`)
        If set, each group is a list of Atom instances making up each group
    override_level : integer
        For use with OpenMM ResidueTemplates. If OpenMM ForceField is given multiple
        identically-matching residue templates with the same names it choses
        (overrides with) the one with the highest override_level
        (overrideLevel in OpenMM). Default is 0.
    """
    def __init__(self, name=''):
        self.atoms = AtomList()
        self.bonds = TrackedList()
        self.lonepairs = list()  # TODO: Should this be a TrackedList?
        self.anisotropies = list()
        self.name = name
        self.head = None
        self.tail = None
        self.connections = []
        self.type = UNKNOWN
        self.first_patch = None
        self.last_patch = None
        self.groups = []
        self.override_level = 0
        self._map = dict()
        self._impr = []

    def __repr__(self):
        if self.head is not None:
            head = self.head.name
        else:
            head = 'None'
        if self.tail is not None:
            tail = self.tail.name
        else:
            tail = 'None'
        return '<%s %s: %d atoms; %d bonds; head=%s; tail=%s>' % (
            type(self).__name__, self.name, len(self.atoms), len(
                self.bonds), head, tail)

    @property
    def map(self):
        return self._map

    def add_atom(self, atom):
        """ Adds an atom to this residue template

        Parameters
        ----------
        atom : :class:`Atom`
            The atom to add to this residue

        Raises
        ------
        ValueError if ``atom`` has the same name as another atom in this
        residue already
        """
        if atom.name in self._map:
            raise ValueError('Residue already has atom named %s' % atom.name)
        atom.residue = self
        self.atoms.append(atom)
        self._map[atom.name] = atom

    def delete_atom(self, atom):
        """ Delete an atom from this residue template, along with corresponding bonds.

        Parameters
        ----------
        atom : :class:`Atom` or str
            The atom or atom name to be deleted

        """
        if type(atom) is str:
            atom_name = atom
        else:
            atom_name = atom.name

        if atom_name not in self._map:
            raise KeyError(
                "Could not find atom '%s' in ResidueTemplate, which contains atoms: %s"
                % (atom_name, list(self._map.keys())))

        atom = self._map[atom_name]

        # Adjust head and tail if needed
        if self.head == atom:
            self.head = None
        if self.tail == atom:
            self.tail = None

        # Remove all bonds involving this atom
        for bond in list(self.bonds):
            if (bond.atom1 == atom) or (bond.atom2 == atom):
                self.delete_bond(bond)

        # Remove all impropers involving this atom
        for impr in list(self._impr):
            if atom in impr:
                self._impr.remove(impr)

        # Disconnect the atom from the residue so that it does not trigger atom.residue.delete_atom(atom)
        atom.residue = None

        # Remove the atom from the ResidueTemplate
        del self._map[atom_name]
        self.atoms.remove(atom)

    def add_bond(self, atom1, atom2, order=1.0):
        """ Adds a bond between the two provided atoms in the residue

        Parameters
        ----------
        atom1 : :class:`Atom` or int or str
            One of the atoms in the bond. It must be in the ``atoms`` list of
            this ResidueTemplate. It can also be the atom index (index from 0)
            of the atom in the bond.
        atom2 : :class:`Atom` or int or str
            The other atom in the bond. It must be in the ``atoms`` list of this
            ResidueTemplate. It can also be the atom index (index from 0) of the
            atom in the bond.
        order : float
            The bond order of this bond. Bonds are classified as follows:
                1.0 -- single bond
                2.0 -- double bond
                3.0 -- triple bond
                1.5 -- aromatic bond
                1.25 -- amide bond
            Default is 1.0

        Raises
        ------
        IndexError if atom1 or atom2 are integers that are out of range of the
        number of atoms already in this template

        RuntimeError if atom1 or atom2 are :class:`Atom` instances but they are
        *not* in the atoms list of this ResidueTemplate

        Notes
        -----
        If atom1 and atom2 are already bonded, this routine does nothing. If
        atom1 or atom2 are strings, then they will match the first instance of
        the atom name that is the same as the atom name passed.
        """
        if not isinstance(atom1, Atom):
            atom1 = self[atom1]
        if not isinstance(atom2, Atom):
            atom2 = self[atom2]
        if atom1.list is not self.atoms or atom2.list is not self.atoms:
            raise RuntimeError('Both atoms must belong to template.atoms')
        # Do not add the same bond twice
        if atom1 not in atom2.bond_partners:
            self.bonds.append(Bond(atom1, atom2, order=order))

    def delete_bond(self, bond):
        """ Delete a bond from this residue template.

        Parameters
        ----------
        bond : :class:`Bond`
            The bond to be deleted

        """
        if bond in self.bonds:
            bond.delete()
            self.bonds.remove(bond)
        else:
            raise ValueError(
                'The specified bond {} does not belong to this residue {}'.
                format(bond, self))

    @classmethod
    def from_residue(cls, residue):
        """
        This constructor creates a ResidueTemplate from a particular Residue
        object
        Parameters
        ----------
        residue : :class:`Residue`
            The residue from which to create a template
        """
        inst = cls(name=residue.name)
        for atom in residue:
            inst.add_atom(_copy.copy(atom))
        for atom in residue:
            for bond in atom.bonds:
                try:
                    i1 = residue.atoms.index(bond.atom1)
                    i2 = residue.atoms.index(bond.atom2)
                except ValueError:
                    if bond.atom1 in residue:
                        oatom = bond.atom2
                        idx = residue.atoms.index(bond.atom1)
                    else:
                        oatom = bond.atom1
                        idx = residue.atoms.index(bond.atom2)
                    if oatom.residue.idx == residue.idx - 1:
                        inst.head = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx + 1:
                        inst.tail = inst.atoms[idx]
                    elif oatom.residue.idx == residue.idx:
                        # Don't know WHAT to do with it
                        warnings.warn('Cannot determine head/tail for '
                                      'unordered residues.')
                    else:
                        # Disulfide or something... not head or tail
                        inst.connections.append(inst.atoms[idx])
                else:
                    inst.add_bond(i1, i2)
        return inst

    @property
    def coordinates(self):
        """ Atomic coordinates, in Angstroms, of all atoms in the template """
        try:
            return self._crd
        except AttributeError:
            self._crd = np.array([[a.xx, a.xy, a.xz] for a in self])
        return self._crd

    @property
    def empirical_chemical_formula(self):
        """ Return the empirical chemical formula (in Hill notation) as a string (e.g. 'H2O', 'C6H12'), omitting EPs """
        # Count number of appearances of each element
        element_count = defaultdict(int)
        for atom in self.atoms:
            element_count[atom.element_name] += 1
        # Pop EPs if they are present, since they are not chemical
        if 'EP' in element_count:
            element_count.pop('EP')
        # Render to string using Hill notation
        # https://en.wikipedia.org/wiki/Chemical_formula#Hill_system
        def format_and_pop_element(element_count, element):
            count = element_count.pop(element)
            if count == 1:
                return element
            return element + str(count)

        chemical_formula = ''
        # If carbon is present, first list C, then H (if present)
        if 'C' in element_count:
            chemical_formula += format_and_pop_element(element_count, 'C')
            if 'H' in element_count:
                chemical_formula += format_and_pop_element(element_count, 'H')
        # Remaining elements are listed alphabetically
        alphabetical_elements = sorted(element_count.keys())
        for element in alphabetical_elements:
            chemical_formula += format_and_pop_element(element_count, element)

        return chemical_formula

    @property
    def net_charge(self):
        return sum([a.charge for a in self])

    # Make ResidueTemplate look like a container of atoms, also indexable by the
    # atom name
    def __len__(self):
        return len(self.atoms)

    def __iter__(self):
        return iter(self.atoms)

    def __contains__(self, atom):
        if isinstance(atom, Atom):
            return atom in self.atoms
        if isinstance(atom, str):
            return atom in self._map
        raise AssertionError('Should not be here!')

    def __copy__(self):
        other = type(self)(name=self.name)

        for atom in self.atoms:
            other.add_atom(_copy.copy(atom))
        for bond in self.bonds:
            other.add_bond(bond.atom1.idx, bond.atom2.idx)
        other.type = self.type

        if self.head is not None:
            other.head = other.atoms[self.head.idx]
        if self.tail is not None:
            other.tail = other.atoms[self.tail.idx]
        for connection in self.connections:
            other.connections.append(other.atoms[connection.idx])
        other.first_patch = self.first_patch
        other.last_patch = self.last_patch

        return other

    def __getitem__(self, idx):
        if isinstance(idx, str):
            for atom in self.atoms:
                if atom.name == idx:
                    return atom
            raise IndexError('Atom %s not found in %s' % (idx, self.name))
        elif isinstance(idx, (list, tuple)):
            return [self[key] for key in idx]
        else:
            return self.atoms[idx]

    def fix_charges(self, to=None, precision=4):
        """
        Adjusts the partial charge of all atoms in the residue to match the
        requested target charge. The default target charge is the closest
        integer

        Parameters
        ----------
        to : float, optional
            The desired net charge of this residue template. Default is the
            closest integer charge
        precision : int, optional
            The number of decimal places that each charge should be rounded to.
            Default is 4

        Returns
        -------
        self : :class:`ResidueTemplate`
            The current residue template whose charges are being modified

        Notes
        -----
        This method modifies the atomic charges of this residue template
        in-place. Any residual charge (which is accumulated roundoff beyond the
        requested precision) is added to the first atom of the residue. This
        will typically be 10^-precision in magnitude, and should almost never be
        higher than 2*10^-precision. As long as a reasonable precision is chosen
        (no fewer than 3 or 4 decimal places), this will have only a negligible
        impact on a force field.

        If provided, "to" will be rounded to the ``precision``'th decimal place
        to make sure that the sum of the charges come out as close as possible
        to the target charge while still obeying the requested precision.

        Raises
        ------
        ValueError
            If you try to call fix_charges on a residue template with no atoms
        """
        if not self.atoms:
            raise ValueError('Cannot fix charges on an empty residue')
        net_charge = self.net_charge
        if to is None:
            to = round(net_charge)
        else:
            # We need to make sure
            to = round(to, precision)
        if net_charge == to:
            return self

        smear = (to - net_charge) / len(self)
        for atom in self:
            atom.charge = round(atom.charge + smear, precision)

        # Dump the extra tiny bit (O(10^-precision)) on the first atom
        self.atoms[0].charge += to - sum(atom.charge for atom in self.atoms)

        return self

    def apply_patch(self, patch, precision=4):
        """
        Apply the specified PatchTemplate to the ResidueTemplate.

        This only handles patches that affect a single residue.

        An exception is thrown if patch is incompatible because
        * The patch specifies that an atom is to be deleted that doesn't exist in the residue
        * A bond specified as being added in the patch does not have both atom names present after adding/deleting atoms from the patch
        * The new net charge is not integral to the specified precision
        * The residue is not modified in any way (no atoms or bonds added/changed/deleted)

        Parameters
        ----------

        patch : PatchTemplate
            The patch to apply to this residue

        precision : int, optional
            Each valid patch should be produce a net charge that is integral to
            this many decimal places.
            Default is 4

        Returns
        -------

        residue : ResidueTemplate
            A new ResidueTemplate corresponding to the patched residue is returned.
            The original remains unmodified.

        """
        # Create a copy
        # TODO: Once ResidueTemplate.from_residue() actually copies all info, use that instead?
        residue = _copy.copy(self)
        # Record whether we've actually modified the residue.
        modifications_made = False
        # Delete atoms
        for atom_name in patch.delete_atoms:
            try:
                residue.delete_atom(atom_name)
                modifications_made = True
            except (KeyError, MoleculeError) as e:
                if atom_name.startswith('D') and atom_name[
                        1:] in self and atom_name[1:] in patch.delete_atoms:
                    # This is a Drude particle.  We're also deleting its parent atom, so don't report an error.
                    pass
                else:
                    raise IncompatiblePatchError(
                        'Atom %s could not be deleted from the patched residue: atoms are %s (exception: %s)'
                        % (atom_name, list(residue._map.keys()), str(e)))
        # Add or replace atoms
        for atom in patch.atoms:
            if atom.name in residue:
                # Overwrite type and charge
                residue[atom.name].type = atom.type
                residue[atom.name].charge = atom.charge
            else:
                residue.add_atom(
                    Atom(name=atom.name, type=atom.type, charge=atom.charge))
            modifications_made = True
        # Add bonds
        for (atom1_name, atom2_name, order) in patch.add_bonds:
            try:
                # Remove dangling bonds
                for name in [atom1_name, atom2_name]:
                    if residue.head and (name == residue.head.name):
                        residue.head = None
                    if residue.tail and (name == residue.tail.name):
                        residue.tail = None
                # Add bond
                residue.add_bond(atom1_name, atom2_name, order)
                modifications_made = True
            except (IndexError, MoleculeError) as e:
                raise IncompatiblePatchError(
                    'Bond %s-%s could not be added to patched residue: atoms are %s (exception: %s)'
                    % (atom1_name, atom2_name, list(
                        residue._map.keys()), str(e)))
        # Delete impropers
        for impr in patch.delete_impropers:
            try:
                residue._impr.remove(impr)
                # removal of impropers doesn't do anything as far as OpenMM is concerned, so don't note this as a modification having been made
            except ValueError as e:
                raise IncompatiblePatchError(
                    'Improper %s was not found in residue to be patched.' %
                    impr)
        # Check that the net charge is integral.
        net_charge = residue.net_charge
        is_integral = (round(net_charge, precision) - round(net_charge)) == 0.0
        if not is_integral:
            raise IncompatiblePatchError(
                'Patch is not compatible with residue due to non-integral charge (charge was %f).'
                % net_charge)
        # Ensure residue is connected
        import networkx as nx
        G = residue.to_networkx(False)
        if not nx.is_connected(G):
            components = [c for c in nx.connected_components(G)]
            raise IncompatiblePatchError(
                'Patched residue bond graph is not a connected graph: %s' %
                str(components))
        # Make sure the patch has actually modified the residue
        if not modifications_made:
            raise IncompatiblePatchError('Patch did not modify residue.')

        return residue

    def patch_is_compatible(self, patch):
        """Determine whether a specified patch is compatible with this residue.

        Compatibility is determined by whether Residue.Template.apply_patch(patch) raises as
        exception or not.

        Parameters
        ----------
        patch : PatchTemplate
            The patch to be applied to this residue.

        Returns
        -------
        is_compatible : bool
            True if patch is compatible with the residue; False if not.

        """
        try:
            self.apply_patch(patch)
            return True
        except IncompatiblePatchError:
            return False

    def to_networkx(self, include_extra_particles=True):
        """ Create a NetworkX graph of atoms and bonds

        Parameters
        ----------
        include_extra_particles : bool
            Whether to include "atoms" that actually represent extra particles (atomic_number == 0).

        Returns
        -------
        G : :class:`networkx.Graph`
            A NetworkX Graph representing the molecule

        """
        import networkx
        G = networkx.Graph()
        for atom in self.atoms:
            if atom.atomic_number != 0 or include_extra_particles:
                G.add_node(atom.name, charge=atom.charge, type=atom.type)
        for bond in self.bonds:
            if (bond.atom1.atomic_number != 0 and
                    bond.atom2.atomic_number != 0) or include_extra_particles:
                G.add_edge(bond.atom1.name, bond.atom2.name)
        return G

    def to_dataframe(self):
        """ Create a pandas dataframe from the atom information

        Returns
        -------
        df : :class:`pandas.DataFrame`
            The pandas DataFrame with all of the atomic properties

        Notes
        -----
        The DataFrame will be over all atoms. The columns will be the attributes
        of the atom (as well as its containing residue). Some columns will
        *always* exist. Others will only exist if those attributes have been set
        on the Atom instances (see the :class:`Atom` docs for possible
        attributes and their meaning). The columns that will always be present
        are:

            - number : int
            - name : str
            - type : str
            - atomic_number : int
            - charge : float
            - mass : float
            - nb_idx : int
            - solvent_radius : float
            - screen : float
            - occupancy : float
            - bfactor : float
            - altloc : str
            - tree : str
            - join : int
            - irotat : int
            - rmin : float
            - epsilon : float
            - rmin_14 : float
            - epsilon_14 : float

        The following attributes are optionally present if they were present in
        the original file defining the structure:

            - xx : float (x-coordinate position)
            - xy : float (y-coordinate position)
            - xz : float (z-coordinate position)
            - vx : float (x-coordinate velocity)
            - vy : float (y-coordinate velocity)
            - vz : float (z-coordinate velocity)
        """
        import pandas as pd
        ret = pd.DataFrame()

        ret['number'] = [atom.number for atom in self.atoms]
        ret['name'] = [atom.name for atom in self.atoms]
        ret['type'] = [atom.type for atom in self.atoms]
        ret['atomic_number'] = [atom.atomic_number for atom in self.atoms]
        ret['charge'] = [atom.charge for atom in self.atoms]
        ret['mass'] = [atom.mass for atom in self.atoms]
        ret['nb_idx'] = [atom.nb_idx for atom in self.atoms]
        ret['solvent_radius'] = [atom.solvent_radius for atom in self.atoms]
        ret['screen'] = [atom.screen for atom in self.atoms]
        ret['occupancy'] = [atom.occupancy for atom in self.atoms]
        ret['bfactor'] = [atom.bfactor for atom in self.atoms]
        ret['altloc'] = [atom.altloc for atom in self.atoms]
        ret['tree'] = [atom.tree for atom in self.atoms]
        ret['join'] = [atom.join for atom in self.atoms]
        ret['irotat'] = [atom.irotat for atom in self.atoms]
        ret['rmin'] = [atom.rmin for atom in self.atoms]
        ret['epsilon'] = [atom.epsilon for atom in self.atoms]
        ret['rmin_14'] = [atom.rmin_14 for atom in self.atoms]
        ret['epsilon_14'] = [atom.epsilon_14 for atom in self.atoms]
        ret['resname'] = [atom.residue.name for atom in self.atoms]

        # Now for optional attributes
        # Coordinates
        try:
            coords = pd.DataFrame([[atom.xx, atom.xy, atom.xz]
                                   for atom in self.atoms],
                                  columns=['xx', 'xy', 'xz'])
        except AttributeError:
            pass
        else:
            ret = ret.join(coords)
        # Velocities
        try:
            vels = pd.DataFrame([[atom.vx, atom.vy, atom.vz]
                                 for atom in self.atoms],
                                columns=['vx', 'vy', 'vz'])
        except AttributeError:
            pass
        else:
            ret = ret.join(vels)
        return ret

    def to_structure(self):
        """
        Generates a Structure instance with a single residue from this
        ResidueTemplate

        Returns
        -------
        struct : :class:`parmed.structure.Structure`
            The Structure with all of the bonds and connectivity of this
            template
        """
        struct = Structure()
        for atom in self:
            struct.add_atom(_copy.copy(atom), self.name, 0)
        for bond in self.bonds:
            struct.bonds.append(
                Bond(struct.atoms[bond.atom1.idx],
                     struct.atoms[bond.atom2.idx]))
        return struct

    def save(self, fname, format=None, overwrite=False, **kwargs):
        """
        Saves the current ResidueTemplate in the requested file format.
        Supported formats can be specified explicitly or determined by file-name
        extension. The following formats are supported, with the recognized
        suffix shown in parentheses:

            - MOL2 (.mol2)
            - MOL3 (.mol3)
            - OFF (.lib/.off)
            - PDB (.pdb)
            - PQR (.pqr)

        Parameters
        ----------
        fname : str
            Name of the file to save. If ``format`` is ``None`` (see below), the
            file type will be determined based on the filename extension. If the
            type cannot be determined, a ValueError is raised.
        format : str, optional
            The case-insensitive keyword specifying what type of file ``fname``
            should be saved as. If ``None`` (default), the file type will be
            determined from filename extension of ``fname``
        overwrite : bool, optional
            If True, allow the target file to be overwritten. Otherwise, an
            IOError is raised if the file exists. Default is False
        kwargs : keyword-arguments
            Remaining arguments are passed on to the file writing routines that
            are called by this function

        Raises
        ------
        ValueError if either filename extension or ``format`` are not recognized
        TypeError if the structure cannot be converted to the desired format for
        whatever reason
        """
        from parmed.amber.offlib import AmberOFFLibrary
        from parmed.formats.mol2 import Mol2File
        extmap = {
            '.mol2': 'MOL2',
            '.mol3': 'MOL3',
            '.off': 'OFFLIB',
            '.lib': 'OFFLIB',
            '.pdb': 'PDB',
            '.pqr': 'PQR',
        }
        if format is not None:
            format = format.upper()
        else:
            base, ext = os.path.splitext(fname)
            if ext in ('.bz2', '.gz'):
                ext = os.path.splitext(base)[1]
            if ext in extmap:
                format = extmap[ext]
            else:
                raise ValueError('Could not determine file type of %s' % fname)
        if format == 'MOL2':
            Mol2File.write(self, fname, mol3=False, **kwargs)
        elif format == 'MOL3':
            Mol2File.write(self, fname, mol3=True, **kwargs)
        elif format in ('OFFLIB', 'OFF'):
            AmberOFFLibrary.write({self.name: self}, fname, **kwargs)
        elif format in ('PDB', 'PQR'):
            self.to_structure().save(fname,
                                     format=format,
                                     overwrite=overwrite,
                                     **kwargs)
        else:
            raise ValueError('Unrecognized format for ResidueTemplate save')