Example #1
0
class NeighborSearch(object):
    """Radius-based neighbor lookup over a fixed list of atoms.

    Two kinds of query are offered:

     1. search - atoms/residues/chains/models/structures that have an
        atom within a radius of a query position.
     2. search_all - pairs of atoms/residues/chains/models/structures
        that lie within a fixed radius of each other.

    The spatial index is a Bio.PDB.kdtrees.KDTree that is built once, in
    the constructor, from the coordinates of the atoms.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Build the KD tree for the given atoms.

        Arguments:
         - atom_list - list of atoms to index. Query results are looked up
           in this list, which may mix atoms from different structures.
         - bucket_size - bucket size handed to the KD tree (asserted to be
           greater than 1). Tune it if query speed matters.

        """
        from Bio.PDB.kdtrees import KDTree
        self.atom_list = atom_list
        # One coordinate row per atom, stored as an array of doubles.
        self.coords = numpy.array(
            [atom.get_coord() for atom in atom_list], dtype="d")
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(self.coords, bucket_size)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Map every (entity, entity) tuple of pair_list to the tuple of
        # the two parents, ordered so the smaller parent comes first.
        # Tuples whose members share a parent are dropped, and the
        # result is passed through uniqueify() to remove duplicates.
        parents = []
        for child_a, child_b in pair_list:
            parent_a = child_a.get_parent()
            parent_b = child_b.get_parent()
            if parent_a == parent_b:
                continue
            if parent_a < parent_b:
                ordered = (parent_a, parent_b)
            else:
                ordered = (parent_b, parent_a)
            parents.append(ordered)
        return uniqueify(parent_pair_list) if False else uniqueify(parents)

    # Public

    def search(self, center, radius, level="A"):
        """Find the entities around a point.

        Returns the atoms/residues/chains/models/structures that have
        at least one atom within radius of center. The kind of entity
        returned is selected by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        # The KD tree is given a C-contiguous array of doubles.
        center = numpy.require(center, dtype='d', requirements='C')
        if center.shape != (3, ):
            raise Exception("Expected a 3-dimensional NumPy array")
        hits = [self.atom_list[point.index]
                for point in self.kdt.search(center, radius)]
        if level != "A":
            return unfold_entities(hits, level)
        return hits

    def search_all(self, radius, level="A"):
        """Find all pairs of neighboring entities.

        Returns the pairs of entities, at the requested level, that
        contain atom pairs within radius of each other.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        atoms = self.atom_list
        pairs = [(atoms[found.index1], atoms[found.index2])
                 for found in self.kdt.neighbor_search(radius)]
        if level == "A":
            return pairs
        # Climb the hierarchy one level at a time until the requested
        # level is reached.
        for current in ["R", "C", "M", "S"]:
            pairs = self._get_unique_parent_pairs(pairs)
            if current == level:
                return pairs
Example #2
0
class KDTree(object):
    """An interface to Thomas Hamelryck's C KDTree module that can handle
    periodic boundary conditions.  Both point and pair search are performed
    using the single :meth:`search` method and results are retrieved using
    :meth:`getIndices` and :meth:`getDistances`.

    **Periodic Boundary Conditions**

    *Point search*

    A point search around a *center*, indicated with a question mark (``?``)
    below, involves making images of the point in cells sharing a wall or an
    edge with the unitcell that contains the system.  The search is performed
    for all images of the *center* (27 in 3-dimensional space) and unique
    indices with the minimum distance from them to the *center* are returned.
    ::

          _____________________________
         |        1|        2|        3|
         |       ? |       ? |      ?  |
         |_________|_________|_________|
         |        4|o  h h  5|        6| ? and H interact in periodic image 4
         |       ?H| h  o  ? |      ?  | but not in the original unitcell (5)
         |_________|_________|_________|
         |        7|        8|        9|
         |       ? |       ? |      ?  |
         |_________|_________|_________|

    There are two requirements for this approach to work: (i) the *center* must
    be in the original unitcell, and (ii) the system must be in the original
    unitcell with parts in its immediate periodic images.

    *Pair search*

    A pair search involves making 26 (or 8 in 2-d) replicas of the system
    coordinates.  A KDTree is built for the system (``O`` and ``H``) and all
    its replicas (``o`` and ``h``).  After pair search is performed, unique
    pairs of indices and minimum distance between them are returned.
    ::

          _____________________________
         |o  h h  1|o  h h  2|o  h h  3|
        h| h  o   h| h  o   h| h  o    |
         |_________|_________|_________|
         |o  h h  4|O  H H  5|o  h h  6|
        h| h  o   H| H  O   h| h  o    |
         |_________|_________|_________|
         |o  h h  7|o  h h  8|o  h h  9|
        h| h  o   h| h  o   h| h  o    |
         |_________|_________|_________|

    The only requirement for this approach to work is that the system must be
    in the original unitcell with parts in its immediate periodic images.


    .. seealso::
       :func:`.wrapAtoms` can be used for wrapping atoms into the single
       periodic image of the system."""

    def __init__(self, coords, **kwargs):
        """
        :arg coords: coordinate array with shape ``(N, 3)``, where N is number
            of atoms
        :type coords: :class:`numpy.ndarray`, :class:`.Atomic`, :class:`.Frame`

        :arg unitcell: orthorhombic unitcell dimension array with shape
            ``(3,)``
        :type unitcell: :class:`numpy.ndarray`

        :arg bucketsize: number of points per tree node, default is 10
        :type bucketsize: int

        :arg none: callable taking no arguments; its result is what
            :meth:`getIndices` and :meth:`getDistances` return when the most
            recent search found nothing, default returns **None**
        :type none: callable

        :arg oncall: what calling the instance returns, ``'both'`` for
            indices and distances or ``'dist'`` for distances only,
            default is ``'both'``
        :type oncall: str

        :raises TypeError: when *coords* is neither an array nor an object
            with a ``getCoords`` method, when *bucketsize* is not an integer,
            when *unitcell* is not an array, or when *none* cannot be called
            without arguments
        :raises ValueError: when *bucketsize* is not positive, when
            *unitcell* does not have shape ``(3,)``, or when *oncall* is not
            one of the accepted values"""

        unitcell = kwargs.get('unitcell')
        if not isinstance(coords, ndarray):
            if unitcell is None:
                # fall back on the unitcell carried by the object, if any
                try:
                    unitcell = coords.getUnitcell()
                except AttributeError:
                    pass
                else:
                    if unitcell is not None:
                        LOGGER.info('Unitcell information from {0} will be '
                                    'used.'.format(str(coords)))
            try:
                # using getCoords() because coords will be stored internally
                # and reused when needed, this will avoid unexpected results
                # due to changes made to coordinates externally
                coords = coords.getCoords()
            except AttributeError:
                raise TypeError('coords must be a Numpy array or must have '
                                'getCoords attribute')
        else:
            # private copy, for the same reason getCoords() is used above
            coords = coords.copy()

        if coords.ndim != 2:
            raise Exception('coords.ndim must be 2')
        if coords.shape[-1] != 3:
            raise Exception('coords.shape must be (N,3)')
        # 1e6 doubles as the "no distance recorded yet" sentinel in search()
        if coords.min() <= -1e6 or coords.max() >= 1e6:
            raise Exception('coords must be between -1e6 and 1e6')

        self._bucketsize = kwargs.get('bucketsize', 10)

        if not isinstance(self._bucketsize, int):
            raise TypeError('bucketsize must be an integer')
        if self._bucketsize < 1:
            raise ValueError('bucketsize must be a positive integer')

        self._coords = None
        self._unitcell = None
        self._neighbors = None

        # validate the unitcell before any tree is built
        if unitcell is not None:
            if not isinstance(unitcell, ndarray):
                raise TypeError('unitcell must be a Numpy array')
            if unitcell.shape != (3, ):
                raise ValueError('unitcell.shape must be (3,)')

        self._kdtree = CKDTree(3, self._bucketsize)
        self._kdtree.set_data(coords)

        if unitcell is not None:
            self._coords = coords
            self._unitcell = unitcell
            # module-level REPLICATE scaled by the cell edges; presumably one
            # translation vector per periodic image (see class docstring)
            self._replicate = REPLICATE * unitcell
            # tree over the system and its replicas, built lazily by the
            # first periodic pair search
            self._kdtree2 = None
            # results of the most recent periodic search: index (or index
            # pair) -> minimum distance, and the keys in insertion order
            self._pbcdict = {}
            self._pbckeys = []
            self._n_atoms = coords.shape[0]

        self._none = kwargs.pop('none', lambda: None)
        try:
            self._none()
        except TypeError:
            raise TypeError('none argument must be callable')
        self._oncall = kwargs.pop('oncall', 'both')
        # an explicit raise, because an assert would be skipped under -O
        # and __call__ would then quietly return None for a bad value
        if self._oncall not in ('both', 'dist'):
            raise ValueError('oncall must be both or dist')

    def __call__(self, radius, center=None):
        """Shorthand method for searching and retrieving results.  Returns
        indices and distances, or only distances, depending on the *oncall*
        argument given at instantiation."""

        self.search(radius, center)
        if self._oncall == 'both':
            return self.getIndices(), self.getDistances()
        elif self._oncall == 'dist':
            return self.getDistances()

    def search(self, radius, center=None):
        """Search pairs within *radius* of each other or points within *radius*
        of *center*.  Nothing is returned, use :meth:`getIndices`,
        :meth:`getDistances` and :meth:`getCount` to retrieve the results.

        :arg radius: distance (Å)
        :type radius: float

        :arg center: a point in Cartesian coordinate system
        :type center: :class:`numpy.ndarray`

        :raises TypeError: when *radius* is not a number or is not positive
            (the exception type is kept for backwards compatibility), or when
            *center* is not an array
        :raises ValueError: when *center* does not have shape ``(3,)``"""

        if not isinstance(radius, (float, int)):
            raise TypeError('radius must be a number')
        if radius <= 0:
            raise TypeError('radius must be a positive number')

        if center is not None:
            if not isinstance(center, ndarray):
                raise TypeError('center must be a Numpy array instance')
            if center.shape != (3, ):
                raise ValueError('center.shape must be (3,)')

            if self._unitcell is None:
                self._kdtree.search_center_radius(center, radius)
                # None marks the most recent search as a point search
                self._neighbors = None

            else:
                kdtree = self._kdtree
                search = kdtree.search_center_radius
                get_count = kdtree.get_count

                # search around every image of the center and keep, for each
                # point index, the smallest distance that was found
                nearest = {}
                for image in center + self._replicate:
                    search(image, radius)
                    if get_count():
                        for i, r in zip(get_KDTree_indices(kdtree),
                                        get_KDTree_radii(kdtree)):
                            nearest[i] = min(r, nearest.get(i, 1e6))
                self._pbcdict = nearest
                self._pbckeys = list(nearest)

        else:
            if self._unitcell is None:
                self._neighbors = self._kdtree.neighbor_search(radius)
            else:
                kdtree = self._kdtree2
                if kdtree is None:
                    # first periodic pair search: build and cache a tree over
                    # the coordinates shifted by every replicate vector
                    coords = self._coords
                    coords = concatenate(
                        [coords + rep for rep in self._replicate])
                    kdtree = CKDTree(3, self._bucketsize)
                    kdtree.set_data(coords)
                    self._kdtree2 = kdtree
                n_atoms = len(self._coords)
                closest = {}
                neighbors = kdtree.neighbor_search(radius)
                if kdtree.neighbor_get_count():
                    for nb in neighbors:
                        # fold replica indices back onto the original atoms
                        i = nb.index1 % n_atoms
                        j = nb.index2 % n_atoms
                        if i == j:
                            # an atom paired with its own image is not a pair
                            continue
                        pair = (i, j) if i < j else (j, i)
                        closest[pair] = min(nb.radius, closest.get(pair, 1e6))
                self._pbcdict = closest
                self._pbckeys = list(closest)

    def getIndices(self):
        """Returns array of indices for points or pairs, depending on the type
        of the most recent search.  When nothing was found, the result of
        calling the *none* argument given at instantiation is returned."""

        if self.getCount():
            if self._unitcell is None:
                if self._neighbors is None:
                    return get_KDTree_indices(self._kdtree)
                else:
                    return array([(n.index1, n.index2)
                                  for n in self._neighbors], int)
            else:
                return array(self._pbckeys)
        return self._none()

    def getDistances(self):
        """Returns array of distances, in the same order as the indices
        returned by :meth:`getIndices`.  When nothing was found, the result
        of calling the *none* argument given at instantiation is returned."""

        if self.getCount():
            if self._unitcell is None:
                if self._neighbors is None:
                    return get_KDTree_radii(self._kdtree)
                else:
                    return array([n.radius for n in self._neighbors])
            else:
                _dict = self._pbcdict
                return array([_dict[i] for i in self._pbckeys])
        return self._none()

    def getCount(self):
        """Returns number of points or pairs."""

        if self._unitcell is None:
            if self._neighbors is None:
                return self._kdtree.get_count()
            else:
                return self._kdtree.neighbor_get_count()
        else:
            return len(self._pbcdict)
Example #3
0
class NeighborSearch(object):
    """Neighbor lookup by radius over a fixed list of atoms.

    The class answers two related questions:

     1. Which atoms/residues/chains/models/structures have an atom within
        a radius of a given query position (search).
     2. Which atoms/residues/chains/models/structures are within a fixed
        radius of each other (search_all).

    Both queries are delegated to a Bio.PDB.kdtrees.KDTree built from the
    atom coordinates when the object is created.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Index the given atoms in a KD tree.

        Arguments:
         - atom_list - list of atoms used in the queries. Atoms from
           different structures may be mixed.
         - bucket_size - bucket size of the KD tree (asserted to be
           greater than 1); adjust it to trade off query speed.

        """
        from Bio.PDB.kdtrees import KDTree
        self.atom_list = atom_list
        # Gather the coordinates into a single array of doubles.
        coordinates = [atom.get_coord() for atom in atom_list]
        self.coords = numpy.array(coordinates, dtype="d")
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(self.coords, bucket_size)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Replace each (entity, entity) tuple in pair_list by the tuple
        # of the entities' parents, smaller parent first. A tuple whose
        # two entities have the same parent is skipped. Duplicates are
        # removed by uniqueify() before returning.
        lifted = []
        for left, right in pair_list:
            left_parent = left.get_parent()
            right_parent = right.get_parent()
            if left_parent == right_parent:
                continue
            if left_parent < right_parent:
                lifted.append((left_parent, right_parent))
            else:
                lifted.append((right_parent, left_parent))
        return uniqueify(lifted)

    # Public

    def search(self, center, radius, level="A"):
        """Search around a position.

        Returns every atom/residue/chain/model/structure with at least
        one atom within radius of center. The level argument picks the
        entity type that is returned (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        # Coerce the query point to a C-contiguous array of doubles.
        center = numpy.require(center, dtype='d', requirements='C')
        if center.shape != (3,):
            raise Exception("Expected a 3-dimensional NumPy array")
        nearby = []
        for point in self.kdt.search(center, radius):
            nearby.append(self.atom_list[point.index])
        if level == "A":
            return nearby
        return unfold_entities(nearby, level)

    def search_all(self, radius, level="A"):
        """Search for all neighboring pairs.

        Returns the pairs of entities, at the requested level, that
        have atom pairs within radius of each other.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        found = self.kdt.neighbor_search(radius)
        atoms = self.atom_list
        pair_list = [(atoms[hit.index1], atoms[hit.index2]) for hit in found]
        if level == "A":
            return pair_list
        # Move up one level per iteration, stopping at the requested one.
        for reached in ["R", "C", "M", "S"]:
            pair_list = self._get_unique_parent_pairs(pair_list)
            if reached == level:
                return pair_list
Example #4
0
    def search(self, radius, center=None):
        """Run a pair search, or a point search when *center* is given.

        Pairs within *radius* of each other are searched when *center* is
        **None**, otherwise points within *radius* of *center*.  Nothing is
        returned, the outcome is stored on the instance.

        :arg radius: distance (Å)
        :type radius: float

        :arg center: a point in Cartesian coordinate system
        :type center: :class:`numpy.ndarray`"""

        if not isinstance(radius, (float, int)):
            raise TypeError('radius must be a number')
        if radius <= 0:
            raise TypeError('radius must be a positive number')

        if center is None:
            # ---- pair search ----
            if self._unitcell is None:
                self._neighbors = self._kdtree.neighbor_search(radius)
                return

            tree = self._kdtree2
            if tree is None:
                # build, once, a tree over the coordinates shifted by every
                # replicate vector and keep it for later pair searches
                base = self._coords
                stacked = concatenate(
                    [base + shift for shift in self._replicate])
                tree = CKDTree(3, self._bucketsize)
                tree.set_data(stacked)
                self._kdtree2 = tree
            size = len(self._coords)
            shortest = {}
            found = tree.neighbor_search(radius)
            if tree.neighbor_get_count():
                for hit in found:
                    # map replica indices back onto the original points
                    first = hit.index1 % size
                    second = hit.index2 % size
                    if first == second:
                        # a point paired with its own replica is skipped
                        continue
                    if first < second:
                        key = (first, second)
                    else:
                        key = (second, first)
                    shortest[key] = min(hit.radius, shortest.get(key, 1e6))
            self._pbcdict = shortest
            self._pdbkeys = list(shortest)
            return

        # ---- point search ----
        if not isinstance(center, ndarray):
            raise TypeError('center must be a Numpy array instance')
        if center.shape != (3, ):
            raise ValueError('center.shape must be (3,)')

        if self._unitcell is None:
            self._kdtree.search_center_radius(center, radius)
            self._neighbors = None
            return

        tree = self._kdtree
        query = tree.search_center_radius
        count = tree.get_count

        # query around each shifted copy of the center, keeping for every
        # index the smallest distance seen so far
        shortest = {}
        for shifted in center + self._replicate:
            query(shifted, radius)
            if not count():
                continue
            indices = get_KDTree_indices(tree)
            radii = get_KDTree_radii(tree)
            for index, dist in zip(indices, radii):
                shortest[index] = min(dist, shortest.get(index, 1e6))
        self._pbcdict = shortest
        self._pdbkeys = list(shortest)