class NeighborSearch(object):
    """Radius-based neighbor lookup over a fixed list of atoms.

    Two kinds of query are offered:

    1. ``search`` - everything (atoms/residues/chains/models/structures)
       that lies within a radius of one query position.
    2. ``search_all`` - every pair of atoms/residues/chains/models/
       structures that lie within a radius of each other.

    The heavy lifting is delegated to the KDTree class, which is
    implemented in C for speed.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Build the KD tree for the given atoms.

        Arguments:
         - atom_list - list of atoms that every later query runs against.
           Atoms from different structures may be mixed.
         - bucket_size - bucket size of the KD tree; tune it if you want
           to trade build time against query time.

        """
        from Bio.PDB.kdtrees import KDTree

        self.atom_list = atom_list
        # Stack the per-atom coordinates into an N x 3 array of doubles.
        self.coords = numpy.array(
            [atom.get_coord() for atom in atom_list], dtype="d")
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(self.coords, bucket_size)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Map a list of (entity, entity) tuples onto the corresponding
        # (parent, parent) tuples.  Pairs whose two members share a parent
        # are dropped, each remaining pair is stored smaller-parent-first,
        # and duplicates are removed by uniqueify().
        #
        # o pair_list - a list of (entity, entity) tuples
        ordered = []
        for child_a, child_b in pair_list:
            parent_a = child_a.get_parent()
            parent_b = child_b.get_parent()
            if parent_a == parent_b:
                continue
            if parent_a < parent_b:
                ordered.append((parent_a, parent_b))
            else:
                ordered.append((parent_b, parent_a))
        return uniqueify(ordered)

    # Public

    def search(self, center, radius, level="A"):
        """Return the entities that have an atom within radius of center.

        The kind of entity returned is selected by level
        (A=atoms, R=residues, C=chains, M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException for an unknown level and Exception when
        center does not have shape (3,).

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        center = numpy.require(center, dtype='d', requirements='C')
        if center.shape != (3, ):
            raise Exception("Expected a 3-dimensional NumPy array")
        hits = [self.atom_list[point.index]
                for point in self.kdt.search(center, radius)]
        if level == "A":
            return hits
        return unfold_entities(hits, level)

    def search_all(self, radius, level="A"):
        """Return all pairs of entities that have atoms within radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException for an unknown level.

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        atoms = self.atom_list
        pairs = [(atoms[nb.index1], atoms[nb.index2])
                 for nb in self.kdt.neighbor_search(radius)]
        if level == "A":
            # atom pairs are returned as found, without de-duplication
            return pairs
        # Climb the hierarchy one level at a time until the requested
        # level is reached.
        for current in ("R", "C", "M", "S"):
            pairs = self._get_unique_parent_pairs(pairs)
            if current == level:
                return pairs
class KDTree(object):

    """An interface to Thomas Hamelryck's C KDTree module that can handle
    periodic boundary conditions.  Both point and pair search are performed
    using the single :meth:`search` method and results are retrieved using
    :meth:`getIndices` and :meth:`getDistances`.

    **Periodic Boundary Conditions**

    *Point search*

    A point search around a *center*, indicated with a question mark (``?``)
    below, involves making images of the point in cells sharing a wall or an
    edge with the unitcell that contains the system.  The search is performed
    for all images of the *center* (27 in 3-dimensional space) and unique
    indices with the minimum distance from them to the *center* are returned.
    ::

          _____________________________
         |        1|        2|        3|
         |       ? |       ? |       ? |
         |_________|_________|_________|
         |        4|o  h h  5|        6| ? and H interact in periodic image 4
         |       ?H| h  o  ? |       ? | but not in the original unitcell (5)
         |_________|_________|_________|
         |        7|        8|        9|
         |       ? |       ? |       ? |
         |_________|_________|_________|

    There are two requirements for this approach to work: (i) the *center*
    must be in the original unitcell, and (ii) the system must be in the
    original unitcell with parts in its immediate periodic images.

    *Pair search*

    A pair search involves making 26 (or 8 in 2-d) replicas of the system
    coordinates.  A KDTree is built for the system (``O`` and ``H``) and all
    its replicas (``o`` and ``h``).  After pair search is performed, unique
    pairs of indices and minimum distance between them are returned.
    ::

          _____________________________
         |o  h h  1|o  h h  2|o  h h  3|
        h| h  o   h| h  o   h| h  o    |
         |_________|_________|_________|
         |o  h h  4|O  H H  5|o  h h  6|
        h| h  o   H| H  O   h| h  o    |
         |_________|_________|_________|
         |o  h h  7|o  h h  8|o  h h  9|
        h| h  o   h| h  o   h| h  o    |
         |_________|_________|_________|

    Only requirement for this approach to work is that the system must be
    in the original unitcell with parts in its immediate periodic images.

    .. seealso::
       :func:`.wrapAtoms` can be used for wrapping atoms into the single
       periodic image of the system."""

    def __init__(self, coords, **kwargs):
        """
        :arg coords: coordinate array with shape ``(N, 3)``, where N is number
            of atoms
        :type coords: :class:`numpy.ndarray`, :class:`.Atomic`, :class:`.Frame`

        :arg unitcell: orthorhombic unitcell dimension array with shape
            ``(3,)``
        :type unitcell: :class:`numpy.ndarray`

        :arg bucketsize: number of points per tree node, default is 10
        :type bucketsize: int

        :arg none: callable whose return value is handed back by
            :meth:`getIndices` and :meth:`getDistances` when the most recent
            search found nothing, default returns **None**
        :type none: callable

        :arg oncall: what calling the instance returns, ``'both'`` (default)
            for indices and distances or ``'dist'`` for distances only
        :type oncall: str

        :raises TypeError: when *coords* is neither an array nor an object
            with ``getCoords``, when *bucketsize* is not an integer, when
            *unitcell* is not an array, or when *none* is not callable
        :raises ValueError: when *bucketsize* is not positive or *unitcell*
            does not have shape ``(3,)``
        :raises Exception: when *coords* is not a 2-d ``(N, 3)`` array or has
            values outside the open interval (-1e6, 1e6)"""

        unitcell = kwargs.get('unitcell')
        if not isinstance(coords, ndarray):
            if unitcell is None:
                try:
                    unitcell = coords.getUnitcell()
                except AttributeError:
                    pass
                else:
                    if unitcell is not None:
                        LOGGER.info('Unitcell information from {0} will be '
                                    'used.'.format(str(coords)))
            try:
                # using getCoords() because coords will be stored internally
                # and reused when needed, this will avoid unexpected results
                # due to changes made to coordinates externally
                coords = coords.getCoords()
            except AttributeError:
                raise TypeError('coords must be a Numpy array or must have '
                                'getCoords attribute')
        else:
            # same reason as above: keep a private copy
            coords = coords.copy()

        if coords.ndim != 2:
            raise Exception('coords.ndim must be 2')
        if coords.shape[-1] != 3:
            raise Exception('coords.shape must be (N,3)')
        if coords.min() <= -1e6 or coords.max() >= 1e6:
            raise Exception('coords must be between -1e6 and 1e6')

        self._bucketsize = kwargs.get('bucketsize', 10)
        if not isinstance(self._bucketsize, int):
            raise TypeError('bucketsize must be an integer')
        if self._bucketsize < 1:
            raise ValueError('bucketsize must be a positive integer')

        # unitcell is validated before any tree is built, as before
        if unitcell is not None:
            if not isinstance(unitcell, ndarray):
                raise TypeError('unitcell must be a Numpy array')
            if unitcell.shape != (3, ):
                raise ValueError('unitcell.shape must be (3,)')

        self._coords = None
        self._unitcell = None
        self._neighbors = None

        # the tree over the original coordinates is needed in both modes
        self._kdtree = CKDTree(3, self._bucketsize)
        self._kdtree.set_data(coords)

        if unitcell is not None:
            self._coords = coords
            self._unitcell = unitcell
            # translation vector of every periodic image of the unitcell
            self._replicate = REPLICATE * unitcell

        # tree over the replicated system, built lazily by pair search
        self._kdtree2 = None
        # results of the most recent periodic search: key -> min distance
        self._pbcdict = {}
        # ordered keys of _pbcdict; search(), getIndices() and getDistances()
        # all spell this attribute ``_pdbkeys``, so that is the name that
        # must exist before the first search
        self._pdbkeys = []
        self._n_atoms = coords.shape[0]

        self._none = kwargs.pop('none', lambda: None)
        try:
            self._none()
        except TypeError:
            raise TypeError('none argument must be callable')
        self._oncall = kwargs.pop('oncall', 'both')
        assert self._oncall in ('both', 'dist'), 'oncall must be both or dist'

    def __call__(self, radius, center=None):
        """Shorthand method for searching and retrieving results.  Returns
        indices and distances when *oncall* is ``'both'``, and distances
        alone when it is ``'dist'``."""

        self.search(radius, center)
        if self._oncall == 'both':
            return self.getIndices(), self.getDistances()
        elif self._oncall == 'dist':
            return self.getDistances()

    def search(self, radius, center=None):
        """Search pairs within *radius* of each other or points within
        *radius* of *center*.

        :arg radius: distance (Å)
        :type radius: float

        :arg center: a point in Cartesian coordinate system
        :type center: :class:`numpy.ndarray`

        :raises TypeError: when *radius* is not a number or is not positive,
            or when *center* is not an array
        :raises ValueError: when *center* does not have shape ``(3,)``"""

        if not isinstance(radius, (float, int)):
            raise TypeError('radius must be a number')
        if radius <= 0:
            raise TypeError('radius must be a positive number')

        if center is not None:
            if not isinstance(center, ndarray):
                raise TypeError('center must be a Numpy array instance')
            if center.shape != (3, ):
                raise ValueError('center.shape must be (3,)')

            if self._unitcell is None:
                self._kdtree.search_center_radius(center, radius)
                # tells the accessors that the last search was a point search
                self._neighbors = None
            else:
                kdtree = self._kdtree
                nearest = {}
                # query every periodic image of the center and keep, for each
                # index, the smallest distance found (1e6 is the placeholder
                # for an index that has not been seen yet)
                for image in center + self._replicate:
                    kdtree.search_center_radius(image, radius)
                    if kdtree.get_count():
                        for i, r in zip(get_KDTree_indices(kdtree),
                                        get_KDTree_radii(kdtree)):
                            nearest[i] = min(r, nearest.get(i, 1e6))
                self._pbcdict = nearest
                self._pdbkeys = list(nearest)

        else:
            if self._unitcell is None:
                self._neighbors = self._kdtree.neighbor_search(radius)
            else:
                kdtree = self._kdtree2
                if kdtree is None:
                    # build the tree over all replicas once and cache it
                    coords = self._coords
                    coords = concatenate(
                        [coords + rep for rep in self._replicate])
                    kdtree = CKDTree(3, self._bucketsize)
                    kdtree.set_data(coords)
                    self._kdtree2 = kdtree
                n_atoms = len(self._coords)
                closest = {}
                neighbors = kdtree.neighbor_search(radius)
                if kdtree.neighbor_get_count():
                    for nb in neighbors:
                        # fold replica indices back onto the original atoms
                        i = nb.index1 % n_atoms
                        j = nb.index2 % n_atoms
                        if i == j:
                            # an atom paired with its own image
                            continue
                        key = (i, j) if i < j else (j, i)
                        closest[key] = min(nb.radius, closest.get(key, 1e6))
                self._pbcdict = closest
                self._pdbkeys = list(closest)

    def getIndices(self):
        """Returns array of indices for points or pairs, depending on the
        type of the most recent search.  When nothing was found, the return
        value of the *none* callable is returned instead."""

        if self.getCount():
            if self._unitcell is None:
                if self._neighbors is None:
                    return get_KDTree_indices(self._kdtree)
                else:
                    return array([(n.index1, n.index2)
                                  for n in self._neighbors], int)
            else:
                return array(self._pdbkeys)
        return self._none()

    def getDistances(self):
        """Returns array of distances, in the same order as the indices from
        :meth:`getIndices`.  When nothing was found, the return value of the
        *none* callable is returned instead."""

        if self.getCount():
            if self._unitcell is None:
                if self._neighbors is None:
                    return get_KDTree_radii(self._kdtree)
                else:
                    return array([n.radius for n in self._neighbors])
            else:
                _dict = self._pbcdict
                return array([_dict[i] for i in self._pdbkeys])
        return self._none()

    def getCount(self):
        """Returns number of points or pairs."""

        if self._unitcell is None:
            if self._neighbors is None:
                return self._kdtree.get_count()
            else:
                return self._kdtree.neighbor_get_count()
        else:
            return len(self._pbcdict)
class NeighborSearch(object):
    """Neighbor searching on top of a KD tree.

    The class serves two related purposes:

    1. Finding all atoms/residues/chains/models/structures within radius
       of a given query position.
    2. Finding all atoms/residues/chains/models/structures that are within
       a fixed radius of each other.

    Queries are answered by the KDTree class implemented in C for speed.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Store the atoms and index their coordinates in a KD tree.

        Arguments:
         - atom_list - list of atoms used in the queries; it may contain
           atoms from different structures.
         - bucket_size - bucket size of the KD tree, available as a knob
           for speed tuning.

        """
        from Bio.PDB.kdtrees import KDTree

        self.atom_list = atom_list
        # Collect one coordinate per atom, then convert to an
        # N x 3 array of type float.
        positions = []
        for atom in atom_list:
            positions.append(atom.get_coord())
        self.coords = numpy.array(positions, dtype="d")
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(self.coords, bucket_size)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Translate (entity, entity) tuples into (parent, parent) tuples.
        # A pair whose entities have the same parent is skipped; the rest
        # are normalised so that the smaller parent comes first, and
        # duplicates are then removed.
        #
        # o pair_list - a list of (entity, entity) tuples
        collected = []
        for first, second in pair_list:
            upper_first = first.get_parent()
            upper_second = second.get_parent()
            if upper_first == upper_second:
                continue
            if upper_first < upper_second:
                normalised = (upper_first, upper_second)
            else:
                normalised = (upper_second, upper_first)
            collected.append(normalised)
        return uniqueify(collected)

    # Public

    def search(self, center, radius, level="A"):
        """Find entities with at least one atom within radius of center.

        The entity level of the result (e.g. atoms or residues) is
        determined by level (A=atoms, R=residues, C=chains, M=models,
        S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        An unknown level raises PDBException; a center whose shape is not
        (3,) raises Exception.

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        center = numpy.require(center, dtype='d', requirements='C')
        if center.shape != (3,):
            raise Exception("Expected a 3-dimensional NumPy array")
        found = self.kdt.search(center, radius)
        nearby_atoms = [self.atom_list[item.index] for item in found]
        if level != "A":
            return unfold_entities(nearby_atoms, level)
        return nearby_atoms

    def search_all(self, radius, level="A"):
        """Find all pairs of entities that have atom pairs within radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        An unknown level raises PDBException.

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        found = self.kdt.neighbor_search(radius)
        atoms = self.atom_list
        current_pairs = [(atoms[item.index1], atoms[item.index2])
                         for item in found]
        if level == "A":
            # atom level requested: hand the raw pairs back
            return current_pairs
        # Promote the pairs one hierarchy level per iteration and stop as
        # soon as the requested level has been produced.
        for reached in ["R", "C", "M", "S"]:
            current_pairs = self._get_unique_parent_pairs(current_pairs)
            if reached == level:
                return current_pairs
def search(self, radius, center=None):
    """Run a pair search, or a point search when *center* is given.

    Without *center*, pairs that are within *radius* of each other are
    searched; with *center*, points within *radius* of it are searched.
    Results are stored on the instance.

    :arg radius: distance (Å)
    :type radius: float

    :arg center: a point in Cartesian coordinate system
    :type center: :class:`numpy.ndarray`"""

    if not isinstance(radius, (float, int)):
        raise TypeError('radius must be a number')
    if radius <= 0:
        raise TypeError('radius must be a positive number')

    if center is None:
        # ---- pair search ----
        if self._unitcell is None:
            self._neighbors = self._kdtree.neighbor_search(radius)
            return

        tree = self._kdtree2
        if tree is None:
            # first periodic pair search: index the system together with
            # all of its replicas and keep the tree for later calls
            base = self._coords
            stacked = concatenate([base + shift
                                   for shift in self._replicate])
            tree = CKDTree(3, self._bucketsize)
            tree.set_data(stacked)
            self._kdtree2 = tree

        size = len(self._coords)
        shortest = {}
        found = tree.neighbor_search(radius)
        if tree.neighbor_get_count():
            for hit in found:
                # map replica indices back to indices of the system
                a = hit.index1 % size
                b = hit.index2 % size
                if a < b:
                    key = (a, b)
                elif b < a:
                    key = (b, a)
                else:
                    # both ends are images of the same point
                    continue
                shortest[key] = min(hit.radius, shortest.get(key, 1e6))
        self._pbcdict = shortest
        self._pdbkeys = list(shortest)
        return

    # ---- point search ----
    if not isinstance(center, ndarray):
        raise TypeError('center must be a Numpy array instance')
    if center.shape != (3, ):
        raise ValueError('center.shape must be (3,)')

    tree = self._kdtree
    if self._unitcell is None:
        tree.search_center_radius(center, radius)
        self._neighbors = None
        return

    shortest = {}
    # one query per periodic image of the center; for every index the
    # smallest distance over all images is retained
    for image in center + self._replicate:
        tree.search_center_radius(image, radius)
        if not tree.get_count():
            continue
        for index, dist in zip(get_KDTree_indices(tree),
                               get_KDTree_radii(tree)):
            shortest[index] = min(dist, shortest.get(index, 1e6))
    self._pbcdict = shortest
    self._pdbkeys = list(shortest)