def __init__(self, atom_list, bucket_size=10):
    """Set up the KD tree that backs the neighbor queries.

    Arguments:
     - atom_list - list of atoms. This list is used in the queries.
       It can contain atoms from different structures.
     - bucket_size - bucket size of KD tree. You can play around
       with this to optimize speed if you feel like it.

    """
    from Bio.PDB.kdtrees import KDTree

    self.atom_list = atom_list
    # One row of double-precision coordinates per atom; the assertion
    # below requires the second axis to have length 3.
    self.coords = numpy.array(
        [atom.get_coord() for atom in atom_list], dtype="d"
    )
    assert bucket_size > 1
    assert self.coords.shape[1] == 3
    self.kdt = KDTree(self.coords, bucket_size)
class NeighborSearch(object):
    """Class for neighbor searching.

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the KDTree class implemented in C for speed.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Set up the KD tree that backs the neighbor queries.

        Arguments:
         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree. You can play around
           with this to optimize speed if you feel like it.

        """
        from Bio.PDB.kdtrees import KDTree

        self.atom_list = atom_list
        # One row of double-precision coordinates per atom; the assertion
        # below requires the second axis to have length 3.
        self.coords = numpy.array(
            [atom.get_coord() for atom in atom_list], dtype="d"
        )
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(self.coords, bucket_size)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Map every (entity, entity) tuple onto the tuple of the two
        # entities' parents, ordered so the smaller parent comes first.
        # Pairs whose members share one parent are dropped, and the
        # result is de-duplicated by uniqueify.
        # o pair_list - a list of (entity, entity) tuples
        parent_pairs = []
        for first, second in pair_list:
            parent_a = first.get_parent()
            parent_b = second.get_parent()
            if parent_a == parent_b:
                continue
            parent_pairs.append(
                (parent_a, parent_b) if parent_a < parent_b
                else (parent_b, parent_a)
            )
        return uniqueify(parent_pairs)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        center = numpy.require(center, dtype='d', requirements='C')
        if center.shape != (3,):
            raise Exception("Expected a 3-dimensional NumPy array")
        hits = [
            self.atom_list[point.index]
            for point in self.kdt.search(center, radius)
        ]
        return hits if level == "A" else unfold_entities(hits, level)

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        neighbors = self.kdt.neighbor_search(radius)
        atoms = self.atom_list
        pairs = [(atoms[nb.index1], atoms[nb.index2]) for nb in neighbors]
        if level == "A":
            # Atom pairs are returned as found.
            return pairs
        # Climb the hierarchy one level at a time until the requested
        # level has been produced.
        for current in ("R", "C", "M", "S"):
            pairs = self._get_unique_parent_pairs(pairs)
            if level == current:
                return pairs
def compute(self, entity, level="A"):
    """Calculate the accessible surface area (ASA) of an entity.

    The resulting atomic surface accessibility values are attached to the
    .sasa attribute of each entity (or atom), depending on the level. For
    example, if level="R", all residues will have a .sasa attribute. Atoms
    will always be assigned a .sasa attribute with their individual values.

    :param entity: input entity.
    :type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...

    :param level: the level at which ASA values are assigned, which can be
        one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model), or
        "S" (Structure). The ASA value of an entity is the sum of all ASA
        values of its children. Defaults to "A".
    :type level: str

    :raises ValueError: if *entity* has no ``level`` attribute in
        R/C/M/S, if *level* is unknown, if *level* is above the level of
        *entity*, or if *entity* contains no atoms.

    >>> from Bio.PDB import PDBParser
    >>> from Bio.PDB.SASA import ShrakeRupley
    >>> p = PDBParser(QUIET=1)
    >>> # This assumes you have a local copy of 1LCD.pdb in a directory called "PDB"
    >>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
    >>> sr = ShrakeRupley()
    >>> sr.compute(struct, level="S")
    >>> print(round(struct.sasa, 2))
    7053.43
    >>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
    9.64
    """
    is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
    if not is_valid:
        raise ValueError(
            f"Invalid entity type '{type(entity)}'. "
            "Must be Residue, Chain, Model, or Structure"
        )

    if level not in _ENTITY_HIERARCHY:
        raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
    if _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
        raise ValueError(
            f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
        )

    # Get atoms onto list for lookup
    atoms = list(entity.get_atoms())
    n_atoms = len(atoms)
    if not n_atoms:
        raise ValueError("Entity has no child atoms.")

    # Get coordinates as a numpy array
    # We trust DisorderedAtom and friends to pick representatives.
    coords = np.array([a.coord for a in atoms], dtype=np.float64)

    # Pre-compute atom neighbors using KDTree
    kdt = KDTree(coords, 10)

    # Pre-compute radius + probe table
    radii_dict = self.radii_dict
    radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
    radii += self.probe_radius
    twice_maxradii = np.max(radii) * 2

    # Per-atom count of sphere points that no neighbor covers.
    # NOTE: the ``np.int`` alias was removed in NumPy 1.24, so an explicit
    # fixed-width integer dtype is used instead.
    asa_array = np.zeros((n_atoms, 1), dtype=np.int64)
    ptset = set(range(self.n_points))
    # The sphere template does not change between atoms; convert it once.
    # Scaling it below always produces a fresh array, so no copy is needed.
    sphere = np.asarray(self._sphere)
    for i in range(n_atoms):
        r_i = radii[i]

        # Scale the sphere to this atom's radius and move it onto the atom
        s_on_i = (sphere * r_i) + coords[i]
        available_set = ptset.copy()

        # KDtree for sphere points
        kdt_sphere = KDTree(s_on_i, 10)

        # Iterate over neighbors of atom i
        for jj in kdt.search(coords[i], twice_maxradii):
            j = jj.index
            if i == j:
                continue

            if jj.radius < (r_i + radii[j]):
                # Remove overlapping points on sphere from available set
                available_set -= {
                    pt.index for pt in kdt_sphere.search(coords[j], radii[j])
                }

        asa_array[i] = len(available_set)  # update counts

    # Convert accessible point count to surface area in A**2
    f = radii * radii * (4 * np.pi / self.n_points)
    asa_array = asa_array * f[:, np.newaxis]

    # Set atom .sasa
    for i, atom in enumerate(atoms):
        atom.sasa = asa_array[i, 0]

    # Aggregate values per entity level if necessary
    if level != "A":
        # Walk up the hierarchy from the atoms to the requested level.
        entities = set(atoms)
        target = _ENTITY_HIERARCHY[level]
        for _ in range(target):
            entities = {e.parent for e in entities}

        atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
        for e in entities:
            e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
            e.sasa = asa_array[e_atoms].sum()
class NeighborSearch(object):
    """Class for neighbor searching.

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the KDTree class implemented in C for speed.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Create the object.

        Arguments:
         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree. You can play around
           with this to optimize speed if you feel like it.

        """
        from Bio.PDB.kdtrees import KDTree

        self.atom_list = atom_list
        # Collect the coordinates of all atoms into a float64 array.
        positions = [member.get_coord() for member in atom_list]
        self.coords = numpy.array(positions, dtype="d")
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(self.coords, bucket_size)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Turn (entity, entity) tuples into (parent, parent) tuples.
        # Each tuple is ordered with the smaller parent first, tuples whose
        # two parents compare equal are skipped, and duplicates are removed
        # through uniqueify.
        # o pair_list - a list of (entity, entity) tuples
        ordered = []
        for child_a, child_b in pair_list:
            low = child_a.get_parent()
            high = child_b.get_parent()
            if low == high:
                continue
            if not low < high:
                low, high = high, low
            ordered.append((low, high))
        return uniqueify(ordered)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        center = numpy.require(center, dtype='d', requirements='C')
        if center.shape != (3,):
            raise Exception("Expected a 3-dimensional NumPy array")
        found = []
        for point in self.kdt.search(center, radius):
            found.append(self.atom_list[point.index])
        if level != "A":
            return unfold_entities(found, level)
        return found

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        members = self.atom_list
        current_pairs = []
        for hit in self.kdt.neighbor_search(radius):
            current_pairs.append((members[hit.index1], members[hit.index2]))
        if level == "A":
            # return atoms
            return current_pairs
        # Promote the pairs level by level and stop at the requested one.
        for reached in "RCMS":
            current_pairs = self._get_unique_parent_pairs(current_pairs)
            if reached == level:
                return current_pairs
        return None
def __init__(self, coords, **kwargs):
    """Build the tree from *coords*, optionally with a periodic unitcell.

    :arg coords: coordinate array with shape ``(N, 3)``, where N is number
        of atoms
    :type coords: :class:`numpy.ndarray`, :class:`.Atomic`, :class:`.Frame`

    :arg unitcell: orthorhombic unitcell dimension array with shape
        ``(3,)``
    :type unitcell: :class:`numpy.ndarray`

    :arg bucketsize: number of points per tree node, default is 10
    :type bucketsize: int

    :arg none: callable taking no arguments; it is invoked once here to
        check that it can be called, default returns **None**
    :type none: callable

    :arg oncall: either ``'both'`` (default) or ``'dist'``
    :type oncall: str

    :raises TypeError: when *coords* is neither an array nor has a
        ``getCoords`` attribute, when *bucketsize* is not an integer, when
        *unitcell* is not an array, or when *none* cannot be called
    :raises ValueError: when *bucketsize* is less than 1 or *unitcell*
        does not have shape ``(3,)``
    :raises Exception: when *coords* is not 2-dimensional, its last axis is
        not of length 3, or a value lies outside (-1e6, 1e6)"""

    unitcell = kwargs.get('unitcell')
    if isinstance(coords, ndarray):
        coords = coords.copy()
    else:
        if unitcell is None:
            try:
                unitcell = coords.getUnitcell()
            except AttributeError:
                pass
            else:
                if unitcell is not None:
                    LOGGER.info('Unitcell information from {0} will be '
                                'used.'.format(str(coords)))
        try:
            # using getCoords() because coords will be stored internally
            # and reused when needed, this will avoid unexpected results
            # due to changes made to coordinates externally
            coords = coords.getCoords()
        except AttributeError:
            raise TypeError('coords must be a Numpy array or must have '
                            'getCoords attribute')

    if coords.ndim != 2:
        raise Exception('coords.ndim must be 2')
    if coords.shape[-1] != 3:
        raise Exception('coords.shape must be (N,3)')
    if coords.min() <= -1e6 or coords.max() >= 1e6:
        raise Exception('coords must be between -1e6 and 1e6')

    self._bucketsize = kwargs.get('bucketsize', 10)
    if not isinstance(self._bucketsize, int):
        raise TypeError('bucketsize must be an integer')
    if self._bucketsize < 1:
        raise ValueError('bucketsize must be a positive integer')

    self._coords = None
    self._unitcell = None
    self._neighbors = None

    # Validate the unitcell before building the tree, so a bad unitcell
    # never leaves a half-initialised instance behind.
    if unitcell is not None:
        if not isinstance(unitcell, ndarray):
            raise TypeError('unitcell must be a Numpy array')
        if unitcell.shape != (3,):
            raise ValueError('unitcell.shape must be (3,)')

    # The tree over the original coordinates is needed with and without
    # periodic boundary conditions.
    self._kdtree = CKDTree(3, self._bucketsize)
    self._kdtree.set_data(coords)

    if unitcell is not None:
        self._coords = coords
        self._unitcell = unitcell
        self._replicate = REPLICATE * unitcell
        self._kdtree2 = None
        self._pbcdict = {}
        # search(), getIndices() and getDistances() store and read the
        # periodic result keys under ``_pdbkeys``; initialise that name so
        # it exists before the first search.  ``_pbckeys`` is kept only
        # because earlier versions of this constructor set it.
        self._pdbkeys = []
        self._pbckeys = []

    self._n_atoms = coords.shape[0]

    self._none = kwargs.pop('none', lambda: None)
    try:
        # Probe call: a non-callable raises TypeError here rather than
        # later when results are requested.
        self._none()
    except TypeError:
        raise TypeError('none argument must be callable')
    self._oncall = kwargs.pop('oncall', 'both')
    assert self._oncall in ('both', 'dist'), 'oncall must be both or dist'
class KDTree(object):

    """An interface to Thomas Hamelryck's C KDTree module that can handle
    periodic boundary conditions.  Both point and pair search are performed
    using the single :meth:`search` method and results are retrieved using
    :meth:`getIndices` and :meth:`getDistances`.

    **Periodic Boundary Conditions**

    *Point search*

    A point search around a *center*, indicated with a question mark (``?``)
    below, involves making images of the point in cells sharing a wall or an
    edge with the unitcell that contains the system.  The search is performed
    for all images of the *center* (27 in 3-dimensional space) and unique
    indices with the minimum distance from them to the *center* are returned.
    ::

          _____________________________
         |        1|        2|        3|
         |       ? |       ? |       ? |
         |_________|_________|_________|
         |        4|o  h h  5|        6| ? and H interact in periodic image 4
         |       ?H| h  o  ? |       ? | but not in the original unitcell (5)
         |_________|_________|_________|
         |        7|        8|        9|
         |       ? |       ? |       ? |
         |_________|_________|_________|

    There are two requirements for this approach to work: (i) the *center* must
    be in the original unitcell, and (ii) the system must be in the original
    unitcell with parts in its immediate periodic images.

    *Pair search*

    A pair search involves making 26 (or 8 in 2-d) replicas of the system
    coordinates.  A KDTree is built for the system (``O`` and ``H``) and all
    its replicas (``o`` and ``h``).  After pair search is performed, unique
    pairs of indices and minimum distance between them are returned.
    ::

          _____________________________
         |o  h h  1|o  h h  2|o  h h  3|
        h|  h  o  h|  h  o  h|  h  o   |
         |_________|_________|_________|
         |o  h h  4|O  H H  5|o  h h  6|
        h|  h  o  H|  H  O  h|  h  o   |
         |_________|_________|_________|
         |o  h h  7|o  h h  8|o  h h  9|
        h|  h  o  h|  h  o  h|  h  o   |
         |_________|_________|_________|

    Only requirement for this approach to work is that the system must be
    in the original unitcell with parts in its immediate periodic images.

    .. seealso::
       :func:`.wrapAtoms` can be used for wrapping atoms into the single
       periodic image of the system."""

    def __init__(self, coords, **kwargs):
        """Build the tree from *coords*, optionally with a periodic unitcell.

        :arg coords: coordinate array with shape ``(N, 3)``, where N is
            number of atoms
        :type coords: :class:`numpy.ndarray`, :class:`.Atomic`,
            :class:`.Frame`

        :arg unitcell: orthorhombic unitcell dimension array with shape
            ``(3,)``
        :type unitcell: :class:`numpy.ndarray`

        :arg bucketsize: number of points per tree node, default is 10
        :type bucketsize: int

        :arg none: callable taking no arguments whose return value is handed
            back by :meth:`getIndices` and :meth:`getDistances` when the
            last search found nothing, default returns **None**
        :type none: callable

        :arg oncall: what calling the instance returns: ``'both'`` (default)
            for indices and distances, ``'dist'`` for distances only
        :type oncall: str

        :raises TypeError: when *coords* is neither an array nor has a
            ``getCoords`` attribute, when *bucketsize* is not an integer,
            when *unitcell* is not an array, or when *none* cannot be called
        :raises ValueError: when *bucketsize* is less than 1 or *unitcell*
            does not have shape ``(3,)``
        :raises Exception: when *coords* is not 2-dimensional, its last axis
            is not of length 3, or a value lies outside (-1e6, 1e6)"""

        unitcell = kwargs.get('unitcell')
        if isinstance(coords, ndarray):
            coords = coords.copy()
        else:
            if unitcell is None:
                try:
                    unitcell = coords.getUnitcell()
                except AttributeError:
                    pass
                else:
                    if unitcell is not None:
                        LOGGER.info('Unitcell information from {0} will be '
                                    'used.'.format(str(coords)))
            try:
                # using getCoords() because coords will be stored internally
                # and reused when needed, this will avoid unexpected results
                # due to changes made to coordinates externally
                coords = coords.getCoords()
            except AttributeError:
                raise TypeError('coords must be a Numpy array or must have '
                                'getCoords attribute')

        if coords.ndim != 2:
            raise Exception('coords.ndim must be 2')
        if coords.shape[-1] != 3:
            raise Exception('coords.shape must be (N,3)')
        if coords.min() <= -1e6 or coords.max() >= 1e6:
            raise Exception('coords must be between -1e6 and 1e6')

        self._bucketsize = kwargs.get('bucketsize', 10)
        if not isinstance(self._bucketsize, int):
            raise TypeError('bucketsize must be an integer')
        if self._bucketsize < 1:
            raise ValueError('bucketsize must be a positive integer')

        self._coords = None
        self._unitcell = None
        self._neighbors = None

        # Validate the unitcell before building the tree, so a bad unitcell
        # never leaves a half-initialised instance behind.
        if unitcell is not None:
            if not isinstance(unitcell, ndarray):
                raise TypeError('unitcell must be a Numpy array')
            if unitcell.shape != (3,):
                raise ValueError('unitcell.shape must be (3,)')

        # The tree over the original coordinates is needed with and without
        # periodic boundary conditions.
        self._kdtree = CKDTree(3, self._bucketsize)
        self._kdtree.set_data(coords)

        if unitcell is not None:
            self._coords = coords
            self._unitcell = unitcell
            self._replicate = REPLICATE * unitcell
            self._kdtree2 = None
            self._pbcdict = {}
            # The periodic result keys live under ``_pdbkeys`` everywhere
            # else in this class; initialise that name so it exists before
            # the first search.  ``_pbckeys`` is kept only because earlier
            # versions of this constructor set it.
            self._pdbkeys = []
            self._pbckeys = []

        self._n_atoms = coords.shape[0]

        self._none = kwargs.pop('none', lambda: None)
        try:
            # Probe call: a non-callable raises TypeError here rather than
            # later when results are requested.
            self._none()
        except TypeError:
            raise TypeError('none argument must be callable')
        self._oncall = kwargs.pop('oncall', 'both')
        assert self._oncall in ('both', 'dist'), 'oncall must be both or dist'

    def __call__(self, radius, center=None):
        """Shorthand method for searching and retrieving results.

        Returns ``(indices, distances)`` when the instance was created with
        ``oncall='both'`` and only the distances with ``oncall='dist'``."""

        self.search(radius, center)
        if self._oncall == 'both':
            return self.getIndices(), self.getDistances()
        elif self._oncall == 'dist':
            return self.getDistances()

    def search(self, radius, center=None):
        """Search pairs within *radius* of each other or points within
        *radius* of *center*.

        Nothing is returned; use :meth:`getIndices`, :meth:`getDistances`
        and :meth:`getCount` to retrieve the outcome.

        :arg radius: distance (Å)
        :type radius: float

        :arg center: a point in Cartesian coordinate system
        :type center: :class:`numpy.ndarray`

        :raises TypeError: when *radius* is not a float or int, when it is
            not positive, or when *center* is not an array
        :raises ValueError: when *center* does not have shape ``(3,)``"""

        if not isinstance(radius, (float, int)):
            raise TypeError('radius must be a number')
        if radius <= 0:
            raise TypeError('radius must be a positive number')

        if center is not None:
            if not isinstance(center, ndarray):
                raise TypeError('center must be a Numpy array instance')
            if center.shape != (3,):
                raise ValueError('center.shape must be (3,)')

            if self._unitcell is None:
                self._kdtree.search_center_radius(center, radius)
                # None marks the most recent search as a point search.
                self._neighbors = None
            else:
                kdtree = self._kdtree
                nearest = {}
                # Query every periodic image of the center and keep, per
                # point index, the smallest distance seen.
                for image in center + self._replicate:
                    kdtree.search_center_radius(image, radius)
                    if kdtree.get_count():
                        for index, dist in zip(get_KDTree_indices(kdtree),
                                               get_KDTree_radii(kdtree)):
                            nearest[index] = min(dist,
                                                 nearest.get(index, 1e6))
                self._pbcdict = nearest
                self._pdbkeys = list(nearest)

        elif self._unitcell is None:
            self._neighbors = self._kdtree.neighbor_search(radius)

        else:
            kdtree = self._kdtree2
            if kdtree is None:
                # The tree over the system plus its replicas is built on
                # the first periodic pair search and reused afterwards.
                replicated = concatenate(
                    [self._coords + rep for rep in self._replicate])
                kdtree = CKDTree(3, self._bucketsize)
                kdtree.set_data(replicated)
                self._kdtree2 = kdtree
            n_atoms = len(self._coords)
            nearest = {}
            neighbors = kdtree.neighbor_search(radius)
            if kdtree.neighbor_get_count():
                for nb in neighbors:
                    # Fold replica indices back onto the original atoms.
                    i = nb.index1 % n_atoms
                    j = nb.index2 % n_atoms
                    if i == j:
                        # An atom paired with its own image is not a pair.
                        continue
                    pair = (i, j) if i < j else (j, i)
                    nearest[pair] = min(nb.radius, nearest.get(pair, 1e6))
            self._pbcdict = nearest
            self._pdbkeys = list(nearest)

    def getIndices(self):
        """Returns array of indices for points or pairs, depending on the
        type of the most recent search.  When that search found nothing,
        the result of the *none* callable is returned instead."""

        if not self.getCount():
            return self._none()
        if self._unitcell is not None:
            return array(self._pdbkeys)
        if self._neighbors is None:
            return get_KDTree_indices(self._kdtree)
        return array([(n.index1, n.index2) for n in self._neighbors], int)

    def getDistances(self):
        """Returns array of distances.  When the most recent search found
        nothing, the result of the *none* callable is returned instead."""

        if not self.getCount():
            return self._none()
        if self._unitcell is not None:
            nearest = self._pbcdict
            return array([nearest[key] for key in self._pdbkeys])
        if self._neighbors is None:
            return get_KDTree_radii(self._kdtree)
        return array([n.radius for n in self._neighbors])

    def getCount(self):
        """Returns number of points or pairs."""

        if self._unitcell is not None:
            return len(self._pbcdict)
        if self._neighbors is None:
            return self._kdtree.get_count()
        return self._kdtree.neighbor_get_count()
def search(self, radius, center=None):
    """Search pairs within *radius* of each other or points within
    *radius* of *center*.

    Nothing is returned.  Without a unitcell, a point search is run on the
    tree and ``_neighbors`` is reset to **None**, while a pair search
    stores its neighbor list in ``_neighbors``.  With a unitcell, the
    minimum distance per index (or per index pair) is stored in
    ``_pbcdict`` and the matching keys in ``_pdbkeys``.

    :arg radius: distance (Å)
    :type radius: float

    :arg center: a point in Cartesian coordinate system
    :type center: :class:`numpy.ndarray`

    :raises TypeError: when *radius* is not a float or int, when it is not
        positive, or when *center* is not an array
    :raises ValueError: when *center* does not have shape ``(3,)``"""

    if not isinstance(radius, (float, int)):
        raise TypeError('radius must be a number')
    if radius <= 0:
        raise TypeError('radius must be a positive number')

    if center is not None:
        if not isinstance(center, ndarray):
            raise TypeError('center must be a Numpy array instance')
        if center.shape != (3,):
            raise ValueError('center.shape must be (3,)')

        if self._unitcell is None:
            self._kdtree.search_center_radius(center, radius)
            # None marks the most recent search as a point search.
            self._neighbors = None
        else:
            kdtree = self._kdtree
            nearest = {}
            # Query every periodic image of the center and keep, per point
            # index, the smallest distance seen.  A plain loop is used so
            # that no throwaway list is built and *center* is not rebound.
            for image in center + self._replicate:
                kdtree.search_center_radius(image, radius)
                if kdtree.get_count():
                    for index, dist in zip(get_KDTree_indices(kdtree),
                                           get_KDTree_radii(kdtree)):
                        nearest[index] = min(dist, nearest.get(index, 1e6))
            self._pbcdict = nearest
            self._pdbkeys = list(nearest)

    elif self._unitcell is None:
        self._neighbors = self._kdtree.neighbor_search(radius)

    else:
        kdtree = self._kdtree2
        if kdtree is None:
            # The tree over the system plus its replicas is built on the
            # first periodic pair search and reused afterwards.
            replicated = concatenate(
                [self._coords + rep for rep in self._replicate])
            kdtree = CKDTree(3, self._bucketsize)
            kdtree.set_data(replicated)
            self._kdtree2 = kdtree
        n_atoms = len(self._coords)
        nearest = {}
        neighbors = kdtree.neighbor_search(radius)
        if kdtree.neighbor_get_count():
            for nb in neighbors:
                # Fold replica indices back onto the original atoms.
                i = nb.index1 % n_atoms
                j = nb.index2 % n_atoms
                if i == j:
                    # An atom paired with its own image is not a pair.
                    continue
                pair = (i, j) if i < j else (j, i)
                nearest[pair] = min(nb.radius, nearest.get(pair, 1e6))
        self._pbcdict = nearest
        self._pdbkeys = list(nearest)