def __init__(self, atom_list, bucket_size=10):
    """Build a KD tree over the coordinates of the given atoms.

    Arguments:
     - atom_list - list of atoms. This list is used in the queries.
       It can contain atoms from different structures.
     - bucket_size - bucket size of KD tree. You can play around
       with this to optimize speed if you feel like it.

    The atom list is kept as ``self.atom_list``, the coordinate array
    as ``self.coords`` and the tree as ``self.kdt``.
    """
    from Bio.PDB.kdtrees import KDTree

    self.atom_list = atom_list
    # One get_coord() row per atom, packed into a double-precision array.
    xyz_rows = [atom.get_coord() for atom in atom_list]
    self.coords = numpy.array(xyz_rows, dtype="d")
    # Sanity checks: a bucket must be larger than one, and every row
    # must hold exactly three values.
    assert bucket_size > 1
    assert self.coords.shape[1] == 3
    self.kdt = KDTree(self.coords, bucket_size)
def compute(self, entity, level="A"):
    """Calculate the accessible surface area (ASA) for an entity.

    The resulting atomic surface accessibility values are attached to the
    .sasa attribute of each entity (or atom), depending on the level. For
    example, if level="R", all residues will have a .sasa attribute. Atoms
    will always be assigned a .sasa attribute with their individual values.

    :param entity: input entity.
    :type entity: Bio.PDB.Entity, e.g. Residue, Chain, ...
    :param level: the level at which ASA values are assigned, which can be
                  one of "A" (Atom), "R" (Residue), "C" (Chain), "M" (Model),
                  or "S" (Structure). The ASA value of an entity is the sum
                  of all ASA values of its children. Defaults to "A".
    :type level: str
    :raises ValueError: if ``entity`` is not a Residue, Chain, Model or
                        Structure, if ``level`` is not a known level, if
                        ``level`` is above the level of ``entity``, or if
                        ``entity`` contains no atoms.

    >>> from Bio.PDB import PDBParser
    >>> from Bio.PDB.SASA import ShrakeRupley
    >>> p = PDBParser(QUIET=1)
    >>> # This assumes you have a local copy of 1LCD.pdb in a directory called "PDB"
    >>> struct = p.get_structure("1LCD", "PDB/1LCD.pdb")
    >>> sr = ShrakeRupley()
    >>> sr.compute(struct, level="S")
    >>> print(round(struct.sasa, 2))
    7053.43
    >>> print(round(struct[0]["A"][11]["OE1"].sasa, 2))
    9.64
    """
    is_valid = hasattr(entity, "level") and entity.level in {"R", "C", "M", "S"}
    if not is_valid:
        raise ValueError(
            f"Invalid entity type '{type(entity)}'. "
            "Must be Residue, Chain, Model, or Structure"
        )

    if level not in _ENTITY_HIERARCHY:
        raise ValueError(f"Invalid level '{level}'. Must be A, R, C, M, or S.")
    elif _ENTITY_HIERARCHY[level] > _ENTITY_HIERARCHY[entity.level]:
        raise ValueError(
            f"Level '{level}' must be equal or smaller than input entity: {entity.level}"
        )

    # Get atoms onto list for lookup
    atoms = list(entity.get_atoms())
    n_atoms = len(atoms)
    if not n_atoms:
        raise ValueError("Entity has no child atoms.")

    # Get coordinates as a numpy array
    # We trust DisorderedAtom and friends to pick representatives.
    coords = np.array([a.coord for a in atoms], dtype=np.float64)

    # Pre-compute atom neighbors using KDTree
    kdt = KDTree(coords, 10)

    # Pre-compute radius + probe table
    radii_dict = self.radii_dict
    radii = np.array([radii_dict[a.element] for a in atoms], dtype=np.float64)
    radii += self.probe_radius
    # Search radius that covers every pair of spheres that could overlap.
    twice_maxradii = np.max(radii) * 2

    # Calculate ASAs
    # np.int64 rather than the np.int alias, which NumPy 1.24 removed.
    asa_array = np.zeros((n_atoms, 1), dtype=np.int64)
    ptset = set(range(self.n_points))
    # The scaling below allocates a new array on every iteration, so the
    # template sphere is never modified and needs no explicit copy.
    sphere = np.asarray(self._sphere)
    for i in range(n_atoms):
        r_i = radii[i]

        # Move sphere to atom
        s_on_i = (sphere * r_i) + coords[i]
        available_set = ptset.copy()

        # KDtree for sphere points
        kdt_sphere = KDTree(s_on_i, 10)

        # Iterate over neighbors of atom i
        for jj in kdt.search(coords[i], twice_maxradii):
            j = jj.index
            if i == j:
                continue

            # NOTE(review): jj.radius is presumably the distance between
            # atoms i and j as reported by the KD tree -- confirm.
            if jj.radius < (r_i + radii[j]):
                # Remove overlapping points on sphere from available set
                available_set -= {
                    pt.index for pt in kdt_sphere.search(coords[j], radii[j])
                }

        asa_array[i] = len(available_set)  # update counts

    # Convert accessible point count to surface area in A**2
    f = radii * radii * (4 * np.pi / self.n_points)
    asa_array = asa_array * f[:, np.newaxis]

    # Set atom .sasa
    for i, atom in enumerate(atoms):
        atom.sasa = asa_array[i, 0]

    # Aggregate values per entity level if necessary
    if level != "A":
        # Walk up the parent chain once per hierarchy step to reach the
        # set of entities at the requested level.
        entities = set(atoms)
        target = _ENTITY_HIERARCHY[level]
        for _ in range(target):
            entities = {e.parent for e in entities}

        # Map each atom's full_id to its row in asa_array.
        atomdict = {a.full_id: idx for idx, a in enumerate(atoms)}
        for e in entities:
            e_atoms = [atomdict[a.full_id] for a in e.get_atoms()]
            e.sasa = asa_array[e_atoms].sum()