def test_all_search(nr_points, dim, bucket_size, query_radius):
    """Check the fixed-radius all-pairs neighbor search.

    Builds a tree over random points, runs the all-pairs search with
    ``query_radius`` and compares the number of returned index entries
    with the number of returned radii.

    Arguments:
     - nr_points: number of points used in test
     - dim: dimension of coords
     - bucket_size: nr of points per tree node
     - query_radius: radius of search

    Returns true if the test passes.
    """
    tree = KDTree(dim, bucket_size)
    points = random.random((nr_points, dim))
    tree.set_coords(points)
    tree.all_search(query_radius)

    # Either getter may hand back None; treat that as "zero results".
    pair_indices = tree.all_get_indices()
    n_indices = 0 if pair_indices is None else len(pair_indices)

    pair_radii = tree.all_get_radii()
    n_radii = 0 if pair_radii is None else len(pair_radii)

    return n_indices == n_radii
def _apply_KDTree(self, group):
    """Select the atoms of ``group`` lying in a spherical shell.

    The shell is centred on the centre of geometry of the inner selection.
    It holds the atoms found with ``self.exRadius`` that are not also found
    with ``self.inRadius``. ``periodic = True`` is not supported.
    """
    centre = self.sel.apply(group).center_of_geometry()

    tree = KDTree(dim=3, bucket_size=10)
    tree.set_coords(group.positions)

    # Query the outer radius first, then the inner one.
    tree.search(centre, self.exRadius)
    outer_hits = tree.get_indices()
    tree.search(centre, self.inRadius)
    inner_hits = tree.get_indices()

    shell_hits = list(set(outer_hits) - set(inner_hits))
    return unique(group[shell_hits])
def __init__(self, atom_group, bucket_size=10):
    """Build a KDTree over the positions of ``atom_group``.

    Parameters
    ----------
    atom_group : AtomGroup
      atoms whose positions are indexed by the tree
    bucket_size : int
      Number of entries in leafs of the KDTree. If you suffer poor
      performance you can play around with this number. Increasing the
      `bucket_size` will speed up the construction of the KDTree but
      slow down the search.
    """
    self.atom_group = atom_group
    tree = KDTree(dim=3, bucket_size=bucket_size)
    self.kdtree = tree
    tree.set_coords(atom_group.positions)
def __init__(self, atom_group, box=None, bucket_size=10):
    """Build a (periodic) KDTree over the positions of ``atom_group``.

    Parameters
    ----------
    atom_group : AtomGroup
      atoms whose positions are indexed by the tree
    box : array-like or ``None``, optional, default ``None``
      Simulation cell dimensions in the form of
      :attr:`MDAnalysis.trajectory.base.Timestep.dimensions` when
      periodic boundary conditions should be taken into account for
      the calculation of contacts.
    bucket_size : int
      Number of entries in leafs of the KDTree. If you suffer poor
      performance you can play around with this number. Increasing the
      `bucket_size` will speed up the construction of the KDTree but
      slow down the search.
    """
    self.atom_group = atom_group
    self._u = atom_group.universe
    self._box = box
    # A box switches the tree implementation to the periodic variant.
    self.kdtree = (
        KDTree(dim=3, bucket_size=bucket_size)
        if box is None
        else PeriodicKDTree(box, bucket_size=bucket_size)
    )
    self.kdtree.set_coords(atom_group.positions)
def __init__(self, atom_list, bucket_size=10):
    """Create the search object and index the atom coordinates.

    o atom_list - list of atoms. This list is used in the queries.
    It can contain atoms from different structures.
    o bucket_size - bucket size of KD tree. You can play around
    with this to optimize speed if you feel like it.
    """
    self.atom_list = atom_list
    # Collect one coordinate per atom and convert to an Nx3 array of
    # numpy type code "f".
    per_atom_xyz = [atom.get_coord() for atom in atom_list]
    self.coords = numpy.array(per_atom_xyz).astype("f")
    assert bucket_size > 1
    assert self.coords.shape[1] == 3
    tree = KDTree(3, bucket_size)
    self.kdt = tree
    tree.set_coords(self.coords)
def __init__(self, atom_group, bucket_size=10):
    """Build a KDTree over the coordinates of ``atom_group``.

    :Arguments:
     *atom_group*
       list of atoms (:class: `~MDAnalysis.core.AtomGroup.AtomGroup`)
     *bucket_size*
       Number of entries in leafs of the KDTree. If you suffer poor
       performance you can play around with this number. Increasing the
       `bucket_size` will speed up the construction of the KDTree but
       slow down the search.

    :Raises: :exc:`TypeError` if *atom_group* has no ``coordinates``
             attribute.
    """
    self.atom_group = atom_group
    has_coordinates = hasattr(atom_group, 'coordinates')
    if not has_coordinates:
        raise TypeError('atom_group must have a coordinates() method'
                        '(eq a AtomGroup from a selection)')
    tree = KDTree(dim=3, bucket_size=bucket_size)
    self.kdtree = tree
    tree.set_coords(atom_group.coordinates())
def _apply_KDTree(self, group):
    """Return the unique atoms of ``group`` found by a KDTree search.

    The tree is queried with ``self.ref`` as centre and ``self.cutoff``
    as radius.
    """
    tree = KDTree(dim=3, bucket_size=10)
    tree.set_coords(group.positions)
    tree.search(self.ref, self.cutoff)
    return unique(group[tree.get_indices()])
def test_search(nr_points, dim, bucket_size, radius):
    """Test search of all points within radius of a center.

    The first random point is used as the center and the search radius is
    ``radius * 100``. The number of radii reported by the tree is compared
    with a brute-force count using ``_dist``.

    Arguments:
     - nr_points: number of points used in test
     - dim: dimension of coords
     - bucket_size: nr of points per tree node
     - radius: radius of search (scaled by 100 before use)

    Returns true if the test passes.
    """
    tree = KDTree(dim, bucket_size)
    points = random.random((nr_points, dim))
    tree.set_coords(points)

    reach = radius * 100
    tree.search(points[0], reach)
    found_radii = tree.get_radii()

    # Brute-force reference: count every point within reach of the center.
    expected = 0
    for idx in range(nr_points):
        if _dist(points[idx], points[0]) <= reach:
            expected += 1

    return expected == len(found_radii)
def _apply_KDTree(self, group):
    """Return the unique atoms of ``group`` found around ``self.ref``.

    A ``PeriodicKDTree`` built from ``group.dimensions`` is used when
    ``self.periodic`` is set and the dimensions are not ``None``.
    Otherwise a plain ``KDTree`` is used.
    """
    cell = group.dimensions if self.periodic else None
    if cell is not None:
        tree = PeriodicKDTree(cell, bucket_size=10)
    else:
        tree = KDTree(dim=3, bucket_size=10)
    tree.set_coords(group.positions)
    tree.search(self.ref, self.cutoff)
    hits = tree.get_indices()
    return group[hits].unique
def test_KDTree_exceptions(self):
    """Check the error messages raised by KDTree on invalid use."""
    kdt = KDTree(dim, bucket_size)

    # Coordinates scaled far beyond the accepted range.
    with self.assertRaises(Exception) as caught:
        kdt.set_coords(random.random((nr_points, dim)) * 100000000000000)
    range_msg = "Points should lie between -1e6 and 1e6"
    self.assertTrue(range_msg in str(caught.exception))

    # Coordinate array with the wrong number of columns.
    with self.assertRaises(Exception) as caught:
        kdt.set_coords(random.random((nr_points, dim - 2)))
    shape_msg = "Expected a Nx%i NumPy array" % dim
    self.assertTrue(shape_msg in str(caught.exception))

    # Both set_coords calls failed, so the tree still has no points.
    with self.assertRaises(Exception) as caught:
        kdt.search(array([0, 0, 0]), radius)
    unset_msg = "No point set specified"
    self.assertTrue(unset_msg in str(caught.exception))
def _apply_KDTree(self, group):
    """KDTree based selection is about 7x faster than distmat
    for typical problems.

    Returns the set of atoms from ``self._group_atoms`` (minus the inner
    selection) that the tree reports for ``self.cutoff`` around any atom
    of the inner selection.

    Limitations: always ignores periodicity
    """
    # group is wrong, should be universe (?!)
    sel_atoms = self.sel._apply(group)
    # an indexable list is needed to map tree indices back to atoms
    sys_atoms_list = list(self._group_atoms - sel_atoms)

    sel_indices = numpy.array([a.index for a in sel_atoms], dtype=int)
    sys_indices = numpy.array([a.index for a in sys_atoms_list], dtype=int)
    sel_coor = Selection.coord[sel_indices]

    kdtree = KDTree(dim=3, bucket_size=10)
    kdtree.set_coords(Selection.coord[sys_indices])

    # Each centre yields a different number of neighbours, so the hits are
    # kept as a list of per-centre index sequences.
    hits_per_centre = []
    for centre in numpy.array(sel_coor):
        kdtree.search(centre, self.cutoff)
        hits_per_centre.append(kdtree.get_indices())

    res_atoms = set()
    for hits in hits_per_centre:
        for i in hits:
            res_atoms.add(sys_atoms_list[i])
    return res_atoms
def test_KDTree_exceptions(self):
    """Verify that misuse of KDTree raises the documented messages."""
    kdt = KDTree(dim, bucket_size)

    # 1. coordinates far outside the accepted value range
    with self.assertRaises(Exception) as ctx:
        kdt.set_coords(random.random((nr_points, dim)) * 100000000000000)
    message = str(ctx.exception)
    self.assertTrue("Points should lie between -1e6 and 1e6" in message)

    # 2. coordinate array with two columns too few
    with self.assertRaises(Exception) as ctx:
        kdt.set_coords(random.random((nr_points, dim - 2)))
    message = str(ctx.exception)
    self.assertTrue("Expected a Nx%i NumPy array" % dim in message)

    # 3. searching although no coordinates were accepted above
    with self.assertRaises(Exception) as ctx:
        kdt.search(array([0, 0, 0]), radius)
    message = str(ctx.exception)
    self.assertTrue("No point set specified" in message)
def _apply_KDTree(self, group):
    """Select atoms around the centre of geometry of the inner selection.

    A ``KDTree`` is used when ``validate_dimensions`` yields no box and a
    ``PeriodicKDTree`` otherwise. The search radius is ``self.cutoff``.
    """
    sel = self.sel.apply(group)
    box = self.validate_dimensions(group.dimensions)
    centre = sel.center_of_geometry(pbc=self.periodic)

    if box is not None:
        tree = PeriodicKDTree(box, bucket_size=10)
    else:
        tree = KDTree(dim=3, bucket_size=10)

    tree.set_coords(group.positions)
    tree.search(centre, self.cutoff)
    return group[tree.get_indices()].unique
def _apply_KDTree(self, group):
    """Select atoms around the centre of geometry of the inner selection.

    Uses a KDTree with ``self.cutoff`` as search radius. ``periodic = True``
    is not supported.
    (KDTree routine is ca 15% slower than the distance matrix one)
    """
    centre = self.sel.apply(group).center_of_geometry()

    tree = KDTree(dim=3, bucket_size=10)
    tree.set_coords(group.positions)
    tree.search(centre, self.cutoff)

    hits = tree.get_indices()
    return unique(group[hits])
def _apply_KDTree(self, group):
    """Shell selection using KDTree but periodic = True not supported.

    Stores the centre of geometry of the inner selection in ``self.ref``.
    Returns the set of atoms found with ``self.exRadius`` that are not
    also found with ``self.inRadius``.
    """
    sys_indices = numpy.array([a.index for a in self._group_atoms_list])
    sys_coor = Selection.coord[sys_indices]

    # group is wrong, should be universe (?!)
    sel_atoms = self.sel._apply(group)
    cog = AtomGroup(sel_atoms).center_of_geometry()
    self.ref = numpy.array((cog[0], cog[1], cog[2]))

    if self.periodic:
        pass  # or warn? -- no periodic functionality with KDTree search

    tree = KDTree(dim=3, bucket_size=10)
    tree.set_coords(sys_coor)

    tree.search(self.ref, self.exRadius)
    outer_hits = tree.get_indices()
    tree.search(self.ref, self.inRadius)
    inner_hits = tree.get_indices()

    shell_hits = list(set(outer_hits) - set(inner_hits))
    return set([self._group_atoms_list[i] for i in shell_hits])
def __init__(self, atom_list, bucket_size=10):
    """Store the atoms and build a KD tree over their coordinates.

    o atom_list - list of atoms. This list is used in the queries.
    It can contain atoms from different structures.
    o bucket_size - bucket size of KD tree. You can play around
    with this to optimize speed if you feel like it.
    """
    self.atom_list = atom_list

    # Gather the coordinates, then cast to an Nx3 array (type code "f").
    xyz = []
    for atom in atom_list:
        xyz.append(atom.get_coord())
    self.coords = numpy.array(xyz).astype("f")

    assert bucket_size > 1
    assert self.coords.shape[1] == 3

    self.kdt = KDTree(3, bucket_size)
    self.kdt.set_coords(self.coords)
def _apply_KDTree(self, group):
    """KDTree based selection is about 7x faster than distmat
    for typical problems.

    Returns the unique atoms of ``group`` that are not part of the inner
    selection and that the tree reports for ``self.cutoff`` around at
    least one atom of the inner selection.

    An empty inner selection, or a group with no atoms outside it, yields
    an empty result instead of failing in ``np.concatenate``.

    Limitations: always ignores periodicity
    """
    sel = self.sel.apply(group)
    # All atoms in group that aren't in sel
    others = group[~np.in1d(group.indices, sel.indices)]

    # Without probes or candidates the loop below would collect nothing,
    # and np.concatenate([]) raises ValueError.
    if len(sel.positions) == 0 or len(others.positions) == 0:
        return unique(others[np.empty(0, dtype=np.int32)])

    kdtree = KDTree(dim=3, bucket_size=10)
    kdtree.set_coords(others.positions)

    found_indices = []
    for probe in sel.positions:
        kdtree.search(probe, self.cutoff)
        found_indices.append(kdtree.get_indices())

    # These are the indices from OTHERS that were seen when
    # probing with SEL
    unique_idx = np.unique(np.concatenate(found_indices))
    return unique(others[unique_idx.astype(np.int32)])
def _apply_KDTree(self, group):
    """Point selection using KDTree but periodic = True not supported.

    Stores the centre of geometry of the inner selection in ``self.ref``
    and returns the set of atoms found with ``self.cutoff`` around it.
    (KDTree routine is ca 15% slower than the distance matrix one)
    """
    sys_indices = numpy.array([a.index for a in self._group_atoms_list])
    sys_coor = Selection.coord[sys_indices]

    # group is wrong, should be universe (?!)
    sel_atoms = self.sel._apply(group)
    cog = AtomGroup(sel_atoms).center_of_geometry()
    self.ref = numpy.array((cog[0], cog[1], cog[2]))

    if self.periodic:
        pass  # or warn? -- no periodic functionality with KDTree search

    tree = KDTree(dim=3, bucket_size=10)
    tree.set_coords(sys_coor)
    tree.search(self.ref, self.cutoff)

    hits = tree.get_indices()
    return set([self._group_atoms_list[i] for i in hits])
def _apply_KDTree(self, group):
    """KDTree based selection is about 7x faster than distmat
    for typical problems.

    Returns the unique atoms of ``group`` that are not part of the inner
    selection and that the tree reports for ``self.cutoff`` around at
    least one atom of the inner selection.

    An empty inner selection, or a group with no atoms outside it, yields
    an empty result instead of failing in ``np.concatenate``.

    Limitations: always ignores periodicity
    """
    sel = self.sel.apply(group)
    # All atoms in group that aren't in sel
    others = group[~np.in1d(group.indices, sel.indices)]

    # Without probes or candidates the loop below would collect nothing,
    # and np.concatenate([]) raises ValueError.
    if len(sel.positions) == 0 or len(others.positions) == 0:
        return others[np.empty(0, dtype=np.int32)].unique

    kdtree = KDTree(dim=3, bucket_size=10)
    kdtree.set_coords(others.positions)

    found_indices = []
    for probe in sel.positions:
        kdtree.search(probe, self.cutoff)
        found_indices.append(kdtree.get_indices())

    # These are the indices from OTHERS that were seen when
    # probing with SEL
    unique_idx = np.unique(np.concatenate(found_indices))
    return others[unique_idx.astype(np.int32)].unique
class AtomNeighborSearch():
    """Find all atoms/residues/segments within a radius of query atoms.

    This class is using the BioPython KDTree for the neighborsearch. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """

    def __init__(self, atom_group, bucket_size=10):
        """Build a KDTree over the coordinates of *atom_group*.

        :Arguments:
         *atom_group*
           list of atoms (:class: `~MDAnalysis.core.AtomGroup.AtomGroup`)
         *bucket_size*
           Number of entries in leafs of the KDTree. If you suffer poor
           performance you can play around with this number. Increasing the
           `bucket_size` will speed up the construction of the KDTree but
           slow down the search.

        :Raises: :exc:`TypeError` if *atom_group* has no ``coordinates``
                 attribute.
        """
        self.atom_group = atom_group
        has_coordinates = hasattr(atom_group, 'coordinates')
        if not has_coordinates:
            raise TypeError('atom_group must have a coordinates() method'
                            '(eq a AtomGroup from a selection)')
        tree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree = tree
        tree.set_coords(atom_group.coordinates())

    def search(self, atoms, radius, level='A'):
        """Return all atoms/residues/segments within *radius* of *atoms*.

        :Arguments:
         *atoms*
           list of atoms (:class: `~MDAnalysis.core.AtomGroup.AtomGroup`)
         *radius*
           float. Radius for search in Angstrom.
         *level* (optional)
           char (A, R, S). Return atoms(A), residues(R) or segments(S)
           within *radius* of *atoms*.
        """
        # One tree query per query coordinate; hits are pooled in one list.
        pooled = []
        for position in atoms.coordinates():
            self.kdtree.search(position, radius)
            pooled.extend(self.kdtree.get_indices())
        unique_idx = numpy.unique(pooled)
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """Convert atom indices into atoms, residues or segments.

        :Arguments:
         *indices*
           list of atom indices into the stored atom group
         *level*
           char (A, R, S). Return atoms(A), residues(R) or segments(S).

        For level ``A`` an empty list is returned when *indices* is empty,
        otherwise an ``AtomGroup``. Levels ``R`` and ``S`` return a list
        without duplicates.

        :Raises: :exc:`NotImplementedError` for any other *level*.
        """
        neighbours = [self.atom_group[i] for i in indices]
        if level == 'A':
            return AtomGroup(neighbours) if neighbours else []
        if level == 'R':
            return list(set([atom.residue for atom in neighbours]))
        if level == 'S':
            return list(set([atom.segment for atom in neighbours]))
        raise NotImplementedError('{}: level not implemented'.format(level))
class AtomNeighborSearch(object):
    """Neighbour search for atoms/residues/segments around query atoms.

    This class is using the BioPython KDTree for the neighborsearch. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """

    def __init__(self, atom_group, bucket_size=10):
        """Index the coordinates of *atom_group* in a KDTree.

        :Arguments:
         *atom_group*
           list of atoms (:class: `~MDAnalysis.core.AtomGroup.AtomGroup`)
         *bucket_size*
           Number of entries in leafs of the KDTree. If you suffer poor
           performance you can play around with this number. Increasing the
           `bucket_size` will speed up the construction of the KDTree but
           slow down the search.

        :Raises: :exc:`TypeError` if *atom_group* has no ``coordinates``
                 attribute.
        """
        self.atom_group = atom_group
        if not hasattr(atom_group, 'coordinates'):
            raise TypeError('atom_group must have a coordinates() method'
                            '(eq a AtomGroup from a selection)')
        self.kdtree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree.set_coords(atom_group.coordinates())

    def search(self, atoms, radius, level='A'):
        """Return all atoms/residues/segments within *radius* of *atoms*.

        :Arguments:
         *atoms*
           list of atoms (:class: `~MDAnalysis.core.AtomGroup.AtomGroup`)
         *radius*
           float. Radius for search in Angstrom.
         *level* (optional)
           char (A, R, S). Return atoms(A), residues(R) or segments(S)
           within *radius* of *atoms*.
        """
        tree = self.kdtree
        hits = []
        for xyz in atoms.coordinates():
            tree.search(xyz, radius)
            hits.extend(tree.get_indices())
        return self._index2level(np.unique(hits), level)

    def _index2level(self, indices, level):
        """Map atom indices to atoms, residues or segments.

        :Arguments:
         *indices*
           list of atom indices into the stored atom group
         *level*
           char (A, R, S). Return atoms(A), residues(R) or segments(S).

        Level ``A`` gives an empty list for empty *indices*, otherwise an
        ``AtomGroup``. Levels ``R`` and ``S`` give a duplicate-free list.

        :Raises: :exc:`NotImplementedError` for any other *level*.
        """
        picked = [self.atom_group[i] for i in indices]

        if level == 'A':
            if not picked:
                return []
            return AtomGroup(picked)

        if level == 'R':
            residues = set([atom.residue for atom in picked])
            return list(residues)

        if level == 'S':
            segments = set([atom.segment for atom in picked])
            return list(segments)

        raise NotImplementedError(
            '{0}: level not implemented'.format(level))
class NeighborSearch(object):
    """Class for neighbor searching.

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the Bio.KDTree C++ module, so it's fast.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Create the object.

        Arguments:
         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree. You can play around
           with this to optimize speed if you feel like it.
        """
        self.atom_list = atom_list
        # Collect the coordinates and cast to an Nx3 array (type code "f").
        xyz = [atom.get_coord() for atom in atom_list]
        self.coords = numpy.array(xyz).astype("f")
        assert bucket_size > 1
        assert self.coords.shape[1] == 3
        self.kdt = KDTree(3, bucket_size)
        self.kdt.set_coords(self.coords)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Translate a list of (entity, entity) tuples into a list of
        # (parent entity, parent entity) tuples without duplicates.
        # Pairs whose members share a parent are dropped; the rest are
        # stored with the smaller parent first.
        # o pair_list - a list of (entity, entity) tuples
        parent_pairs = []
        for child1, child2 in pair_list:
            parent1 = child1.get_parent()
            parent2 = child2.get_parent()
            if parent1 == parent2:
                continue
            if parent1 < parent2:
                ordered = (parent1, parent2)
            else:
                ordered = (parent2, parent1)
            parent_pairs.append(ordered)
        return uniqueify(parent_pairs)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException if level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.search(center, radius)
        atoms = self.atom_list
        neighbours = [atoms[i] for i in self.kdt.get_indices()]
        if level == "A":
            return neighbours
        return unfold_entities(neighbours, level)

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException if level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)
        self.kdt.all_search(radius)
        pair_indices = self.kdt.all_get_indices()
        atoms = self.atom_list
        pairs = [(atoms[i1], atoms[i2]) for i1, i2 in pair_indices]
        if level == "A":
            # return atoms
            return pairs
        # Climb one hierarchy level per iteration until the requested
        # level is reached.
        for parent_level in ["R", "C", "M", "S"]:
            pairs = self._get_unique_parent_pairs(pairs)
            if level == parent_level:
                return pairs
class AtomNeighborSearch(object):
    """Find all atoms/residues/segments within a radius of query atoms.

    This class is using the BioPython KDTree for the neighborsearch. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """

    def __init__(self, atom_group, bucket_size=10):
        """Build a KDTree over the positions of ``atom_group``.

        Parameters
        ----------
        atom_group : AtomGroup
          atoms whose positions are indexed by the tree
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        tree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree = tree
        tree.set_coords(atom_group.positions)

    def search(self, atoms, radius, level='A'):
        """Return all atoms/residues/segments within *radius* of *atoms*.

        Parameters
        ----------
        atoms : AtomGroup
          list of atoms
        radius : float
          Radius for search in Angstrom.
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S)
          within *radius* of *atoms*.
        """
        # One tree query per query coordinate; hits are pooled in one list.
        pooled = []
        for position in atoms.coordinates():
            self.kdtree.search(position, radius)
            pooled.extend(self.kdtree.get_indices())
        unique_idx = np.unique(pooled)
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """Convert atom indices into atoms, residues or segments.

        Parameters
        ----------
        indices
          list of atom indices into the stored atom group
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S).

        Returns
        -------
        For level ``A`` an empty list when *indices* is empty, otherwise an
        ``AtomGroup``; for ``R`` and ``S`` a list without duplicates.

        Raises
        ------
        NotImplementedError
          for any other *level*
        """
        neighbours = [self.atom_group[i] for i in indices]
        if level == 'A':
            return AtomGroup(neighbours) if neighbours else []
        if level == 'R':
            return list(set([atom.residue for atom in neighbours]))
        if level == 'S':
            return list(set([atom.segment for atom in neighbours]))
        raise NotImplementedError('{0}: level not implemented'.format(level))
def anal_jointdist(prot, rfirst, mols, cutoff, midz, box, mat, matburr,
                   buried=None):
    """
    Calculates the joint probability of residue interactions

    For every molecule in contact with the protein, all pairs of residues
    that are simultaneously in contact with that molecule (including each
    residue with itself) are marked once, and the marks are added to the
    running matrices.

    Parameters
    ----------
    prot : AtomSelection
      the protein atoms
    rfirst : NumpyArray
      the first atom of each residue in the protein; consecutive entries
      delimit one residue, so ``len(rfirst) - 1`` residues are analysed
    mols : NumpyArray
      the centroid of the molecule of interest
    cutoff : float
      the contact cut-off
    midz : float
      the middle of the bilayer (not used by this routine)
    box : NumpyArray
      the box sides; if None a KDTree search is used instead of a
      distance matrix
    mat : NumpyArray
      the joint probability for non-buried molecules
    matburr : NumpyArray
      the joint probability for buried molecules
    buried : NumpyArray of boolean, optional
      flag to indicate buried molecule

    Returns
    -------
    tuple of NumpyArray
      ``(mat + imat, matburr + imatburr)`` where the increments hold a 1
      for every residue pair seen in contact with at least one molecule
    """
    imat = np.zeros(mat.shape)
    imatburr = np.zeros(matburr.shape)
    nres = len(rfirst) - 1

    # Fetch the protein coordinates once instead of once per molecule.
    # assumes get_positions() returns the same coordinates throughout
    # this call -- TODO confirm
    prot_xyz = prot.get_positions()
    natoms = prot_xyz.shape[0]

    # Calculate all distances at once
    if box is not None:
        dist_all = distances.distance_array(np.array(mols), prot_xyz, box)
    else:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(prot_xyz)

    # Loop over all molecules
    for mi, mol in enumerate(mols):
        if box is not None:
            dist = dist_all[mi, :]
        else:
            # Without a box only "within cutoff or not" is known: atoms
            # found by the tree get distance 0, all others exceed cutoff.
            dist = np.ones(natoms) + cutoff
            kdtree.search(np.array([mol]), cutoff)
            for i in kdtree.get_indices():
                dist[i] = 0.0

        # Check if this molecule is on
        if dist.min() >= cutoff:
            continue

        # Reduce every residue once, instead of re-reducing residue j for
        # every in-contact residue i in a nested loop.
        res_min = [dist[rfirst[i]:rfirst[i + 1]].min() for i in range(nres)]
        on = [i for i, dmin in enumerate(res_min) if not dmin >= cutoff]
        if not on:
            continue

        target = imatburr if (buried is not None and buried[mi]) else imat
        # Mark every pair of in-contact residues, diagonal included.
        target[np.ix_(on, on)] = 1

    return (mat + imat, matburr + imatburr)
def anal_contacts(prot, rfirst, mols, cutoff, midz, box, molfile, resfile,
                  burresfile=None, buried=None, reslist=None,
                  reslistfile=None):
    """
    Calculates a range of contacts and write out contact vectors to files

    Parameters
    ----------
    prot : AtomSelection
      the protein atoms
    rfirst : NumpyArray
      the first atom of each residue in the protein
    mols : NumpyArray
      the centroid of the molecule of interest
    cutoff : float
      the contact cut-off
    midz : float
      the middle of the bilayer (not used by this routine)
    box : NumpyArray
      the box sides; if None a KDTree search is used instead of a
      distance matrix
    molfile : fileobject
      the file to write molecular contacts to
    resfile : fileobject
      the file to write residue contacts to
    burresfile : fileobject, optional
      the file to write buried residue contacts to
    buried : NumpyArray of boolean, optional
      flag to indicate buried molecule
    reslist : list
      a list of residues to write out individual mol contacts
    reslistfile : fileobject
      the file to write out individual mol contacts to
    """
    nres = len(rfirst) - 1
    molon = np.zeros(len(mols), dtype=bool)
    reson = np.zeros(nres, dtype=bool)
    if buried is not None:
        burreson = np.zeros(nres, dtype=bool)
    if reslist is not None:
        resonlist = np.zeros([len(reslist), len(mols)], dtype=bool)

    # Calculate all distances at once
    periodic = box is not None
    if periodic:
        dist_all = distances.distance_array(np.array(mols),
                                            prot.get_positions(), box)
    else:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(prot.get_positions())

    # Loop over all molecules
    for mi, mol in enumerate(mols):
        if periodic:
            dist = dist_all[mi, :]
        else:
            # Atoms found by the tree get distance 0, all others a value
            # above the cut-off.
            dist = np.ones(prot.get_positions().shape[0]) + cutoff
            kdtree.search(np.array([mol]), cutoff)
            for hit in kdtree.get_indices():
                dist[hit] = 0.0

        # Check if this molecule is on
        molon[mi] = dist.min() < cutoff
        if not molon[mi]:
            continue

        # Check contacts for each residue
        for i in range(nres):
            if not dist[rfirst[i]:rfirst[i + 1]].min() < cutoff:
                continue
            # NOTE(review): burreson is allocated when `buried` is given
            # but this branch tests `burresfile` -- confirm that callers
            # always pass both together.
            if burresfile is not None and buried[mi]:
                burreson[i] = True
            else:
                reson[i] = True
            if reslist is not None and i in reslist:
                resonlist[reslist.index(i), mi] = True

    # Write state information to file
    write_booleans(molfile, molon)
    write_booleans(resfile, reson)
    if buried is not None:
        write_booleans(burresfile, burreson)
    if reslist is not None:
        write_booleans(reslistfile,
                       resonlist.reshape(len(reslist) * len(mols)))
def _apply_KDTree(self, group):
    """KDTree based selection is about 7x faster than distmat
    for typical problems.

    Returns the unique atoms of ``group`` that are not part of the inner
    selection and that the tree reports for ``self.cutoff`` around at
    least one atom of the inner selection. A ``PeriodicKDTree`` is used
    when ``validate_dimensions`` yields a box, a plain ``KDTree`` otherwise.

    An empty inner selection, or a group with no atoms outside it, yields
    an empty result instead of failing in ``np.concatenate``.
    """
    sel = self.sel.apply(group)
    # All atoms in group that aren't in sel
    others = group[~np.in1d(group.indices, sel.indices)]

    box = self.validate_dimensions(group.dimensions)

    # Without probes or candidates there is nothing to find, and the
    # aperiodic branch would call np.concatenate([]) (ValueError).
    if len(sel.positions) == 0 or len(others.positions) == 0:
        return others[np.empty(0, dtype=np.int32)].unique

    if box is None:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(others.positions)
        found_indices = []
        for probe in sel.positions:
            kdtree.search(probe, self.cutoff)
            found_indices.append(kdtree.get_indices())
        unique_idx = np.unique(np.concatenate(found_indices))
    else:
        kdtree = PeriodicKDTree(box, bucket_size=10)
        kdtree.set_coords(others.positions)
        # The periodic tree is queried with all probe positions at once.
        kdtree.search(sel.positions, self.cutoff)
        unique_idx = np.asarray(kdtree.get_indices())

    # These are the indices from OTHERS that were seen when
    # probing with SEL
    return others[unique_idx.astype(np.int32)].unique
def anal_contacts(prot, rfirst, mols, cutoff, midz, box, molfile, resfile,
                  burresfile=None, buried=None, reslist=None,
                  reslistfile=None):
    """
    Calculates a range of contacts and write out contact vectors to files

    Parameters
    ----------
    prot : AtomSelection
      the protein atoms
    rfirst : NumpyArray
      the first atom of each residue in the protein
    mols : NumpyArray
      the centroid of the molecule of interest
    cutoff : float
      the contact cut-off
    midz : float
      the middle of the bilayer (not used by this routine)
    box : NumpyArray
      the box sides; if None a KDTree search is used instead of a
      distance matrix
    molfile : fileobject
      the file to write molecular contacts to
    resfile : fileobject
      the file to write residue contacts to
    burresfile : fileobject, optional
      the file to write buried residue contacts to
    buried : NumpyArray of boolean, optional
      flag to indicate buried molecule
    reslist : list
      a list of residues to write out individual mol contacts
    reslistfile : fileobject
      the file to write out individual mol contacts to
    """
    n_mols = len(mols)
    n_res = len(rfirst) - 1

    molon = np.zeros(n_mols, dtype=bool)
    reson = np.zeros(n_res, dtype=bool)
    if buried is not None:
        burreson = np.zeros(n_res, dtype=bool)
    if reslist is not None:
        resonlist = np.zeros([len(reslist), n_mols], dtype=bool)

    # Calculate all distances at once
    if box is None:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(prot.get_positions())
    else:
        dist_all = distances.distance_array(np.array(mols),
                                            prot.get_positions(), box)

    # Loop over all molecules
    for mi, mol in enumerate(mols):
        if box is None:
            # Atoms found by the tree get distance 0, all others a value
            # above the cut-off.
            dist = np.ones(prot.get_positions().shape[0]) + cutoff
            kdtree.search(np.array([mol]), cutoff)
            for atom_idx in kdtree.get_indices():
                dist[atom_idx] = 0.0
        else:
            dist = dist_all[mi, :]

        # Check if this molecule is on
        molon[mi] = dist.min() < cutoff
        if molon[mi]:
            # Check contacts for each residue
            for i in range(n_res):
                res_dist = dist[rfirst[i]:rfirst[i + 1]]
                if res_dist.min() < cutoff:
                    # NOTE(review): burreson is allocated when `buried`
                    # is given but this branch tests `burresfile` --
                    # confirm that callers always pass both together.
                    if burresfile is not None and buried[mi]:
                        burreson[i] = True
                    else:
                        reson[i] = True
                    if reslist is not None and i in reslist:
                        resonlist[reslist.index(i), mi] = True

    # Write state information to file
    write_booleans(molfile, molon)
    write_booleans(resfile, reson)
    if buried is not None:
        write_booleans(burresfile, burreson)
    if reslist is not None:
        write_booleans(reslistfile,
                       resonlist.reshape(len(reslist) * len(mols)))
def anal_jointdist(prot, rfirst, mols, cutoff, midz, box, mat, matburr,
                   buried=None):
    """
    Calculates the joint probability of residue interactions

    For every molecule in contact with the protein, all pairs of residues
    that are simultaneously in contact with that molecule (including each
    residue with itself) are marked once, and the marks are added to the
    running matrices.

    Parameters
    ----------
    prot : AtomSelection
      the protein atoms
    rfirst : NumpyArray
      the first atom of each residue in the protein; consecutive entries
      delimit one residue, so ``len(rfirst) - 1`` residues are analysed
    mols : NumpyArray
      the centroid of the molecule of interest
    cutoff : float
      the contact cut-off
    midz : float
      the middle of the bilayer (not used by this routine)
    box : NumpyArray
      the box sides; if None a KDTree search is used instead of a
      distance matrix
    mat : NumpyArray
      the joint probability for non-buried molecules
    matburr : NumpyArray
      the joint probability for buried molecules
    buried : NumpyArray of boolean, optional
      flag to indicate buried molecule

    Returns
    -------
    tuple of NumpyArray
      ``(mat + imat, matburr + imatburr)`` where the increments hold a 1
      for every residue pair seen in contact with at least one molecule
    """
    imat = np.zeros(mat.shape)
    imatburr = np.zeros(matburr.shape)
    n_res = len(rfirst) - 1

    # The coordinates are needed both to build the lookup structure and to
    # size the per-molecule distance vector; fetch them a single time.
    # assumes get_positions() returns the same coordinates throughout
    # this call -- TODO confirm
    positions = prot.get_positions()
    n_atoms = positions.shape[0]

    # Calculate all distances at once
    if box is not None:
        dist_all = distances.distance_array(np.array(mols), positions, box)
    else:
        kdtree = KDTree(dim=3, bucket_size=10)
        kdtree.set_coords(positions)

    # Loop over all molecules
    for mi, mol in enumerate(mols):
        if box is not None:
            dist = dist_all[mi, :]
        else:
            # Atoms found by the tree get distance 0, all others a value
            # above the cut-off.
            dist = np.ones(n_atoms) + cutoff
            kdtree.search(np.array([mol]), cutoff)
            for atom_idx in kdtree.get_indices():
                dist[atom_idx] = 0.0

        # Check if this molecule is on
        if dist.min() >= cutoff:
            continue

        # Determine the in-contact residues with one reduction each; the
        # original nested loop repeated the reduction of residue j for
        # every in-contact residue i.
        contacts = []
        for i in range(n_res):
            if dist[rfirst[i]:rfirst[i + 1]].min() >= cutoff:
                continue
            contacts.append(i)
        if not contacts:
            continue

        if buried is not None and buried[mi]:
            increment = imatburr
        else:
            increment = imat
        # Mark every pair of in-contact residues, diagonal included.
        increment[np.ix_(contacts, contacts)] = 1

    return (mat + imat, matburr + imatburr)
class NeighborSearch(object):
    """Class for neighbor searching.

    This class can be used for two related purposes:

     1. To find all atoms/residues/chains/models/structures within radius
        of a given query position.
     2. To find all atoms/residues/chains/models/structures that are within
        a fixed radius of each other.

    NeighborSearch makes use of the Bio.KDTree C++ module, so it's fast.
    """

    def __init__(self, atom_list, bucket_size=10):
        """Create the object.

        Arguments:
         - atom_list - list of atoms. This list is used in the queries.
           It can contain atoms from different structures.
         - bucket_size - bucket size of KD tree. You can play around
           with this to optimize speed if you feel like it.
        """
        self.atom_list = atom_list

        # Gather the coordinates, then cast to an Nx3 array (type code "f").
        coord_list = []
        for atom in atom_list:
            coord_list.append(atom.get_coord())
        self.coords = numpy.array(coord_list).astype("f")

        assert bucket_size > 1
        assert self.coords.shape[1] == 3

        tree = KDTree(3, bucket_size)
        self.kdt = tree
        tree.set_coords(self.coords)

    # Private

    def _get_unique_parent_pairs(self, pair_list):
        # Map every (entity, entity) tuple to the tuple of the two parent
        # entities, skip pairs with a common parent, order each remaining
        # pair with the smaller parent first and remove duplicates.
        # o pair_list - a list of (entity, entity) tuples
        collected = []
        for first, second in pair_list:
            p1 = first.get_parent()
            p2 = second.get_parent()
            if p1 == p2:
                continue
            collected.append((p1, p2) if p1 < p2 else (p2, p1))
        return uniqueify(collected)

    # Public

    def search(self, center, radius, level="A"):
        """Neighbor search.

        Return all atoms/residues/chains/models/structures
        that have at least one atom within radius of center.
        What entity level is returned (e.g. atoms or residues)
        is determined by level (A=atoms, R=residues, C=chains,
        M=models, S=structures).

        Arguments:
         - center - Numeric array
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException if level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)

        self.kdt.search(center, radius)
        hit_indices = self.kdt.get_indices()

        atom_list = self.atom_list
        hit_atoms = [atom_list[i] for i in hit_indices]

        if level != "A":
            return unfold_entities(hit_atoms, level)
        return hit_atoms

    def search_all(self, radius, level="A"):
        """All neighbor search.

        Search all entities that have atoms pairs within
        radius.

        Arguments:
         - radius - float
         - level - char (A, R, C, M, S)

        Raises PDBException if level is not a known entity level.
        """
        if level not in entity_levels:
            raise PDBException("%s: Unknown level" % level)

        self.kdt.all_search(radius)
        index_pairs = self.kdt.all_get_indices()

        atom_list = self.atom_list
        current_pairs = []
        for i1, i2 in index_pairs:
            current_pairs.append((atom_list[i1], atom_list[i2]))

        if level == "A":
            # return atoms
            return current_pairs

        # Move up the hierarchy one level at a time and stop at the
        # requested level.
        for reached in ["R", "C", "M", "S"]:
            current_pairs = self._get_unique_parent_pairs(current_pairs)
            if reached == level:
                return current_pairs
class AtomNeighborSearch(object):
    """Find all atoms/residues/segments within a radius of query atoms.

    This class is using the BioPython KDTree for the neighborsearch. This
    class also does not apply PBC to the distance calculations. So you have
    to ensure yourself that the trajectory has been corrected for PBC
    artifacts.
    """

    def __init__(self, atom_group, bucket_size=10):
        """Build a KDTree over the positions of ``atom_group``.

        Parameters
        ----------
        atom_group : AtomGroup
          atoms whose positions are indexed by the tree
        bucket_size : int
          Number of entries in leafs of the KDTree. If you suffer poor
          performance you can play around with this number. Increasing the
          `bucket_size` will speed up the construction of the KDTree but
          slow down the search.
        """
        self.atom_group = atom_group
        self._u = atom_group.universe
        tree = KDTree(dim=3, bucket_size=bucket_size)
        self.kdtree = tree
        tree.set_coords(atom_group.positions)

    def search(self, atoms, radius, level='A'):
        """Return all atoms/residues/segments within *radius* of *atoms*.

        Parameters
        ----------
        atoms : AtomGroup, MDAnalysis.core.groups.Atom
          list of atoms, or a single atom
        radius : float
          Radius for search in Angstrom.
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S)
          within *radius* of *atoms*.
        """
        # A single Atom is turned into a one-row coordinate array so the
        # loop below handles both cases.
        positions = (atoms.position.reshape(1, 3)
                     if isinstance(atoms, Atom) else atoms.positions)

        pooled = []
        for pos in positions:
            self.kdtree.search(pos, radius)
            pooled.extend(self.kdtree.get_indices())
        unique_idx = np.unique(pooled).astype(np.int64)
        return self._index2level(unique_idx, level)

    def _index2level(self, indices, level):
        """Convert atom indices into atoms, residues or segments.

        Parameters
        ----------
        indices
          list of atom indices into the stored atom group
        level : str
          char (A, R, S). Return atoms(A), residues(R) or segments(S).

        Returns
        -------
        For level ``A`` the indexed atoms, or an empty list when they
        evaluate as false; for ``R`` and ``S`` a list without duplicates.

        Raises
        ------
        NotImplementedError
          for any other *level*
        """
        neighbours = self.atom_group[indices]
        if level == 'A':
            return neighbours if neighbours else []
        if level == 'R':
            return list(set([atom.residue for atom in neighbours]))
        if level == 'S':
            return list(set([atom.segment for atom in neighbours]))
        raise NotImplementedError('{0}: level not implemented'.format(level))