def dbscan(points, epsilon=3, min_points=6):
    '''
    Simple implementation of the DBSCAN algorithm for clustering points in 3D
    space.

    Args:
        * points:
            - array of coordinates, one row per point. It is indexed with
              integer index arrays below, so it must support NumPy-style
              fancy indexing.
        * epsilon (default = 3):
            - neighbourhood radius handed to `find_close_points`.
        * min_points (default = 6):
            - neighbour-count threshold deciding whether a point may seed or
              extend a cluster.

    Returns:
        * clusters:
            - list of integer index arrays (indices into `points`), ordered
              from the largest cluster to the smallest.
        * noise:
            - integer array holding the indices that ended up in no cluster.
    '''
    import numpy
    from collections import deque
    from chimerax.geometry import find_close_points

    core = set()
    neighbors = []
    for i, p in enumerate(points):
        _, close_i = find_close_points([p], points, max_distance=epsilon)
        neighbors.append(set(close_i))
        # NOTE(review): seeding requires strictly more than min_points
        # neighbours, while the expansion step below accepts
        # ">= min_points". Kept as-is; confirm which one is intended.
        if len(close_i) > min_points:
            core.add(i)

    remainder = set(range(len(points)))
    clusters = []  # final clusters

    # Assign core groups (those with at least min_points points) to clusters
    while core:
        remainder_old = remainder.copy()
        idx = core.pop()
        remainder.remove(idx)
        # Breadth-first flood fill through sufficiently dense neighbourhoods.
        queue = deque([idx])
        while queue:
            nq = neighbors[queue.popleft()]
            if len(nq) >= min_points:
                delta = nq.intersection(remainder)
                queue.extend(delta)
                remainder.difference_update(delta)
        new_cluster = remainder_old.difference(remainder)
        clusters.append(new_cluster)
        core.difference_update(new_cluster)
    clusters.sort(key=len, reverse=True)

    # Add remaining points to clusters. Each cluster (largest first) keeps
    # absorbing unassigned points within epsilon of any of its members until
    # a pass finds nothing new.
    for cluster in clusters:
        # Stop as soon as nothing is left: an empty index array would
        # otherwise default to a float dtype and fail when used to index
        # `points`.
        while remainder:
            remaining = numpy.array(list(remainder), dtype=int)
            members = numpy.array(list(cluster), dtype=int)
            _, close = find_close_points(points[members], points[remaining],
                                         epsilon)
            if not len(close):
                break
            absorbed = set(remaining[close].tolist())
            cluster.update(absorbed)
            remainder.difference_update(absorbed)

    clusters.sort(key=len, reverse=True)
    clusters = [numpy.array(list(c), dtype=int) for c in clusters]
    # Explicit dtype so that an empty result is still a valid index array.
    noise = numpy.array(list(remainder), dtype=int)
    return clusters, noise
def _zone_atoms(atoms, near_atoms, distance):
    '''
    Return the members of `atoms` that `find_close_points` reports as lying
    within `distance` of `near_atoms`. Both sets are compared using their
    scene coordinates.
    '''
    from chimerax.geometry import find_close_points
    close_indices, _ = find_close_points(atoms.scene_coords,
                                         near_atoms.scene_coords,
                                         distance)
    return atoms[close_indices]
def sidechain_buried_score(residue):
    '''
    Defines how "buried" a sidechain is by counting the number of atoms from
    other residues coming within 4A of any atom from the sidechain. The
    returned score is a value ranging from 0 to 1, where 0 indicates no
    contact with other atoms, and 1 indicates 3 or more other atoms per
    sidechain atom. The score scales linearly with the number of contacting
    atoms in between these values.

    "Sidechain" here means every atom of the residue whose name is not one of
    N, C, CA or O. A residue with no such atoms scores 0.

    NOTE(review): no element filter is applied, so any hydrogens present in
    the model are counted on both sides. If only heavy atoms are meant to
    contribute, a filter on element name needs to be added.
    '''
    from chimerax.geometry import find_close_points
    from chimerax.atomic import Residues
    import numpy

    own_atoms = residue.atoms
    # numpy.isin is the supported replacement for the deprecated numpy.in1d.
    is_backbone = numpy.isin(own_atoms.names, ['N', 'C', 'CA', 'O'])
    sidechain_atoms = own_atoms[numpy.logical_not(is_backbone)]
    if not len(sidechain_atoms):
        return 0

    other_residues = residue.structure.residues.subtract(Residues([residue]))
    other_atoms = other_residues.atoms
    # Second return value: indices of the other-residue atoms that are close
    # to at least one sidechain atom.
    close_others = find_close_points(sidechain_atoms.coords,
                                     other_atoms.coords, 4.0)[1]
    score = (len(close_others) / len(sidechain_atoms)) / 3
    # Saturate at 1 (three or more contacts per sidechain atom).
    return min(score, 1)
def apply_restraints(trs, rrs, adjust_for_confidence, confidence_type):
    '''
    Create distance restraints between atoms of the residues in `rrs`, using
    the geometry of the paired residues in `trs` as the reference.

    Besides its arguments, this function reads several names it does not
    define itself (`atom_names`, `distance_cutoff`, `adrm`, `tolerance`,
    `well_half_width`, `kappa`, `fall_off`, `confidence_multiplier`,
    `pae_matrix`, `numpy`, `find_close_points`, `distance` and
    `adjust_distance_restraint_terms_by_confidence`); these must be provided
    by the surrounding scope.

    Args:
        * trs:
            - iterable of template residues.
        * rrs:
            - iterable of residues to restrain, paired one-to-one (via `zip`)
              with `trs`.
        * adjust_for_confidence:
            - if true, the restraint strength, tolerance and fall-off are
              scaled by `adjust_distance_restraint_terms_by_confidence`, and
              pairs for which it returns a zero strength are skipped.
        * confidence_type:
            - `'plddt'` (scores taken from template atom B-factors scaled by
              `confidence_multiplier`) or `'pae'` (score looked up in
              `pae_matrix` by residue number). Only consulted when
              `adjust_for_confidence` is true.

    Raises:
        ValueError: if `adjust_for_confidence` is true and `confidence_type`
        is neither `'plddt'` nor `'pae'` when a candidate pair is evaluated.
    '''
    from math import log, sqrt
    from chimerax.atomic import Atoms

    # Gather, residue by residue, the atoms that are named in atom_names and
    # present in both the template and the restrained residue. Both lists are
    # filled in the same order so that index i refers to equivalent atoms.
    template_as = []
    restrained_as = []
    for tr, rr in zip(trs, rrs):
        ta_names = set(tr.atoms.names).intersection(atom_names)
        ra_names = set(rr.atoms.names).intersection(atom_names)
        common_names = list(ta_names.intersection(ra_names))
        template_as.extend([tr.find_atom(name) for name in common_names])
        restrained_as.extend([rr.find_atom(name) for name in common_names])
    template_as = Atoms(template_as)
    restrained_as = Atoms(restrained_as)
    template_coords = template_as.coords

    for i, ra1 in enumerate(restrained_as):
        query_coord = numpy.array([template_coords[i]])
        # Neighbours are found in the template geometry, excluding atom i.
        indices = find_close_points(query_coord, template_coords,
                                    distance_cutoff)[1]
        indices = indices[indices != i]
        for ind in indices:
            ra2 = restrained_as[ind]
            if ra1.residue == ra2.residue:
                continue
            if adjust_for_confidence:
                if confidence_type == 'plddt':
                    scores = [
                        template_as[i].bfactor * confidence_multiplier,
                        template_as[ind].bfactor * confidence_multiplier,
                    ]
                elif confidence_type == 'pae':
                    scores = [
                        pae_matrix[template_as[i].residue.number - 1,
                                   template_as[ind].residue.number - 1]
                    ]
                else:
                    # Without this branch `scores` would be unbound below.
                    raise ValueError(
                        'confidence_type must be "plddt" or "pae", got '
                        f'{confidence_type!r}')
                kappa_adj, tol_adj, falloff_adj = \
                    adjust_distance_restraint_terms_by_confidence(
                        scores, confidence_type)
                if kappa_adj == 0:
                    continue
            else:
                kappa_adj = tol_adj = 1
                falloff_adj = 0
            try:
                dr = adrm.add_restraint(ra1, ra2)
            except ValueError:
                # The manager declined this pair; move on to the next one.
                continue
            dist = distance(query_coord[0], template_coords[ind])
            dr.tolerance = tolerance * dist * tol_adj
            dr.target = dist
            dr.c = max(sqrt(dist) * well_half_width, 0.1)
            dr.kappa = kappa * kappa_adj
            dr.alpha = -1 - fall_off * log(max(dist - 1, 1)) - falloff_adj
            dr.enabled = True
def restrain_ca_distances_to_template(template_residues, restrained_residues,
        distance_cutoff=8, spring_constant=500):
    '''
    Creates a "web" of distance restraints between nearby CA atoms, restraining
    one set of residues to the same spatial organisation as another.

    Args:
        * template_residues:
            - a :class:`chimerax.atomic.Residues` instance. All residues must be
              from a single model, but need not be contiguous
        * restrained_residues:
            - a :class:`chimerax.atomic.Residues` instance. All residues must be
              from a single model (which may or may not be the same model as for
              `template_residues`). May be the same array as `template_residues`
              (which will just restrain all distances to their current values).
        * distance_cutoff (default = 8):
            - for each CA atom in `restrained_residues`, a distance restraint
              will be created between it and every other CA atom where the
              equivalent atom in `template_residues` is within
              `distance_cutoff` of its template equivalent.
        * spring_constant (default = 500):
            - the strength of each restraint, in
              :math:`kJ mol^{-1} nm^{-2}`

    Raises:
        TypeError: if the two residue arrays differ in length, if either
        spans more than one model, or if they do not contain the same number
        of atoms named CA.
    '''
    import numpy
    from chimerax.isolde import session_extensions as sx
    from chimerax.geometry import find_close_points, distance

    if len(template_residues) != len(restrained_residues):
        raise TypeError(
            'Template and restrained residue arrays must be the same length!')
    template_us = template_residues.unique_structures
    if len(template_us) != 1:
        raise TypeError('Template residues must be from a single model!')
    restrained_us = restrained_residues.unique_structures
    if len(restrained_us) != 1:
        raise TypeError('Restrained residues must be from a single model!')
    restrained_model = restrained_us[0]

    template_atoms = template_residues.atoms
    restrained_atoms = restrained_residues.atoms
    template_cas = template_atoms[template_atoms.names == 'CA']
    restrained_cas = restrained_atoms[restrained_atoms.names == 'CA']
    # The two CA arrays are paired by position below, so a residue lacking a
    # CA atom on one side only would shift every later pairing.
    if len(template_cas) != len(restrained_cas):
        raise TypeError(
            'Template and restrained residues must contain the same number '
            'of CA atoms!')

    template_coords = template_cas.coords
    drm = sx.get_distance_restraint_mgr(restrained_model)
    for i, rca1 in enumerate(restrained_cas):
        query_coord = numpy.array([template_coords[i]])
        # Neighbours are chosen from the template geometry, excluding atom i.
        indices = find_close_points(query_coord, template_coords,
                                    distance_cutoff)[1]
        indices = indices[indices != i]
        for ind in indices:
            rca2 = restrained_cas[ind]
            dr = drm.add_restraint(rca1, rca2)
            dr.spring_constant = spring_constant
            dr.target = distance(query_coord[0], template_coords[ind])
            dr.enabled = True
def get_shell_of_residues(model, existing_sel, dist_cutoff):
    '''
    Return all atoms of every residue in `model` that has at least one atom
    outside `existing_sel` which `find_close_points` reports as lying within
    `dist_cutoff` of an atom in `existing_sel`.
    '''
    from chimerax.geometry import find_close_points
    outside_atoms = model.atoms.subtract(existing_sel)
    _, shell_indices = find_close_points(existing_sel.coords,
                                         outside_atoms.coords,
                                         dist_cutoff)
    # Expand the close atoms to whole residues before returning their atoms.
    return outside_atoms[shell_indices].unique_residues.atoms
def refmac_distance_restraints(session, model, distance_cutoff=4.5,
        include_waters=False, file_name='RESTRAINTS.txt'):
    '''
    Write a restraints file to `file_name` with one line per pair of
    non-hydrogen atoms found within `distance_cutoff` of each other, then log
    a usage hint to `session.logger`.

    Pairs that are directly bonded, or that share a bonded neighbour, are left
    out, and each pair is written once only. Residues named HOH are skipped
    unless `include_waters` is true. The text of each line comes from
    `refmac_distance_restraint`, which is defined outside this function.

    `model` may be a single structure or an `AtomicStructures` collection
    containing exactly one; a `UserError` is raised for any other collection
    size.
    '''
    import numpy
    from chimerax.atomic import AtomicStructures
    from chimerax.geometry import find_close_points

    structure = model
    if isinstance(structure, AtomicStructures):
        if len(structure) != 1:
            from chimerax.core.errors import UserError
            raise UserError('Please specify a single atomic model!')
        structure = structure[0]

    residues = structure.residues
    if not include_waters:
        residues = residues[residues.names != 'HOH']
    heavy_atoms = residues.atoms[residues.atoms.element_names != 'H']
    coords = heavy_atoms.coords

    header = ('# ISOLDE Restraints File\n'
              '# \n'
              '# Restraints to ISOLDE output geometry\n')
    written_pairs = set()
    with open(file_name, 'wt') as rfile:
        rfile.write(header)
        for i, atom in enumerate(heavy_atoms):
            query = numpy.array([coords[i]])
            nearby = find_close_points(query, coords, distance_cutoff)[1]
            for ind in nearby:
                partner = heavy_atoms[ind]
                if partner == atom:
                    continue
                # Do not include restraints for neighbors or 1-3 relationships
                within_two_bonds = any(
                    partner == n or partner in n.neighbors
                    for n in atom.neighbors
                )
                if within_two_bonds:
                    continue
                # Don't double-count
                pair = frozenset((atom, partner))
                if pair in written_pairs:
                    continue
                rfile.write(refmac_distance_restraint(atom, partner) + '\n')
                written_pairs.add(pair)

    session.logger.info(
        f'Top-out distance restraints file for REFMAC5 written to {file_name}. '
        f'This is essentially equivalent to a ProSMART restraints file, restraining '
        f'interatomic distances to their current values. Use it in the "External restraints" '
        f'section of a Refmac5 job in the CCP-EM GUI, or at the command line as: \n'
        f'refmac5 {{all other arguments}} < {file_name}')
def pick_closest_to_line(session, mx, my, atoms, cutoff, displayed_only=True,
        hydrogens=False):
    '''
    Pick the atom coming closest to the ray projected from the mouse pointer
    away from the camera. Only atoms found between the near and far clipping
    planes and within cutoff of the line will be considered. Optionally the
    selection can be further limited to include only displayed atoms and/or
    exclude hydrogens.

    The ray is sampled at points spaced roughly cutoff/2 apart, and the atom
    with the smallest distance to any sample point is returned.

    Args:
        * session: the session whose `main_view` supplies the ray end points.
        * mx, my: pointer position passed to `clip_plane_points`.
        * atoms: candidate atoms, or None.
        * cutoff: maximum distance from a sample point for an atom to count.
        * displayed_only (default = True): keep only atoms flagged visible.
        * hydrogens (default = False): if false, atoms whose element name is
          'H' are discarded.

    Returns:
        The closest atom, or None if `atoms` is None or no atom lies closer
        than `cutoff` to a sample point.
    '''
    if atoms is None:
        return None
    import numpy
    from chimerax.geometry import find_close_points

    xyz1, xyz2 = session.main_view.clip_plane_points(mx, my)
    # Create an array of coordinates with spacing cutoff/2
    length = numpy.linalg.norm(xyz2 - xyz1)
    numpoints = numpy.ceil(length / cutoff * 2).astype(int)
    line_points = numpy.column_stack([
        numpy.linspace(xyz1[axis], xyz2[axis], num=numpoints)
        for axis in range(3)
    ])

    if displayed_only:
        atoms = atoms.filter(atoms.visibles)
    if not hydrogens:
        atoms = atoms.filter(atoms.element_names != 'H')
    atomic_coords = atoms.scene_coords

    line_indices, atom_indices = find_close_points(line_points, atomic_coords,
                                                   cutoff)
    if not len(line_indices) or not len(atom_indices):
        # Nothing near the ray.
        return None
    line_shortlist = line_points[line_indices]
    ac_shortlist = atomic_coords[atom_indices]

    # Distances between every shortlisted sample point (rows) and atom
    # (columns), computed in one vectorised step instead of a nested loop.
    offsets = (ac_shortlist[numpy.newaxis, :, :]
               - line_shortlist[:, numpy.newaxis, :])
    dists = numpy.linalg.norm(offsets, axis=2)
    # argmin scans row by row, so ties resolve to the earliest sample point
    # and then the earliest atom.
    row, col = numpy.unravel_index(numpy.argmin(dists), dists.shape)
    if not dists[row, col] < cutoff:
        return None
    return atoms[atom_indices][int(col)]
def surface_geometry(triangles, tolerance=1e-5):
    '''
    Convert a list of triangles, each given as three explicit vertex
    coordinates, into a shared-vertex representation.

    Vertices that `find_close_points` reports as lying within `tolerance` of
    one another are merged into one. Returns `(uvarray, tarray)`: a
    single-precision array of the merged vertices and a C-int array holding,
    for each triangle, the three indices of its corners in `uvarray`.
    '''
    from numpy import array, reshape, single as floatc, intc
    from chimerax.geometry import find_close_points

    num_triangles = len(triangles)
    varray = reshape(triangles, (3 * num_triangles, 3)).astype(floatc)

    uindex = {}   # raw vertex number -> index of its merged vertex
    unique = []   # merged vertex coordinates, in order of first appearance
    for v, vertex in enumerate(varray):
        if v in uindex:
            continue
        _, duplicates = find_close_points(varray[v:v + 1, :], varray,
                                          tolerance)
        merged_index = len(unique)
        for dup in duplicates:
            # Vertices already claimed by an earlier merge keep their index.
            uindex.setdefault(dup, merged_index)
        unique.append(vertex)

    uvarray = array(unique, floatc)
    tlist = [
        tuple(uindex[3 * t + corner] for corner in range(3))
        for t in range(num_triangles)
    ]
    tarray = array(tlist, intc)
    return uvarray, tarray
def cluster_unbound_ligands(model, unbound, cutoff=5):
    '''
    Assign each residue in `unbound` to the chain of `model` with which it
    makes the greatest weighted contact.

    For every chain ID, the non-hydrogen atoms of `unbound` lying within
    `cutoff` of that chain's non-hydrogen atoms (residues in `unbound`
    excluded) are collected. Each contributes a weight of 1, replaced by
    `_carbon_weight` for carbon and `_metal_weight` for metals, and the
    weights are tallied per residue in a `Weighted_Counter`. Those three names
    are defined outside this function.

    Returns a tuple `(assigned, unclassified)`: a dict mapping chain ID to a
    `Residues` collection of the ligands assigned to it, and a `Residues`
    collection of the ligands with no positive tally against any chain.
    '''
    from chimerax.geometry import find_close_points
    from chimerax.atomic import Residues
    from collections import defaultdict
    import numpy

    chain_ids = model.residues.unique_chain_ids
    other_residues = model.residues.subtract(unbound)
    ligand_atoms = unbound.atoms[unbound.atoms.element_names != 'H']

    # Per-chain tally of weighted contacts, keyed by ligand residue.
    chain_map = {}
    for cid in chain_ids:
        chain_residues = other_residues[other_residues.chain_ids == cid]
        chain_atoms = chain_residues.atoms[
            chain_residues.atoms.element_names != 'H']
        _, ligand_idx = find_close_points(chain_atoms.coords,
                                          ligand_atoms.coords, cutoff)
        close_ligand_atoms = ligand_atoms[ligand_idx]
        weights = numpy.ones(len(close_ligand_atoms), numpy.double)
        weights[close_ligand_atoms.element_names == 'C'] = _carbon_weight
        weights[close_ligand_atoms.elements.is_metal] = _metal_weight
        chain_map[cid] = Weighted_Counter(
            [a.residue for a in close_ligand_atoms], weights)

    unclassified = []
    closest_chain_map = defaultdict(list)
    for r in unbound:
        best_cid = None
        best_weight = 0
        for cid in chain_ids:
            weight = chain_map[cid].get(r, None)
            # Strict comparison: on a tie the first chain encountered wins.
            if weight is not None and weight > best_weight:
                best_cid = cid
                best_weight = weight
        if best_cid is None:
            unclassified.append(r)
        else:
            closest_chain_map[best_cid].append(r)

    assigned = {
        cid: Residues(residues)
        for cid, residues in closest_chain_map.items()
    }
    return assigned, Residues(unclassified)
def get_shell_of_residues(residues, dist_cutoff):
    '''
    Get a shell of whole residues from the same model as the atoms in
    residues, within a user-defined cut-off distance surrounding residues.
    Expects all residues to be within the same model; an Exception is raised
    otherwise. The residues passed in are not part of the result.
    '''
    from chimerax.geometry import find_close_points

    structures = residues.unique_structures
    if len(structures) != 1:
        raise Exception(
            'selection should contain atoms from a single molecule!')

    # Candidate atoms: everything in the model outside the given residues.
    candidate_atoms = structures[0].residues.subtract(residues).atoms
    _, shell_indices = find_close_points(residues.atoms.coords,
                                         candidate_atoms.coords,
                                         dist_cutoff)
    return candidate_atoms[shell_indices].unique_residues
def restrain_small_ligands(model, distance_cutoff=4, heavy_atom_limit=3,
        spring_constant=5000, bond_to_carbon=False):
    '''
    Residues with a small number of heavy atoms can be problematic in MDFF if
    unrestrained, since if knocked out of density they tend to simply keep
    going. It is best to restrain them with distance restraints to suitable
    surrounding atoms or, failing that, to their starting positions.

    Args:
        * model:
            - a :class:`chimerax.atomic.AtomicStructure` instance
        * distance_cutoff (default = 4):
            - radius in Angstroms to look for candidate heavy atoms for
              distance restraints. If fewer than three distance restraints
              are created for a residue, position restraints are applied as
              well.
        * heavy_atom_limit (default = 3):
            - Only non-polymer residues with fewer than `heavy_atom_limit`
              heavy atoms will be restrained.
              NOTE(review): the comparison is strict, so with the default a
              three-atom ligand is NOT restrained; confirm whether "<=" was
              intended.
        * spring_constant (default = 5000):
            - strength of each restraint, in :math:`kJ mol^{-1} nm^{-2}`
        * bond_to_carbon (default = `False`):
            - if `False`, carbon atoms are excluded both as restrained ligand
              atoms and as restraint partners, and a ligand with no
              non-carbon heavy atoms receives position restraints only. If
              `True`, all heavy atoms are eligible.
    '''
    from chimerax.atomic import Residue, Residues
    from chimerax.geometry import find_close_points, distance
    from .. import session_extensions as sx

    residues = model.residues
    ligands = residues[residues.polymer_types == Residue.PT_NONE]
    small_ligands = Residues([
        r for r in ligands
        if len(r.atoms[r.atoms.element_names != 'H']) < heavy_atom_limit
    ])

    drm = sx.get_distance_restraint_mgr(model)
    prm = sx.get_position_restraint_mgr(model)

    def _pin_in_place(atoms):
        # Restrain atoms to their current coordinates. add_restraints is used
        # in both fallback paths so they behave the same way.
        prs = prm.add_restraints(atoms)
        prs.targets = prs.atoms.coords
        prs.spring_constants = spring_constant
        prs.enableds = True

    all_heavy_atoms = model.atoms[model.atoms.element_names != 'H']
    if not bond_to_carbon:
        all_heavy_atoms = all_heavy_atoms[all_heavy_atoms.element_names != 'C']
    all_heavy_coords = all_heavy_atoms.coords

    for r in small_ligands:
        r_heavy_atoms = r.atoms[r.atoms.element_names != 'H']
        if not bond_to_carbon:
            r_non_carbon_atoms = r_heavy_atoms[
                r_heavy_atoms.element_names != 'C']
            if not len(r_non_carbon_atoms):
                # No non-carbon heavy atoms. Apply position restraints
                _pin_in_place(r_heavy_atoms)
                continue
            r_heavy_atoms = r_non_carbon_atoms
        if not len(r_heavy_atoms):
            # Nothing to restrain in this residue.
            continue

        r_indices = all_heavy_atoms.indices(r_heavy_atoms)
        r_coords = r_heavy_atoms.coords
        # Count restraints over the whole residue rather than per atom, so
        # the stability check below reflects every restraint created for it.
        num_drs = 0
        for ra, ri, rc in zip(r_heavy_atoms, r_indices, r_coords):
            _, found_i = find_close_points([rc], all_heavy_coords,
                                           distance_cutoff)
            found_i = found_i[found_i != ri]
            for fi in found_i:
                if fi in r_indices:
                    # Never restrain a ligand atom to its own residue.
                    continue
                dr = drm.add_restraint(ra, all_heavy_atoms[fi])
                dr.spring_constant = spring_constant
                dr.target = distance(rc, all_heavy_coords[fi])
                dr.enabled = True
                num_drs += 1
        if num_drs < 3:
            # Really need at least 3 distance restraints (probably 4, actually,
            # but we don't want to be *too* restrictive) to be stable in 3D
            # space. If we have fewer than that, add position restraints to be
            # sure.
            _pin_in_place(r_heavy_atoms)
def find_clashes(session, test_atoms,
        assumed_max_vdw=2.1,
        attr_name=defaults["attr_name"],
        bond_separation=defaults["bond_separation"],
        clash_threshold=defaults["clash_threshold"],
        distance_only=None,
        hbond_allowance=defaults["clash_hbond_allowance"],
        inter_model=True,
        inter_submodel=False,
        intra_model=True,
        intra_res=False,
        intra_mol=True,
        res_separation=None,
        restrict="any"):
    """Detect steric clashes/contacts

       'test_atoms' should be an Atoms collection.

       'restrict' can be one of:
         - 'any':  interactions involving at least one atom from 'test_atoms'
           will be found
         - 'both':  interactions involving only atoms from 'test_atoms' will
           be found
         - 'cross':  interactions involving exactly one atom from 'test_atoms'
           will be found
         - an Atoms collection :  interactions between 'test_atoms' and the
           'restrict' atoms will be found

       The "clash value" is the sum of the VDW radii minus the distance,
       which must exceed 'clash_threshold'.

       'hbond_allowance' is how much the clash value is reduced if one
       atom is a donor and the other an acceptor.

       If 'distance_only' is set (in which case it must be a positive numeric
       value), then both VDW radii, clash_threshold, and hbond_allowance are
       ignored and the center-center distance between the atoms must be <=
       the given value.

       'assumed_max_vdw' stands in for the radius of the not-yet-known
       partner atom when the search cutoffs are computed.

       Atom pairs are eliminated from consideration if they are less than
       or equal to 'bond_separation' bonds apart.

       Intra-residue clashes are ignored unless intra_res is True.
       Intra-model clashes are ignored unless intra_model is True.
       Intra-molecule (covalently connected fragment) clashes are ignored
       unless intra_mol is True.
       Inter-(sibling)submodel clashes are ignored unless inter_submodel is
       True.
       Inter-model clashes are ignored unless inter_model is True.

       If res_separation is not None, it should be a positive integer -- in
       which case for residues in the same chain, clashes/contacts are
       ignored unless the residues are at least that far apart in the
       sequence.

       'attr_name' is accepted but not referenced in the body of this
       function.

       Returns a dictionary keyed on atoms, with values that are
       dictionaries keyed on clashing atom with value being the clash value.
    """

    from chimerax.atomic import Structure
    # Scene coordinates are only used when inter-model checks are requested
    # and more than one Structure is open in the session.
    use_scene_coords = inter_model and len(
        [m for m in session.models if isinstance(m, Structure)]) > 1
    # use the fast _closepoints module to cut down candidate atoms if we
    # can (since _closepoints doesn't know about "non-bonded" it isn't as
    # useful as it might otherwise be)
    if restrict == "any":
        if inter_model:
            from chimerax.atomic import all_atoms
            search_atoms = all_atoms(session)
        else:
            from chimerax.atomic import structure_atoms
            search_atoms = structure_atoms(test_atoms.unique_structures)
    elif restrict == "cross":
        if inter_model:
            from chimerax.atomic import all_atoms
            universe_atoms = all_atoms(session)
        else:
            from chimerax.atomic import structure_atoms
            universe_atoms = structure_atoms(test_atoms.unique_structures)
        other_atoms = universe_atoms.subtract(test_atoms)
        # Coarse cutoff: the largest separation at which any pair could
        # still pass the final test.
        if distance_only:
            cutoff = distance_only
        else:
            cutoff = 2.0 * assumed_max_vdw - clash_threshold
        if use_scene_coords:
            test_coords = test_atoms.scene_coords
            other_coords = other_atoms.scene_coords
        else:
            test_coords = test_atoms.coords
            other_coords = other_atoms.coords
        from chimerax.geometry import find_close_points
        t_close, o_close = find_close_points(test_coords, other_coords, cutoff)
        # Both sides are narrowed to the atoms that have a close partner.
        test_atoms = test_atoms[t_close]
        search_atoms = other_atoms[o_close]
    elif not isinstance(restrict, str):
        # 'restrict' is itself the collection of atoms to search.
        search_atoms = restrict
    else:
        # Any other string (e.g. 'both'): search within test_atoms only.
        search_atoms = test_atoms

    if res_separation is not None:
        # Map each residue to its position within its chain; falsy entries
        # of c.residues are skipped.
        chain_pos = {}
        for s in test_atoms.unique_structures:
            for c in s.chains:
                for i, r in enumerate(c.residues):
                    if r:
                        chain_pos[r] = i
    from chimerax.atom_search import AtomSearchTree
    # NOTE(review): the tree is built with scene_coords=inter_model, whereas
    # the query coordinates below follow use_scene_coords. The two differ
    # when inter_model is true but at most one Structure is open -- confirm
    # that this is intended.
    tree = AtomSearchTree(search_atoms, scene_coords=inter_model)
    clashes = {}
    from chimerax.geometry import distance
    # Lazily filled: atom -> set of atoms in its covalently connected
    # fragment (only populated when intra_mol is false).
    intra_mol_map = {}
    for a in test_atoms:
        if distance_only:
            cutoff = distance_only
        else:
            cutoff = a.radius + assumed_max_vdw - clash_threshold
        crd = a.scene_coord if use_scene_coords else a.coord
        nearby = tree.search(crd, cutoff)
        if not nearby:
            continue
        # Collect 'a' plus every atom reachable within bond_separation
        # bonds; these are never reported as clashing with 'a'.
        need_expansion = [a]
        exclusions = set(need_expansion)
        for i in range(bond_separation):
            next_need = []
            for expand in need_expansion:
                for n in expand.neighbors:
                    if n in exclusions:
                        continue
                    exclusions.add(n)
                    next_need.append(n)
            need_expansion = next_need
        if not intra_mol and a not in intra_mol_map:
            # Walk the bond graph from 'a' and record the resulting set for
            # every member, so each fragment is traversed only once.
            connected = set([a])
            to_do = list(a.neighbors)
            while to_do:
                conn = to_do.pop()
                connected.add(conn)
                for nb in conn.neighbors:
                    if nb not in connected:
                        to_do.append(nb)
            for ca in connected:
                intra_mol_map[ca] = connected
        for nb in nearby:
            if nb in exclusions:
                continue
            if not intra_res and a.residue == nb.residue:
                continue
            if not intra_mol and nb in intra_mol_map[a]:
                continue
            if not inter_model and a.structure != nb.structure:
                continue
            if not intra_model and a.structure == nb.structure:
                continue
            # Already recorded from the other atom's side.
            if a in clashes and nb in clashes[a]:
                continue
            if res_separation is not None:
                if a.residue.chain is not None and a.residue.chain == nb.residue.chain:
                    if abs(chain_pos[a.residue] - chain_pos[nb.residue]) < res_separation:
                        continue
            # Skip pairs whose structure ids agree on the first component and
            # on everything but the last component, yet differ after the
            # first component.
            if not inter_submodel \
            and a.structure.id and nb.structure.id \
            and a.structure.id[0] == nb.structure.id[0] \
            and a.structure.id[:-1] == nb.structure.id[:-1] \
            and a.structure.id[1:] != nb.structure.id[1:]:
                continue
            if use_scene_coords:
                a_crd, nb_crd = a.scene_coord, nb.scene_coord
            else:
                a_crd, nb_crd = a.coord, nb.coord
            if distance_only:
                clash = distance_only - distance(a_crd, nb_crd)
            else:
                clash = a.radius + nb.radius - distance(a_crd, nb_crd)
            if hbond_allowance and not distance_only:
                if (_donor(a) and _acceptor(nb)) or (_donor(nb) and _acceptor(a)):
                    clash -= hbond_allowance
            if distance_only:
                if clash < 0.0:
                    continue
            elif clash < clash_threshold:
                continue
            # Record the pair symmetrically.
            clashes.setdefault(a, {})[nb] = clash
            clashes.setdefault(nb, {})[a] = clash
    return clashes