def shrake_rupley(uni, probe_radius=0.14, n_sphere_points=960):
    """Compute the per-atom solvent accessible surface area of the last frame.

    Parameters
    ----------
    uni : object
        Object exposing ``trajectory[-1].positions`` (the coordinates of the
        last frame) and ``atoms`` (each atom providing a ``type`` attribute
        that is used as a key into ``_ATOMIC_RADII``).
        NOTE(review): presumably an MDAnalysis Universe -- confirm, and
        confirm that the position units match ``_ATOMIC_RADII`` and
        ``probe_radius``.
    probe_radius : float, optional
        Radius of the probe; it is added to every atomic radius.
    n_sphere_points : int, optional
        Number of sphere points per atom handed to ``_geometry._sasa``.

    Returns
    -------
    areas : np.ndarray, shape=(1, n_atoms), dtype=float32
        Array filled in by ``_geometry._sasa``, one column per atom.
    """
    # Wrap the single frame in a leading axis so the array is 3-dimensional,
    # which is what ensure_type (ndim=3) requires below.
    xyz = np.array([uni.trajectory[-1].positions])
    xyz = ensure_type(xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    # Per-atom resolution: every atom maps onto its own output column.
    n_atoms = xyz.shape[1]
    atom_mapping = np.arange(n_atoms, dtype=np.int32)
    out = np.zeros((xyz.shape[0], n_atoms), dtype=np.float32)

    # Radii must come from the same object as the coordinates; the original
    # read an undefined/global name ``u`` here instead of the ``uni`` argument.
    atom_radii = [_ATOMIC_RADII[atom.type] for atom in uni.atoms]
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)
    return out
def getSASA(protein_snapshot, cover_atom_coords=None):
    """Compute the per-atom solvent accessible surface area of the protein.

    When ``cover_atom_coords`` is given, the cover atoms are appended to the
    protein coordinates before the calculation, so they can occlude the
    protein surface; they are assigned a radius of 0.17 (the value used for
    carbon in the original comment). Only the values belonging to the protein
    atoms are returned. Comparing the result with and without cover atoms is
    left to the caller.

    Parameters
    ----------
    protein_snapshot : object
        Snapshot exposing ``xyz`` (indexed as ``xyz[0]`` for the atom
        coordinates) and ``topology.atoms`` whose ``element.symbol`` is a key
        of ``_ATOMIC_RADII``.
        NOTE(review): the output buffer has a single row, so this presumably
        expects a single-frame snapshot -- confirm against callers.
    cover_atom_coords : array-like, shape=(n_cover, 3), optional
        Coordinates of additional atoms to place around the protein.

    Returns
    -------
    areas : np.ndarray, shape=(n_protein_atoms,), dtype=float32
        First row of the buffer filled by ``_geometry._sasa``, truncated to
        the protein atoms.
    """
    probe_radius = 0.14
    n_sphere_points = 960
    cover_atom_radius = 0.17

    n_protein_atoms = protein_snapshot.xyz.shape[1]
    atom_radii = [_ATOMIC_RADII[atom.element.symbol]
                  for atom in protein_snapshot.topology.atoms]

    if cover_atom_coords is None:
        xyz = np.array(protein_snapshot.xyz, dtype=np.float32)
    else:
        combined = np.concatenate(
            (protein_snapshot.xyz[0], cover_atom_coords), axis=0)
        xyz = np.array(np.expand_dims(combined, axis=0), dtype=np.float32)
        # One radius per *cover* atom. The original used xyz.shape[1]
        # (protein + cover atoms), which made the radii vector longer than
        # the number of atoms in xyz.
        n_cover_atoms = xyz.shape[1] - n_protein_atoms
        atom_radii = atom_radii + [cover_atom_radius] * n_cover_atoms

    radii = np.array(atom_radii, np.float32) + probe_radius
    atom_mapping = np.arange(xyz.shape[1], dtype=np.int32)
    out = np.zeros((1, xyz.shape[1]), dtype=np.float32)
    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)

    # Drop the columns that belong to the cover atoms.
    return out[:, :n_protein_atoms][0]
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960):
    """Compute the solvent accessible surface area of each atom in each frame.

    Parameters
    ----------
    traj : Trajectory
        Trajectory exposing ``xyz`` and ``topology.atoms``.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_atoms)
        The accessible surface area of each atom in every frame.

    Raises
    ------
    RuntimeError
        If ``_geometry._processor_supports_sse41()`` reports no SSE4.1
        support.

    Notes
    -----
    This is the Shrake and Rupley algorithm, with the sphere points generated
    by the golden section spiral algorithm. A mesh of points is created on
    the surface of each atom (at the van der Waals radius plus the probe
    radius from the nucleus), and the points that are not within the radius
    of another atom are counted. Assuming the points are evenly distributed,
    the accessible area is 4*pi*r^2 times the accessible fraction of points.

    The golden section spiral (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    traces a spiral over the unit sphere and puts points down equidistantly
    along it. It is cheap, but not perfect; relaxing mutually repelling
    points with a short "molecular dynamics" run would probably be better,
    but sounds expensive. The gromacs utility g_sas instead uses an
    icosahedral tessellation, roughly like
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    sse41_available = _geometry._processor_supports_sse41()
    if not sse41_available:
        raise RuntimeError('This CPU does not support the required instruction set (SSE4.1)')

    coords = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                         shape=(None, None, 3), warn_on_cast=False)
    n_frames, n_atoms = coords.shape[0], coords.shape[1]
    areas = np.zeros((n_frames, n_atoms), dtype=np.float32)

    # Look up one radius per atom, then grow each by the probe radius.
    element_radii = []
    for atom in traj.topology.atoms:
        element_radii.append(_ATOMIC_RADII[atom.element.symbol])
    probe_expanded = np.array(element_radii, np.float32) + probe_radius

    # NOTE(review): this variant calls _sasa without an atom_mapping argument,
    # unlike other shrake_rupley variants in this file -- confirm which
    # _geometry signature is in use.
    _geometry._sasa(coords, probe_expanded, int(n_sphere_points), areas)
    return areas
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960):
    """Return the per-atom solvent accessible surface area for every frame.

    Parameters
    ----------
    traj : Trajectory
        Trajectory exposing ``xyz`` and ``topology.atoms``.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_atoms)
        The accessible surface area of each atom in every frame.

    Notes
    -----
    Implements the Shrake and Rupley algorithm using golden section spiral
    sphere points. Each atom is surrounded by a mesh of points at a distance
    of its van der Waals radius plus the probe radius; the points that do not
    fall within the radius of another atom lie on the molecular surface.
    With evenly distributed points, the accessible area is 4*pi*r^2 times
    the fraction of accessible points.

    The golden section spiral (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    lays points equidistantly along a spiral tracing the unit sphere. It is
    cheap, but not perfect: letting mutually repelling points relax to an
    energy minimum would possibly be the best approach, but sounds
    expensive. The gromacs utility g_sas generates its points from an
    icosahedral tessellation instead, roughly as described at
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    positions = ensure_type(
        traj.xyz,
        dtype=np.float32,
        ndim=3,
        name='traj.xyz',
        shape=(None, None, 3),
        warn_on_cast=False,
    )

    # One output cell per (frame, atom) pair.
    sasa = np.zeros(positions.shape[:2], dtype=np.float32)

    vdw_radii = np.array(
        [_ATOMIC_RADII[a.element.symbol] for a in traj.topology.atoms],
        np.float32,
    )
    inflated_radii = vdw_radii + probe_radius

    # NOTE(review): _sasa is called here without an atom_mapping argument,
    # unlike other shrake_rupley variants in this file -- confirm which
    # _geometry signature is in use.
    _geometry._sasa(positions, inflated_radii, int(n_sphere_points), sasa)
    return sasa
def _tessellation(**kwargs):
    """Tessellate one receptor snapshot and return a table of alpha atoms.

    This is the main AlphaSpace function; it is self contained so it can be
    run through the multiprocessing module.

    Parameters
    ----------
    receptor_xyz : array-like, shape=(n_atoms, 3)
        Receptor atom coordinates (keyword ``receptor_xyz``).
    binder_xyz : array-like or None
        Binder atom coordinates, or None when there is no binder.
    atom_radii : sequence of float
        One radius per receptor atom; ``config.probe_radius`` is added to it.
    config : object
        Configuration object. The attributes read here are ``min_r``,
        ``max_r``, ``clust_dist``, ``hit_dist``, ``probe_radius``,
        ``n_sphere_points``, ``screen_by_lig_cntct`` and, optionally,
        ``cluster_method`` and ``hdbscan_min_samples``.
    snapshot_idx : int
        Index of the snapshot, stored in column 1 of the result.

    Returns
    -------
    data : np.ndarray, shape=(n_alpha, 18)
        One row per retained alpha atom; the column layout is documented next
        to the ``np.concatenate`` call below.

    Raises
    ------
    ValueError
        If the configured clustering method is neither ``'average_linkage'``
        nor ``'hdbscan'``.
    """
    receptor_xyz = kwargs['receptor_xyz']
    binder_xyz = kwargs['binder_xyz']
    atom_radii = kwargs['atom_radii']
    config = kwargs['config']
    snapshot_idx = kwargs['snapshot_idx']

    # Fall back to average linkage when the config does not define a method.
    # Only lookup failures are treated as "not configured"; the original bare
    # ``except:`` also swallowed unrelated errors such as KeyboardInterrupt.
    try:
        cluster_method = config.cluster_method
    except (AttributeError, KeyError):
        cluster_method = 'average_linkage'

    # Generate raw tessellation simplexes
    raw_alpha_lining_idx = Delaunay(receptor_xyz).simplices
    # Coordinates of the first lining atom of every simplex
    raw_alpha_lining_xyz = np.take(receptor_xyz, raw_alpha_lining_idx[:, 0].flatten(), axis=0)

    # Generate alpha atom coordinates
    # NOTE(review): this relies on the Voronoi vertices being ordered like the
    # Delaunay simplices above -- confirm.
    raw_alpha_xyz = Voronoi(receptor_xyz).vertices
    # Alpha sphere radius: distance from the alpha atom to a lining atom
    raw_alpha_sphere_radii = np.linalg.norm(raw_alpha_lining_xyz - raw_alpha_xyz, axis=1)

    # Keep only the alpha spheres whose radius lies within [min_r, max_r]
    radius_in_range = np.logical_and(config.min_r <= raw_alpha_sphere_radii,
                                     raw_alpha_sphere_radii <= config.max_r)
    filtered_alpha_idx = np.where(radius_in_range)[0]
    filtered_alpha_radii = np.take(raw_alpha_sphere_radii, filtered_alpha_idx)
    alpha_lining = np.take(raw_alpha_lining_idx, filtered_alpha_idx, axis=0)
    filtered_alpha_xyz = np.take(raw_alpha_xyz, filtered_alpha_idx, axis=0)

    # Cluster the remaining vertices to assign the index of the pocket they
    # belong to
    if cluster_method == 'average_linkage':
        zmat = linkage(filtered_alpha_xyz, method='average')
        # fcluster labels start from 1, hence the -1
        alpha_pocket_index = fcluster(zmat, config.clust_dist / 10, criterion='distance') - 1
    elif cluster_method == 'hdbscan':
        import hdbscan
        clusterer = hdbscan.HDBSCAN(metric='euclidean', min_samples=config.hdbscan_min_samples)
        clusterer.fit(filtered_alpha_xyz)
        alpha_pocket_index = clusterer.labels_
    else:
        # The original message read "Known Clustering Method", which stated
        # the opposite of the actual problem.
        raise ValueError('Unknown Clustering Method: {}'.format(cluster_method))

    # Coordinates of the four lining atoms of every retained alpha atom
    filtered_lining_xyz = np.take(receptor_xyz, alpha_lining, axis=0)

    # Tetrahedron volume per alpha atom; the factor 1000 converts nm^3 to A^3
    _total_space = np.array(
        [_getTetrahedronVolume(i) for i in filtered_lining_xyz]) * 1000
    # Placeholder polarity: the space is split evenly (both names share one array)
    _nonpolar_space = _polar_space = _total_space / 2

    n_alpha = filtered_alpha_xyz.shape[0]
    if binder_xyz is not None:
        # Calculate the contact matrix, and link each alpha with its closest atom
        dist_matrix = cdist(filtered_alpha_xyz, binder_xyz)
        min_idx = np.argmin(dist_matrix, axis=1)
        mins = np.min(dist_matrix, axis=1) * 10  # nm to A
        is_contact = mins < config.hit_dist
    else:
        min_idx = np.zeros(n_alpha)
        mins = np.zeros(n_alpha)
        is_contact = np.zeros(n_alpha)

    # Lining atom ASA
    _xyz = np.array(np.expand_dims(receptor_xyz, axis=0), dtype=np.float32)
    dim1 = _xyz.shape[1]
    atom_mapping = np.arange(dim1, dtype=np.int32)
    asa = np.zeros((1, dim1), dtype=np.float32)
    radii = np.array(atom_radii, np.float32) + config.probe_radius
    _geometry._sasa(_xyz, radii, int(config.n_sphere_points), atom_mapping, asa)
    alpha_lining_asa = np.take(asa[0], alpha_lining).sum(axis=1) * 100  # nm2 to A2

    # Set contact to active if "use ligand contact" is enabled
    is_active = is_contact if config.screen_by_lig_cntct else np.zeros_like(alpha_pocket_index)

    n_rows = alpha_pocket_index.shape[0]
    data = np.concatenate((np.arange(n_rows)[:, np.newaxis],  # 0 idx
                           np.full((n_rows, 1), snapshot_idx, dtype=int),  # 1 snapshot_idx
                           filtered_alpha_xyz,  # 2 3 4 x y z
                           alpha_lining,  # 5 6 7 8 lining_atom_idx_1 - 4
                           np.expand_dims(_polar_space, axis=1),  # 9 polar_space 0
                           np.expand_dims(_nonpolar_space, axis=1),  # 10 nonpolar_space 0
                           np.expand_dims(is_active, axis=1),  # 11 is_active 1
                           np.expand_dims(is_contact, axis=1),  # 12 isContact 0
                           np.expand_dims(alpha_pocket_index, axis=1),  # 13 pocket_idx
                           np.expand_dims(filtered_alpha_radii, axis=1),  # 14 radii
                           np.expand_dims(min_idx, axis=1),  # 15 closest atom idx
                           np.expand_dims(mins, axis=1),  # 16 closest atom dist
                           np.expand_dims(alpha_lining_asa, axis=1)  # 17 total lining atom asa
                           ), axis=-1)

    print('{} snapshot processed'.format(snapshot_idx + 1))
    return data
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960, mode='atom'):
    """Compute the solvent accessible surface area of each atom or residue in
    each simulation frame.

    Parameters
    ----------
    traj : Trajectory
        Trajectory exposing ``xyz``, ``topology.atoms`` and, for residue
        mode, ``n_residues`` and ``top.atoms``.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.
    mode : {'atom', 'residue'}
        In mode == 'atom', the extracted areas are resolved per-atom.
        In mode == 'residue', this is consolidated down to the per-residue
        SASA by summing over the atoms in each residue.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_features)
        The accessible surface area of each atom or residue in every frame.
        If mode == 'atom', the second dimension indexes the atoms in the
        trajectory, whereas if mode == 'residue', it indexes the residues.

    Raises
    ------
    RuntimeError
        If ``_geometry._processor_supports_sse41()`` reports no SSE4.1
        support.
    ValueError
        If ``mode`` is not 'atom' or 'residue', or if the residue indices are
        not contiguous integers starting from zero.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the golden
    section spiral algorithm to generate the sphere points. The basic idea is
    to create a mesh of points representing the surface of each atom (at a
    distance of the van der Waals radius plus the probe radius from the
    nuclei), and then count the number of such mesh points that are on the
    molecular surface -- i.e. not within the radius of another atom. Assuming
    that the points are evenly distributed, the number of points is directly
    proportional to the accessible surface area (it's just 4*pi*r^2 times the
    fraction of the points that are accessible).

    There are a number of different ways to generate the points on the
    sphere -- possibly the best way would be to do a little "molecular
    dynamics": put the points on the sphere, and then run MD where all the
    points repel one another and wait for them to get to an energy minimum.
    But that sounds expensive.

    This code uses the golden section spiral algorithm (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make a spiral that traces out the unit sphere and then put
    points down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for
    generating points on the sphere, which is based on an icosahedral
    tessellation. Roughly, the icosahedral tessellation works something like
    this
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    if not _geometry._processor_supports_sse41():
        raise RuntimeError(
            'This CPU does not support the required instruction set (SSE4.1)')

    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    if mode == 'atom':
        dim1 = xyz.shape[1]
        atom_mapping = np.arange(dim1, dtype=np.int32)
    elif mode == 'residue':
        dim1 = traj.n_residues
        atom_mapping = np.array([a.residue.index for a in traj.top.atoms],
                                dtype=np.int32)
        # array_equal tolerates arrays of different length. The original
        # elementwise ``==`` failed to broadcast when indices were missing,
        # which on recent NumPy raises a broadcast error instead of the
        # message below.
        residue_ids = np.unique(atom_mapping)
        if not np.array_equal(residue_ids, np.arange(residue_ids.size)):
            raise ValueError('residues must have contiguous integer indices '
                             'starting from zero')
    else:
        raise ValueError(
            'mode must be one of "residue", "atom". "%s" supplied' % mode)

    out = np.zeros((xyz.shape[0], dim1), dtype=np.float32)
    atom_radii = [
        _ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms
    ]
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)
    return out
def shrake_rupley(self, traj, probe_radius=0.14, n_sphere_points=960, mode='residue', change_radii=None):
    """Compute the solvent accessible surface area of each atom or residue in
    each simulation frame.

    Modified from MDTraj.

    Parameters
    ----------
    traj : Trajectory
        Trajectory exposing ``xyz``, ``topology.atoms`` and, for residue
        mode, ``n_residues`` and ``top.atoms``.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.
    mode : {'atom', 'residue'}
        In mode == 'atom', the extracted areas are resolved per-atom.
        In mode == 'residue', this is consolidated down to the per-residue
        SASA by summing over the atoms in each residue.
    change_radii : dict, optional
        A partial or complete dict containing the radii to change from the
        defaults. Should take the form {"Symbol" : radii_in_nm }, e.g.
        {"Cl" : 0.181 } to change the radii of Chlorine to 181 pm for the
        ionic Cl-.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_features)
        The accessible surface area of each atom or residue in every frame.
        If mode == 'atom', the second dimension indexes the atoms in the
        trajectory, whereas if mode == 'residue', it indexes the residues.

    Raises
    ------
    ValueError
        If ``mode`` is not 'atom' or 'residue', or if the residue indices are
        not contiguous integers starting from zero.
    KeyError
        If the element parsed from an atom has no entry in the radii table.
    """
    # Default radii in nm, keyed by element symbol.
    _ATOMIC_RADII = {
        'H': 0.120, 'He': 0.140, 'Li': 0.076, 'Be': 0.059, 'B': 0.192,
        'C': 0.170, 'N': 0.155, 'O': 0.152, 'F': 0.147, 'Ne': 0.154,
        'Na': 0.102, 'Mg': 0.086, 'Al': 0.184, 'Si': 0.210, 'P': 0.180,
        'S': 0.180, 'Cl': 0.175, 'Ar': 0.188, 'K': 0.138, 'Ca': 0.114,
        'Sc': 0.211, 'Ti': 0.200, 'V': 0.200, 'Cr': 0.200, 'Mn': 0.200,
        'Fe': 0.200, 'Co': 0.200, 'Ni': 0.163, 'Cu': 0.140, 'Zn': 0.139,
        'Ga': 0.187, 'Ge': 0.211, 'As': 0.185, 'Se': 0.190, 'Br': 0.185,
        'Kr': 0.202, 'Rb': 0.303, 'Sr': 0.249, 'Y': 0.200, 'Zr': 0.200,
        'Nb': 0.200, 'Mo': 0.200, 'Tc': 0.200, 'Ru': 0.200, 'Rh': 0.200,
        'Pd': 0.163, 'Ag': 0.172, 'Cd': 0.158, 'In': 0.193, 'Sn': 0.217,
        'Sb': 0.206, 'Te': 0.206, 'I': 0.198, 'Xe': 0.216, 'Cs': 0.167,
        'Ba': 0.149, 'La': 0.200, 'Ce': 0.200, 'Pr': 0.200, 'Nd': 0.200,
        'Pm': 0.200, 'Sm': 0.200, 'Eu': 0.200, 'Gd': 0.200, 'Tb': 0.200,
        'Dy': 0.200, 'Ho': 0.200, 'Er': 0.200, 'Tm': 0.200, 'Yb': 0.200,
        'Lu': 0.200, 'Hf': 0.200, 'Ta': 0.200, 'W': 0.200, 'Re': 0.200,
        'Os': 0.200, 'Ir': 0.200, 'Pt': 0.175, 'Au': 0.166, 'Hg': 0.155,
        'Tl': 0.196, 'Pb': 0.202, 'Bi': 0.207, 'Po': 0.197, 'At': 0.202,
        'Rn': 0.220, 'Fr': 0.348, 'Ra': 0.283, 'Ac': 0.200, 'Th': 0.200,
        'Pa': 0.200, 'U': 0.186, 'Np': 0.200, 'Pu': 0.200, 'Am': 0.200,
        'Cm': 0.200, 'Bk': 0.200, 'Cf': 0.200, 'Es': 0.200, 'Fm': 0.200,
        'Md': 0.200, 'No': 0.200, 'Lr': 0.200, 'Rf': 0.200, 'Db': 0.200,
        'Sg': 0.200, 'Bh': 0.200, 'Hs': 0.200, 'Mt': 0.200, 'Ds': 0.200,
        'Rg': 0.200, 'Cn': 0.200, 'Uut': 0.200, 'Fl': 0.200, 'Uup': 0.200,
        'Lv': 0.200, 'Uus': 0.200, 'Uuo': 0.200,
    }

    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    if mode == 'atom':
        dim1 = xyz.shape[1]
        atom_mapping = np.arange(dim1, dtype=np.int32)
    elif mode == 'residue':
        dim1 = traj.n_residues
        if dim1 == 1:
            # Single residue: every atom contributes to column 0.
            atom_mapping = np.array([0] * xyz.shape[1], dtype=np.int32)
        else:
            atom_mapping = np.array(
                [a.residue.index for a in traj.top.atoms], dtype=np.int32)
            # array_equal tolerates arrays of different length. The original
            # elementwise ``==`` failed to broadcast when indices were
            # missing, which on recent NumPy raises a broadcast error
            # instead of the message below.
            residue_ids = np.unique(atom_mapping)
            if not np.array_equal(residue_ids, np.arange(residue_ids.size)):
                raise ValueError(
                    'residues must have contiguous integer indices starting from zero'
                )
    else:
        raise ValueError(
            'mode must be one of "residue", "atom". "{}" supplied'.format(
                mode))

    # Merge the caller's overrides into a copy of the defaults. The values
    # are plain floats, so a shallow copy is sufficient.
    radii_table = dict(_ATOMIC_RADII)
    if change_radii is not None:
        radii_table.update(change_radii)

    out = np.zeros((xyz.shape[0], dim1), dtype=np.float32)

    atom_radii = []
    for atom in traj.topology.atoms:
        # The element is taken from the text after the first '-' in the
        # atom's string form, with all digits removed.
        # NOTE(review): presumably str(atom) looks like "<residue>-<name>";
        # a residue label that itself contains '-' would break this --
        # confirm against the topology class.
        atom_name = "{}".format(atom).split('-')[1]
        element = ''.join(ch for ch in atom_name if not ch.isdigit())
        atom_radii.append(radii_table[element])

    radii = np.array(atom_radii, np.float32) + probe_radius
    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)
    return out
def shrake_rupley(traj, probe_radius=0.14, n_sphere_points=960, mode='atom'):
    """Compute the solvent accessible surface area of each atom or residue in
    each simulation frame.

    Parameters
    ----------
    traj : Trajectory
        Trajectory exposing ``xyz``, ``topology.atoms`` and, for residue
        mode, ``n_residues`` and ``top.atoms``.
    probe_radius : float, optional
        The radius of the probe, in nm.
    n_sphere_points : int, optional
        The number of points representing the surface of each atom; higher
        values lead to more accuracy.
    mode : {'atom', 'residue'}
        In mode == 'atom', the extracted areas are resolved per-atom.
        In mode == 'residue', this is consolidated down to the per-residue
        SASA by summing over the atoms in each residue.

    Returns
    -------
    areas : np.array, shape=(n_frames, n_features)
        The accessible surface area of each atom or residue in every frame.
        If mode == 'atom', the second dimension indexes the atoms in the
        trajectory, whereas if mode == 'residue', it indexes the residues.

    Raises
    ------
    ValueError
        If ``mode`` is not 'atom' or 'residue', or if the residue indices are
        not contiguous integers starting from zero.

    Notes
    -----
    This code implements the Shrake and Rupley algorithm, with the golden
    section spiral algorithm to generate the sphere points. The basic idea is
    to create a mesh of points representing the surface of each atom (at a
    distance of the van der Waals radius plus the probe radius from the
    nuclei), and then count the number of such mesh points that are on the
    molecular surface -- i.e. not within the radius of another atom. Assuming
    that the points are evenly distributed, the number of points is directly
    proportional to the accessible surface area (it's just 4*pi*r^2 times the
    fraction of the points that are accessible).

    There are a number of different ways to generate the points on the
    sphere -- possibly the best way would be to do a little "molecular
    dynamics": put the points on the sphere, and then run MD where all the
    points repel one another and wait for them to get to an energy minimum.
    But that sounds expensive.

    This code uses the golden section spiral algorithm (picture at
    http://xsisupport.com/2012/02/25/evenly-distributing-points-on-a-sphere-with-the-golden-sectionspiral/)
    where you make a spiral that traces out the unit sphere and then put
    points down equidistant along the spiral. It's cheap, but not perfect.

    The gromacs utility g_sas uses a slightly different algorithm for
    generating points on the sphere, which is based on an icosahedral
    tessellation. Roughly, the icosahedral tessellation works something like
    this
    http://www.ziyan.info/2008/11/sphere-tessellation-using-icosahedron.html

    References
    ----------
    .. [1] Shrake, A; Rupley, JA. (1973) J Mol Biol 79 (2): 351--71.
    """
    xyz = ensure_type(traj.xyz, dtype=np.float32, ndim=3, name='traj.xyz',
                      shape=(None, None, 3), warn_on_cast=False)

    if mode == 'atom':
        dim1 = xyz.shape[1]
        atom_mapping = np.arange(dim1, dtype=np.int32)
    elif mode == 'residue':
        dim1 = traj.n_residues
        atom_mapping = np.array(
            [a.residue.index for a in traj.top.atoms], dtype=np.int32)
        # array_equal tolerates arrays of different length. The original
        # elementwise ``==`` failed to broadcast when indices were missing,
        # which on recent NumPy raises a broadcast error instead of the
        # message below.
        residue_ids = np.unique(atom_mapping)
        if not np.array_equal(residue_ids, np.arange(residue_ids.size)):
            raise ValueError('residues must have contiguous integer indices '
                             'starting from zero')
    else:
        raise ValueError('mode must be one of "residue", "atom". "%s" supplied' %
                         mode)

    out = np.zeros((xyz.shape[0], dim1), dtype=np.float32)
    atom_radii = [_ATOMIC_RADII[atom.element.symbol] for atom in traj.topology.atoms]
    radii = np.array(atom_radii, np.float32) + probe_radius

    _geometry._sasa(xyz, radii, int(n_sphere_points), atom_mapping, out)
    return out