def get_projected_coordinates(feature_type, feature_coordinates, protein, settings):
    """
    Locate the protein atom that can act as hydrogen-bonding partner of a polar feature.

    A 'donor' feature is matched against protein acceptor atoms; any other feature
    type is matched against protein donor atoms. Only atoms strictly closer than
    ``settings.max_hbond_dist`` are considered, and the nearest of them is selected
    (the first atom encountered wins when distances tie).

    :param str feature_type: type of the polar feature, e.g. 'donor'
    :param feature_coordinates: coordinates of the polar feature
    :param protein: a :class:`ccdc.protein.Protein` instance
    :param settings: object exposing the ``max_hbond_dist`` distance cutoff
    :return: coordinates of the selected partner atom, or None if no complementary
        atom lies within the cutoff
    """
    partner_is_acceptor = feature_type == 'donor'

    nearest_atom = None
    nearest_separation = None
    for atom in protein.atoms:
        complementary = atom.is_acceptor if partner_is_acceptor else atom.is_donor
        if not complementary:
            continue

        separation = Helper.get_distance(atom.coordinates, feature_coordinates)
        if separation < settings.max_hbond_dist and (
            nearest_atom is None or separation < nearest_separation
        ):
            # strict '<' keeps the earliest atom when two are equally close
            nearest_atom = atom
            nearest_separation = separation

    if nearest_atom is None:
        return None
    return nearest_atom.coordinates
def neighbourhood(i, j, k, high, catchment=1):
    """
    Find the grid indices neighbouring a given index.

    Candidate points are taken from the cube reaching ``catchment`` steps either
    side of (i, j, k), clipped on every axis to 0 <= index < high[axis]. A candidate
    is returned only if ``Helper.get_distance`` places it exactly 1 away from the
    centre.

    NOTE(review): if ``Helper.get_distance`` is a Euclidean distance, this filter
    keeps only the axis-aligned neighbours (no cubic diagonals), whatever the value
    of ``catchment`` -- confirm this is the intended definition.

    :param int i: i index of the centre
    :param int j: j index of the centre
    :param int k: k index of the centre
    :param high: exclusive upper bound of the valid indices, one entry per axis
    :param int catchment: number of steps from the centre used to collect candidates
    :return: indices of the neighbourhood, each as an [a, b, c] list
    :rtype: list
    """
    centre = [i, j, k]

    def in_bounds(middle, axis):
        # indices within `catchment` of `middle` that fall inside the grid on `axis`
        return [
            value
            for value in range(middle - catchment, middle + catchment + 1)
            if 0 <= value < high[axis]
        ]

    i_candidates = in_bounds(i, 0)
    j_candidates = in_bounds(j, 1)
    k_candidates = in_bounds(k, 2)

    neighbours = []
    for a in i_candidates:
        for b in j_candidates:
            for c in k_candidates:
                point = [a, b, c]
                if Helper.get_distance(point, centre) == 1:
                    neighbours.append(point)
    return neighbours
def deduplicate(self, major, threshold=12, tolerance=2):
    """
    Deduplicate this grid against an overriding grid, used for charged-polar
    deduplication.

    The islands of this grid (found with ``islands(threshold=threshold)``) are
    compared with the islands of ``major``. An island of this grid is discarded
    when, for at least one island of ``major``:

    * it contains the centroid of that island (within ``tolerance``), or
    * it has a ``count_grid()`` of 8 or fewer, or
    * its centroid is less than 4 away from that island's centroid
      (as measured by ``Helper.get_distance``).

    The islands that survive are merged into a single grid.

    :param `ccdc.utilities.Grid` major: overriding grid
    :param int threshold: threshold passed to ``islands()`` for both grids
    :param int tolerance: search radius for determining feature overlap
    :return: a cleared copy of ``major`` when no island of this grid survives;
        otherwise the merged surviving islands, placed onto a cleared copy of
        this grid via ``common_boundaries``
    """
    # Work on a local name rather than rebinding `self`.
    minor = self
    if (minor.bounding_box[0] != major.bounding_box[0]
            or minor.bounding_box[1] != major.bounding_box[1]):
        # The grids do not share a bounding box: compare on the version of this
        # grid returned by major.common_boundaries().
        minor = major.common_boundaries(minor)

    candidates = set(minor.islands(threshold=threshold))

    # major's islands do not depend on the candidate being tested, so extract them
    # once instead of once per candidate (and not at all if there are no candidates).
    major_islands = list(major.islands(threshold=threshold)) if candidates else []

    def duplicates_major(jsland):
        # The size test (count_grid() <= 8) deliberately stays inside the loop over
        # major's islands: as before, small islands are only dropped when `major`
        # has at least one island to compare against.
        return any(
            jsland.contains_point(island.centroid(), tolerance=tolerance)
            or jsland.count_grid() <= 8
            or Helper.get_distance(jsland.centroid(), island.centroid()) < 4
            for island in major_islands
        )

    retained_jslands = [jsland for jsland in candidates if not duplicates_major(jsland)]

    if not retained_jslands:
        return major.copy_and_clear()

    merged = Grid.super_grid(0, *retained_jslands)
    blank = minor.copy_and_clear()
    return blank.common_boundaries(merged)
def get_priority_atom(self, molecule):
    """
    Select the priority atom of a molecule and classify it.

    Polar atoms (donors or acceptors) take precedence: if the molecule has any,
    the one furthest from the centre of geometry is selected. If there are no
    polar atoms, the atom furthest from the centre of geometry is selected.
    When several atoms are equally far away, the first one in ``molecule.atoms``
    order is chosen.

    The atom type is derived from the formal charge first ("negative" /
    "positive"); uncharged atoms are typed "acceptor", "donor" or "apolar", with
    acceptor taking precedence over donor.

    :param molecule: a :class:`ccdc.molecule.Molecule` instance
    :return: tuple of (priority atom, str atom type)
    :raises IndexError: if the molecule has no atoms
    """
    centre = molecule.centre_of_geometry()

    polar_atoms = [a for a in molecule.atoms if a.is_donor or a.is_acceptor]
    candidates = polar_atoms if polar_atoms else list(molecule.atoms)
    if not candidates:
        # same exception type the previous implementation raised on empty input
        raise IndexError("molecule has no atoms to select a priority atom from")

    # The previous implementation took the *smallest* distance (sorted(...)[0]),
    # i.e. the atom closest to the centre, contradicting the documented contract.
    priority_atom = max(
        candidates,
        key=lambda atom: Helper.get_distance(centre, atom.coordinates),
    )

    charge = priority_atom.formal_charge
    if charge < 0:
        pa_type = "negative"
    elif charge > 0:
        pa_type = "positive"
    elif priority_atom.is_acceptor:
        pa_type = "acceptor"
    elif priority_atom.is_donor:
        pa_type = "donor"
    else:
        pa_type = "apolar"

    return priority_atom, pa_type
def _parse_centre(text):
    """Build a Coordinates object from text containing 'x=<float>, y=<float>, z=<float>)'."""
    # presumably the string form of a Coordinates tuple (e.g. after a CSV round trip)
    x_coord = float(text.split('x=')[1].split(',')[0])
    y_coord = float(text.split('y=')[1].split(',')[0])
    z_coord = float(text.split('z=')[1].split(')')[0])
    return Coordinates(x=x_coord, y=y_coord, z=z_coord)


def get_polar_cluster_hits(hits_df, clusters_df, hits_dir):
    """
    Flag, for every docked pose, which polar clusters it hits.

    For each row of ``hits_df`` the pose is read from
    ``<hits_dir>/<followup_id>/concat_ranked_docked_ligands_hs-scored.mol2``, using
    the integer after the last '_' of ``pose_id`` as index into the file. A cluster
    counts as hit (1) when at least one ligand heavy atom of the cluster's probe
    type ('acceptor' or 'donor') lies closer than ``cluster_radius + 1`` to the
    cluster centre; otherwise it scores 0. Clusters with any other probe type
    always score 0.

    :param hits_df: DataFrame with the columns 'followup_id', 'pose_id',
        'followup_smiles' and 'mean_hs_score'
    :param clusters_df: DataFrame with the columns 'cluster_id', 'probe_type',
        'centre_of_mass' (a coordinates object or its string form) and
        'cluster_radius'
    :param hits_dir: directory holding one sub-directory per followup id
    :return: DataFrame with one row per hit: 'followup_id', 'followup_smiles',
        'mean_hs_score', one 0/1 column per cluster id and 'number_hits' (the sum
        of the cluster columns). An empty ``hits_df`` gives an empty frame with
        the same columns.
    """
    cols = clusters_df['cluster_id'].values

    # Cluster properties do not depend on the hit: resolve them once up front
    # instead of re-parsing the centre for every hit/cluster pair.
    clusters = []
    for cluster_id, probe_type, centre, radius in zip(
        cols,
        clusters_df['probe_type'],
        clusters_df['centre_of_mass'],
        clusters_df['cluster_radius'],
    ):
        if isinstance(centre, str):
            centre = _parse_centre(centre)
        # a margin of 1 is added to the cluster radius when deciding on a hit
        clusters.append((cluster_id, probe_type, centre, radius + 1))

    # Pre-seeding every cluster keeps the column assignment below valid even
    # when there are no hits at all.
    clust_hitlist = {cluster_id: [] for cluster_id in cols}
    fu_id_list = []
    fu_smiles_list = []
    mean_hs_scores = []
    hits_list = []

    for _, row in hits_df.iterrows():
        scored_mols = Path(hits_dir, row['followup_id'],
                           'concat_ranked_docked_ligands_hs-scored.mol2')
        pose = int(row['pose_id'].split('_')[-1])
        ccdc_lig = MoleculeReader(str(scored_mols))[pose]

        # NOTE: the output column named 'followup_id' is filled with the pose id.
        fu_id_list.append(row['pose_id'])
        fu_smiles_list.append(row['followup_smiles'])
        mean_hs_scores.append(row['mean_hs_score'])

        atoms_by_probe = {
            'acceptor': [a for a in ccdc_lig.heavy_atoms if a.is_acceptor],
            'donor': [a for a in ccdc_lig.heavy_atoms if a.is_donor],
        }

        num_hits = 0
        for cluster_id, probe_type, centre, cutoff in clusters:
            # Unknown probe types get no target atoms (hence no hit) rather than
            # silently reusing the atoms selected for the previous cluster.
            tar_atoms = atoms_by_probe.get(probe_type, [])
            hit = int(any(
                Helper.get_distance(at.coordinates, centre) < cutoff
                for at in tar_atoms
            ))
            clust_hitlist[cluster_id].append(hit)
            num_hits += hit
        hits_list.append(num_hits)

    scored_df = pd.DataFrame()
    scored_df['followup_id'] = fu_id_list
    scored_df['followup_smiles'] = fu_smiles_list
    scored_df['mean_hs_score'] = mean_hs_scores
    for cl in cols:
        scored_df[cl] = clust_hitlist[cl]
    scored_df['number_hits'] = hits_list
    return scored_df