def get_residue_name(imol, res_spec):
    """Return the residue name for ``res_spec`` in molecule ``imol``.

    The chain id and residue number are pulled out of the spec with the
    ``rsu`` helpers and handed to ``coot.residue_name``.  The insertion
    code is always passed as an empty string.
    """
    chain_id = rsu.residue_spec_to_chain_id(res_spec)
    res_no = rsu.residue_spec_to_res_no(res_spec)
    ins_code = ""
    return coot.residue_name(imol, chain_id, res_no, ins_code)
def find_the_sites(self, file_name_comp_id_list):
    """Cluster ligand atom positions into sites and store them on ``self``.

    Each entry of ``file_name_comp_id_list`` is indexed as
    ``[file_name, comp_id]``.  Every file name is passed to
    ``coot.handle_read_draw_molecule_with_recentre`` and the positions of
    all atoms in residues matching ``comp_id`` are collected.  Those
    positions are clustered with a Gaussian mixture model, mergeable
    clusters are merged, and the per-atom site assignments are handed to
    ``coot.chemical_feature_clusters_accept_site_clusters_info_py``.  The
    value that call returns is stored in ``self.sites``.

    Returns:
        ``False`` when fewer than 3 atom positions were collected (nothing
        is clustered and ``self.sites`` is not touched); otherwise ``None``.
    """
    # main line
    #
    coords_with_spec = []
    for fn_comp_id in file_name_comp_id_list:
        fn = fn_comp_id[0]
        comp_id = fn_comp_id[1]
        imol = coot.handle_read_draw_molecule_with_recentre(fn, 0)

        # what are the residue specs for the given comp_ids?
        residue_specs = coot.get_residue_specs_in_mol_py(imol, comp_id)
        print(fn, residue_specs)

        for spec in residue_specs:
            chain_id = rsu.residue_spec_to_chain_id(spec)
            res_no = rsu.residue_spec_to_res_no(spec)
            ins_code = ""
            res_info = coot.residue_info_py(imol, chain_id, res_no, ins_code)
            for atom in res_info:
                coords_with_spec.append(
                    [rsu.residue_atom_to_position(atom), imol, spec])

    # now cluster coords. There will be 1 (usually), maybe 2 possibly 3 sites
    if len(coords_with_spec) < 3:
        return False

    coords = [x[0] for x in coords_with_spec]
    positions_np = np.array(coords)
    n_components = self.optimize_n(positions_np, len(positions_np))
    print("optimize_n for sites::::::::::::", n_components)

    dpgmm = mixture.GMM(n_components, covariance_type="full", n_iter=40)
    dpgmm.fit(positions_np)
    cluster_assignments = dpgmm.predict(positions_np)
    means = dpgmm.means_
    weights = dpgmm.weights_

    print(cluster_assignments)
    print(means)
    print(weights)
    print("cluster_assignments", cluster_assignments)

    # which key (i.e. cluster index) has the most number of other clusters
    # that can be merged in?
    merge_map = self.find_mergeable_clusters(means, weights)

    # convert to a list of ints (not numpy.int64) because, on decoding the
    # Python->C++ object, an integer check is done on the site_idx and a
    # numpy.int64 fails that test
    new_cluster_assignments = [
        int(x) for x in self.merge_clusters(cluster_assignments, merge_map)
    ]
    print("new cluster_assignments", new_cluster_assignments)

    # drop the position, keeping [imol, spec] for every atom
    specs = [x[1:] for x in coords_with_spec]

    # zip() is a one-shot iterator on Python 3; materialise it so that the
    # binding receives a real list, as it did on Python 2
    cluster_assignments_with_specs = list(zip(new_cluster_assignments, specs))
    sites = coot.chemical_feature_clusters_accept_site_clusters_info_py(
        cluster_assignments_with_specs)

    # debug: register a generic object with a star at every cluster mean
    # (it is not switched on for display here)
    o = coot.new_generic_object_number("site clusters")
    for mean in means:
        cluster_star_obj(o, mean, 2, 2)

    self.sites = sites
def cfc_process_site(site_number, imol_ligand_specs, first_ligand_spec):
    """Cluster waters and chemical features for one ligand site.

    Args:
        site_number: index of the site; LSQ superposition of the other
            molecules onto the first is only set up when this is 0.
        imol_ligand_specs: sequence of entries whose element ``[0]`` is a
            molecule number; the first entry's molecule is the reference.
        first_ligand_spec: residue spec of the ligand in the reference
            molecule, used to find the environment residues and the
            view centre.

    The clustering results are passed to
    ``coot.chemical_feature_clusters_accept_info_py``.  Nothing is
    returned.  If ``coot.chemical_feature_clusters_py`` raises
    ``TypeError`` the error is printed and the function returns early.
    """
    imol_first = imol_ligand_specs[0][0]  # others are lsq fitted to this

    env_residue_specs = coot.residues_near_residue_py(
        imol_first, first_ligand_spec, 6)
    protein_res_specs = [
        r for r in env_residue_specs
        if get_residue_name(imol_first, r) != "HOH"
    ]

    # only lsq the first (0th) one - that one has the most ligands in the site
    #
    if site_number == 0:
        print("DEBUG:: protein_res_specs (for lsqing):")
        for spec in protein_res_specs:
            print(" ", spec, get_residue_name(imol_first, spec))

        for res_spec in protein_res_specs:
            chain_id = rsu.residue_spec_to_chain_id(res_spec)
            res_no = rsu.residue_spec_to_res_no(res_spec)
            coot.add_lsq_match(res_no, res_no, chain_id,
                               res_no, res_no, chain_id, 1)

        # lsq fit others to the first in the list
        for imol_and_spec in imol_ligand_specs[1:]:
            coot.apply_lsq_matches_py(imol_first, imol_and_spec[0])

    ligand_centre = coot.residue_centre_py(
        imol_first,
        rsu.residue_spec_to_chain_id(first_ligand_spec),
        rsu.residue_spec_to_res_no(first_ligand_spec),
        "",
    )
    coot.set_go_to_atom_molecule(imol_first)
    coot.set_rotation_centre(*ligand_centre)

    # we have a large radius for the water selection
    radius = 10  # water must be within radius of its own ligand
    radius_2 = 5  # water must be within radius_2 of any ligand atom (not just its own)
    try:
        combo_list = coot.chemical_feature_clusters_py(
            env_residue_specs, imol_ligand_specs, radius, radius_2)
    except TypeError as e:
        print(e)
        # nothing below can work without the combo list; previously the
        # code carried on and failed with IndexError on an empty list
        return

    water_position_list = combo_list[0]
    chemical_feature_list = combo_list[1]

    # ----------- handle waters -----------

    # The real water positions come first (cluster_assignments[i] below is
    # indexed by water), followed by six points per water offset by
    # +/- delta along z, y and x.
    water_positions = [wat[2] for wat in water_position_list]
    w_positions_list = list(water_positions)
    delta = 0.1
    for item in water_positions:
        w_positions_list.extend([
            [item[0], item[1], item[2] + delta],
            [item[0], item[1], item[2] - delta],
            [item[0], item[1] + delta, item[2]],
            [item[0], item[1] - delta, item[2]],
            [item[0] + delta, item[1], item[2]],
            [item[0] - delta, item[1], item[2]],
        ])
    w_positions_np = np.array(w_positions_list)

    # the number of clusters is highly related to the dist_cutoff (the
    # distance of an accepted water atom to any atom in any of the
    # ligands = currently 4.2)
    #
    gmm, cluster_assignments = cluster_and_display_waters(
        site_number, w_positions_np)
    means = gmm.means_
    cvs = gmm._get_covars()
    weights = gmm.weights_

    print("water means:")
    for mean in means:
        print(" ", mean)

    # each water has been assigned a cluster, that is the cluster_assignments
    #
    # need to convert the array cluster_assignments to a list of items:
    # [imol water_residue_spec cluster_number]
    #
    water_cluster_info_for_input = [
        [water_pos[0], water_pos[1], cluster_assignments[i]]
        for i, water_pos in enumerate(water_position_list)
    ]

    # cluster_info is a list of
    #   list of water cluster info
    #     list of [mean, weight, length] where length is the eigenvalue v[0],
    #     (same as v[1], v[2] - all the same for spherical model)
    #   list of cluster predictions for the input positions
    #
    # list(...) so that a real list (not a one-shot zip iterator) is built
    ci = list(zip([[l[0], l[1], l[2]] for l in means],
                  weights,
                  [cv[0][0] for cv in cvs]))
    water_cluster_info = [ci, water_cluster_info_for_input]

    coot.set_display_generic_objects_as_solid(1)

    # ----------- handle chemical features -----------

    # group the chemical features by type; entries of any other type
    # are ignored
    known_feature_types = ("Donor", "Acceptor", "Aromatic",
                           "Hydrophobe", "LumpedHydrophobe")
    chemical_features_dict = {}
    for item in chemical_feature_list:
        feature_type = item[0]
        if feature_type in known_feature_types:
            chemical_features_dict.setdefault(
                feature_type, []).append(item[1:])

    chemical_feature_clusters_info = []
    for key in chemical_features_dict:
        # list of [type, features-annotated-by-cluster-number, cluster_means]
        clusters = cluster_and_display_chemical_features(
            site_number, key, chemical_features_dict[key])
        chemical_feature_clusters_info.append(clusters)

    # give those results back to c++ so that we can use them for display
    cluster_info = [water_cluster_info, chemical_feature_clusters_info]
    coot.chemical_feature_clusters_accept_info_py(
        site_number, protein_res_specs, imol_ligand_specs, cluster_info)
def find_the_sites(self, file_name_comp_id_list):
    """Cluster ligand atom positions into sites and store them on ``self``.

    Each entry of ``file_name_comp_id_list`` is indexed as
    ``[file_name, comp_id]``.  Every file name is passed to
    ``coot.handle_read_draw_molecule_with_recentre`` and the positions of
    all atoms in residues matching ``comp_id`` are collected.  Those
    positions are clustered with a Gaussian mixture model, mergeable
    clusters are merged, and the per-atom site assignments are handed to
    ``coot.chemical_feature_clusters_accept_site_clusters_info_py``.  The
    value that call returns is stored in ``self.sites``.

    Returns:
        ``False`` when fewer than 3 atom positions were collected (nothing
        is clustered and ``self.sites`` is not touched); otherwise ``None``.
    """
    # main line
    #
    coords_with_spec = []
    for fn_comp_id in file_name_comp_id_list:
        fn = fn_comp_id[0]
        comp_id = fn_comp_id[1]
        imol = coot.handle_read_draw_molecule_with_recentre(fn, 0)

        # what are the residue specs for the given comp_ids?
        residue_specs = coot.get_residue_specs_in_mol_py(imol, comp_id)
        print(fn, residue_specs)

        for spec in residue_specs:
            chain_id = rsu.residue_spec_to_chain_id(spec)
            res_no = rsu.residue_spec_to_res_no(spec)
            ins_code = ''
            res_info = coot.residue_info_py(imol, chain_id, res_no, ins_code)
            for atom in res_info:
                coords_with_spec.append(
                    [rsu.residue_atom_to_position(atom), imol, spec])

    # now cluster coords. There will be 1 (usually), maybe 2 possibly 3 sites
    if len(coords_with_spec) < 3:
        return False

    coords = [x[0] for x in coords_with_spec]
    positions_np = np.array(coords)
    n_components = self.optimize_n(positions_np, len(positions_np))
    print("optimize_n for sites::::::::::::", n_components)

    dpgmm = mixture.GMM(n_components, covariance_type='full', n_iter=40)
    dpgmm.fit(positions_np)
    cluster_assignments = dpgmm.predict(positions_np)
    means = dpgmm.means_
    weights = dpgmm.weights_

    print(cluster_assignments)
    print(means)
    print(weights)
    print("cluster_assignments", cluster_assignments)

    # which key (i.e. cluster index) has the most number of other clusters
    # that can be merged in?
    merge_map = self.find_mergeable_clusters(means, weights)

    # convert to a list of ints (not numpy.int64) because, on decoding the
    # Python->C++ object, an integer check is done on the site_idx and a
    # numpy.int64 fails that test
    new_cluster_assignments = [
        int(x) for x in self.merge_clusters(cluster_assignments, merge_map)
    ]
    print("new cluster_assignments", new_cluster_assignments)

    # drop the position, keeping [imol, spec] for every atom
    specs = [x[1:] for x in coords_with_spec]

    # zip() returns a one-shot iterator on Python 3; build a real list
    # before handing it to the binding (as is already done for the water
    # cluster info elsewhere in this file)
    cluster_assignments_with_specs = list(zip(new_cluster_assignments, specs))
    sites = coot.chemical_feature_clusters_accept_site_clusters_info_py(
        cluster_assignments_with_specs)

    # debug: register a generic object with a star at every cluster mean
    # (it is not switched on for display here)
    o = coot.new_generic_object_number("site clusters")
    for mean in means:
        cluster_star_obj(o, mean, 2, 2)

    self.sites = sites
def cfc_process_site(site_number, imol_ligand_specs, imol_first,
                     first_ligand_spec):
    """Cluster waters and chemical features for one ligand site.

    Args:
        site_number: index of the site; LSQ superposition of the other
            molecules onto ``imol_first`` is only set up when this is 0.
        imol_ligand_specs: sequence of ``(imol, spec)`` pairs; every entry
            after the first is superposed onto ``imol_first``.
        imol_first: molecule number of the reference molecule.
        first_ligand_spec: residue spec of the ligand in the reference
            molecule, used to find the environment residues and the
            view centre.

    The clustering results are passed to
    ``coot.chemical_feature_clusters_accept_info_py``.  Nothing is
    returned.  If ``coot.chemical_feature_clusters_py`` raises
    ``TypeError`` the error is printed and the function returns early.
    """
    print("debug:: in cfc_process_site with imol_ligand_specs",
          imol_ligand_specs)
    print("debug:: in cfc_process_site with non-first imol_ligand_specs",
          imol_ligand_specs[1:])

    env_residue_specs = coot.residues_near_residue_py(
        imol_first, first_ligand_spec, 6)
    protein_res_specs = [
        r for r in env_residue_specs
        if get_residue_name(imol_first, r) != "HOH"
    ]

    # only lsq the first (0th) one - that one has the most ligands in the site
    #
    if site_number == 0:
        for res_spec in protein_res_specs:
            chain_id = rsu.residue_spec_to_chain_id(res_spec)
            res_no = rsu.residue_spec_to_res_no(res_spec)
            coot.add_lsq_match(res_no, res_no, chain_id,
                               res_no, res_no, chain_id, 1)

        # lsq fit others to the first in the list
        for imol_and_spec in imol_ligand_specs[1:]:
            print('============================ lsq-match ', imol_first,
                  imol_and_spec, imol_and_spec[0])
            imol, spec = imol_and_spec
            coot.apply_lsq_matches_py(imol_first, imol)
            make_ball_and_stick_by_spec(imol, spec)

    print("Here with first_ligand_spec:", first_ligand_spec)
    ligand_centre = coot.residue_centre_py(
        imol_first,
        rsu.residue_spec_to_chain_id(first_ligand_spec),
        rsu.residue_spec_to_res_no(first_ligand_spec),
        '')
    coot.set_go_to_atom_molecule(imol_first)
    coot.set_rotation_centre(*ligand_centre)

    # we have a large radius for the water selection
    radius = 10  # water must be within radius of its own ligand
    radius_2 = 5  # water must be within radius_2 of any ligand atom (not just its own)
    try:
        combo_list = coot.chemical_feature_clusters_py(
            env_residue_specs, imol_ligand_specs, radius, radius_2)
    except TypeError as e:
        print(e)
        # nothing below can work without the combo list; previously the
        # code carried on and failed with IndexError on an empty list
        return

    water_position_list = combo_list[0]
    chemical_feature_list = combo_list[1]

    # ----------- handle waters -----------

    # The real water positions come first (cluster_assignments[i] below is
    # indexed by water), followed by six points per water offset by
    # +/- delta along z, y and x.
    water_positions = [wat[2] for wat in water_position_list]
    w_positions_list = list(water_positions)
    delta = 0.1
    for item in water_positions:
        w_positions_list.extend([
            [item[0], item[1], item[2] + delta],
            [item[0], item[1], item[2] - delta],
            [item[0], item[1] + delta, item[2]],
            [item[0], item[1] - delta, item[2]],
            [item[0] + delta, item[1], item[2]],
            [item[0] - delta, item[1], item[2]],
        ])
    w_positions_np = np.array(w_positions_list)

    # the number of clusters is highly related to the dist_cutoff (the
    # distance of an accepted water atom to any atom in any of the
    # ligands = currently 4.2)
    #
    gmm, cluster_assignments = cluster_and_display_waters(
        site_number, w_positions_np)
    means = gmm.means_
    cvs = gmm._get_covars()
    weights = gmm.weights_

    print("water means:")
    for mean in means:
        print(" ", mean)

    # each water has been assigned a cluster, that is the cluster_assignments
    #
    # need to convert the array cluster_assignments to a list of items:
    # [imol water_residue_spec cluster_number]
    #
    water_cluster_info_for_input = [
        [water_pos[0], water_pos[1], cluster_assignments[i]]
        for i, water_pos in enumerate(water_position_list)
    ]

    # cluster_info is a list of
    #   list of water cluster info
    #     list of [mean, weight, length] where length is the eigenvalue v[0],
    #     (same as v[1], v[2] - all the same for spherical model)
    #   list of cluster predictions for the input positions
    #
    ci = list(zip([[l[0], l[1], l[2]] for l in means],
                  weights,
                  [cv[0][0] for cv in cvs]))
    water_cluster_info = [ci, water_cluster_info_for_input]

    coot.set_display_generic_objects_as_solid(1)

    # ----------- handle chemical features -----------

    # group the chemical features by type; entries of any other type
    # are ignored
    known_feature_types = ('Donor', 'Acceptor', 'Aromatic',
                           'Hydrophobe', 'LumpedHydrophobe')
    chemical_features_dict = {}
    for item in chemical_feature_list:
        feature_type = item[0]
        if feature_type in known_feature_types:
            chemical_features_dict.setdefault(
                feature_type, []).append(item[1:])

    chemical_feature_clusters_info = []
    for key in chemical_features_dict:
        # list of [type, features-annotated-by-cluster-number, cluster_means]
        clusters = cluster_and_display_chemical_features(
            site_number, key, chemical_features_dict[key])
        chemical_feature_clusters_info.append(clusters)

    # give those results back to c++ so that we can use them for display
    cluster_info = [water_cluster_info, chemical_feature_clusters_info]
    coot.chemical_feature_clusters_accept_info_py(
        site_number, protein_res_specs, imol_ligand_specs, cluster_info)