def __drawOrigCoords(origCoords):
    """Show the pre-rotamerize coordinates as a Coot generic object

    ARGUMENTS:
        origCoords - a chain object; its .nucs are iterated, and each nucleotide
                     must provide .type, .hasAtom(name) and .atoms[name]
    RETURNS:
        objNum     - the number of the newly created generic object
    EFFECTS:
        creates a generic object named "Pre-rotamerize coordinates", switches
        its display on, adds one line per bond and redraws the graphics
    """

    objNum = new_generic_object_number("Pre-rotamerize coordinates")
    set_display_generic_object(objNum, 1)

    # every entry is the start coordinates concatenated with the end coordinates
    segments = []

    # O3' coordinates of the preceding nucleotide (None if it had no O3')
    lastO3 = None

    for nuc in origCoords.nucs:

        # bonds inside this nucleotide: backbone bonds plus the base-specific ones
        bondPairs = BOND_LIST_FULL["backbone"] + BOND_LIST_FULL[nuc.type]
        for (startName, endName) in bondPairs:
            if not (nuc.hasAtom(startName) and nuc.hasAtom(endName)):
                continue
            segments.append(nuc.atoms[startName] + nuc.atoms[endName])

        # inter-nucleotide bond: previous O3' to this nucleotide's phosphate
        if lastO3 is not None and nuc.hasAtom("P"):
            segments.append(lastO3 + nuc.atoms["P"])

        # remember this nucleotide's O3' for the next iteration
        lastO3 = nuc.atoms["O3'"] if nuc.hasAtom("O3'") else None

    # only now hand the collected segments over to Coot
    for segment in segments:
        to_generic_object_add_line(objNum, ORIG_COORDS_COLOR, 6, *segment)
    graphics_draw()

    return objNum
def cluster_and_display_waters(site_number, w_positions_np):
    """Cluster water positions with a Gaussian mixture and draw them in Coot.

    The number of mixture components is chosen by ``optimize_n``; a final
    model is then fitted and two generic objects are created: one holding
    the individual water positions coloured by cluster, and one holding a
    "star" marker per cluster mean (only the latter is switched on here).

    Args:
        site_number: Site identifier; only used (via ``str``) in the names
            of the generic objects.
        w_positions_np: Sequence of water positions, each indexable as
            ``pos[0]``, ``pos[1]``, ``pos[2]``; passed unchanged to the
            mixture model's ``fit``/``predict``.

    Returns:
        Tuple ``(model, assignments)`` where ``model`` is the fitted mixture
        model and ``assignments`` is a list of ``int`` cluster indices, one
        per input position.

    NOTE(review): ``mixture.GMM`` and ``_get_covars`` look like the legacy
    scikit-learn API, which newer releases no longer ship - confirm the
    installed version before upgrading.
    """

    def optimize_n(positions_np, n_data):
        """Return the component count in 1..20 with the highest penalised score."""
        lambda_c = 15  # 3 too few
        bic = {}
        for n in range(1, 21):
            # only try models with fewer components than data points
            if n < len(positions_np):
                gmm = mixture.GMM(n_components=n, covariance_type="spherical",
                                  n_iter=20)
                gmm.fit(positions_np)
                score = sum(gmm.score(positions_np))
                bic[n] = score - lambda_c * 0.5 * math.log(n_data) * n
        for key in bic:
            # single pre-formatted string: same output on Python 2 and 3
            print(" water bic %s %s" % (key, bic[key]))
        if not bic:
            # fewer than two positions: nothing was scored, so fall back to
            # a single component instead of calling max() on an empty dict
            return 1
        return max(bic, key=bic.get)

    n_components = optimize_n(w_positions_np, len(w_positions_np))
    print("optimize_n for water::::::::::::: %s" % (n_components,))

    dpgmm = mixture.GMM(n_components, covariance_type="spherical", n_iter=40)
    dpgmm.fit(w_positions_np)
    cluster_assignments = dpgmm.predict(w_positions_np)

    # palette is cycled with a modulo below, so it never needs to be
    # longer than this
    color_list = (
        "green", "greentint", "sea", "yellow", "yellowtint", "aquamarine",
        "forestgreen", "goldenrod", "orangered", "orange", "cyan", "red",
        "blue",
    )

    means = dpgmm.means_
    cvs = dpgmm._get_covars()

    # individual waters, coloured by cluster (object is created but its
    # display is not switched on here)
    obj = coot.new_generic_object_number(
        "CFC Site " + str(site_number) + " selected waters")
    for i, pos in enumerate(w_positions_np):
        cluster = cluster_assignments[i]
        mean = means[cluster]
        # reject spheres at the origin - (from DPGMM strangeness)
        d = mean[0] * mean[0] + mean[1] * mean[1] + mean[2] * mean[2]
        if d > 1.0:
            col = color_list[cluster % len(color_list)]
            coot.to_generic_object_add_point(obj, col, 10, pos[0], pos[1], pos[2])
        else:
            print("reject prediction %s for cluster %s" % (i, cluster))

    # one star marker per cluster mean
    obj = coot.new_generic_object_number(
        "CFC Site " + str(site_number) + " water cluster means")
    for mean, cv in zip(means, cvs):
        d = mean[0] * mean[0] + mean[1] * mean[1] + mean[2] * mean[2]
        v, _ = linalg.eigh(cv)  # eigenvectors are not needed
        if d > 1.0:
            thick = 2
            cluster_star_obj(obj, mean, thick, v[0])
        else:
            print("reject %s %s" % (mean, v))

    coot.set_display_generic_object(obj, 1)

    cluster_assignments_as_list = [int(x) for x in cluster_assignments]
    return (dpgmm, cluster_assignments_as_list)
def cluster_and_display_chemical_features(site_number, type, chemical_features_list):
    """Cluster the chemical features of one type and draw the cluster means.

    Every feature position is padded with six "fake" points offset by
    +/-0.25 along each axis before the Gaussian mixture is fitted. The
    means of the fitted components are added to a new Coot generic object
    as pentakis dodecahedra.

    Args:
        site_number: Site identifier; only used (via ``str``) in the name
            of the generic object.
        type: Feature type name. ``"Donor"``, ``"Acceptor"``,
            ``"Hydrophobe"`` and ``"Aromatic"`` have their own colours
            (anything else is grey); ``"Aromatic"`` also gets a larger
            penalty when the number of components is chosen.
        chemical_features_list: Sequence of features; ``item[0]`` of each
            must be a position indexable as ``[0]``, ``[1]``, ``[2]``.

    Returns:
        ``[type, features, means_as_list]`` where ``features`` is a list of
        ``[feature, cluster_index]`` pairs (one per input feature) and
        ``means_as_list`` is a list of ``[x, y, z]`` cluster means.
        ``False`` is returned when the chosen number of components exceeds
        the number of features (e.g. for an empty list).
    """

    def optimize_n(feature_type, positions_np, n_data):
        """Return the component count in 1..10 with the highest penalised score."""
        # single pre-formatted string: same output on Python 2 and 3
        print("cluster_and_display_chemical_features.optimize_n called with n_data =  %s"
              % (n_data,))
        lambda_c = 20 if feature_type == "Aromatic" else 15
        bic = {}
        for n in range(1, 11):
            # only try models with fewer components than real features
            if n < n_data:
                gmm = mixture.GMM(n_components=n, covariance_type="spherical",
                                  n_iter=20)
                gmm.fit(positions_np)
                score = sum(gmm.score(positions_np))
                bic[n] = score - lambda_c * 0.5 * math.log(n_data) * n
        if len(bic) > 1:
            return max(bic, key=bic.get)
        return 1

    def analyse_bic(feature_type, positions_np, n_data):
        """Debugging aid: print score and penalised score for 1..14 components."""
        # NOTE(review): unlike optimize_n the penalty here uses n_data, not
        # log(n_data) - confirm whether that is intended
        lambda_c = 3000 if feature_type == "Aromatic" else 3
        for n in range(1, 15):
            gmm = mixture.GMM(n_components=n, covariance_type="spherical",
                              n_iter=20)
            gmm.fit(positions_np)
            score = sum(gmm.score(positions_np))
            bic = score - lambda_c * 0.5 * n_data * n
            print("%s %s %s converged? %s score: %s bic %s"
                  % (feature_type, len(positions_np), n, gmm.converged_, score, bic))

    def get_cfc_col(feature_type):
        """Return the display colour name for a feature type."""
        colours = {
            "Donor": "blue",
            "Acceptor": "red",
            "Hydrophobe": "yellow",
            "Aromatic": "orange",
        }
        return colours.get(feature_type, "grey")

    # --- main line ----

    # Real positions come first so that cluster_assignments[i] still refers
    # to chemical_features_list[i]; the fake points follow.
    delta = 0.25
    ext_chemical_features_list = [item[0] for item in chemical_features_list]
    for item_b in chemical_features_list:
        item = item_b[0]
        x, y, z = item[0], item[1], item[2]
        ext_chemical_features_list.extend([
            [x, y, z + delta],
            [x, y, z - delta],
            [x, y + delta, z],
            [x, y - delta, z],
            [x + delta, y, z],
            [x - delta, y, z],
        ])

    positions_np = np.array(ext_chemical_features_list)

    # analyse_bic(type, positions_np, len(chemical_features_list))

    n_data = len(chemical_features_list)
    n = 1
    if n_data > 1:
        n = optimize_n(type, positions_np, n_data)

    if n > n_data:
        # oops too many parameters for the model
        return False

    gmm = mixture.GMM(n_components=n, covariance_type="spherical", n_iter=20)
    gmm.fit(positions_np)
    print("%s %s %s converged?  %s score: %s"
          % (type, len(positions_np), n, gmm.converged_,
             sum(gmm.score(positions_np))))
    cluster_assignments = gmm.predict(positions_np)

    # pair every real feature with the index of its cluster
    features = [[cf, int(cluster_assignments[i])]
                for i, cf in enumerate(chemical_features_list)]

    means_as_list = [[x[0], x[1], x[2]] for x in gmm.means_]

    obj_name = "CFC Site " + str(site_number) + " " + type + " pharmacophore-clusters"
    cfc_obj = coot.new_generic_object_number(obj_name)
    cfc_col = get_cfc_col(type)

    for mean in means_as_list:
        coot.to_generic_object_add_pentakis_dodecahedron(
            cfc_obj, cfc_col, 2.3, 0.1, mean[0], mean[1], mean[2])
    coot.set_display_generic_object(cfc_obj, 1)

    return [type, features, means_as_list]
def cluster_and_display_waters(site_number, w_positions_np):
    """Cluster water positions with a Gaussian mixture and draw them in Coot.

    ``optimize_n`` picks the number of mixture components, a final model is
    fitted with that count, and two generic objects are created: the water
    positions coloured by cluster, and a "star" marker for each cluster
    mean. Only the second object has its display switched on here.

    Args:
        site_number: Site identifier; only used (via ``str``) in the names
            of the generic objects.
        w_positions_np: Sequence of water positions, each indexable as
            ``pos[0]``, ``pos[1]``, ``pos[2]``; passed unchanged to the
            mixture model's ``fit``/``predict``.

    Returns:
        Tuple ``(model, assignments)``: the fitted mixture model and a list
        of ``int`` cluster indices, one per input position.

    NOTE(review): ``mixture.GMM`` and ``_get_covars`` look like the legacy
    scikit-learn API, which newer releases no longer ship - confirm the
    installed version before upgrading.
    """

    palette = (
        'green', 'greentint', "sea", 'yellow', "yellowtint", "aquamarine",
        "forestgreen", "goldenrod", "orangered", "orange", "cyan", 'red',
        "blue",
    )

    def optimize_n(positions_np, n_data):
        """Return the component count in 1..20 with the highest penalised score."""
        lambda_c = 15  # 3 too few
        bic = {}
        for n in range(1, 21):
            # only try models with fewer components than data points
            if n >= len(positions_np):
                continue
            gmm = mixture.GMM(n_components=n, covariance_type='spherical',
                              n_iter=20)
            gmm.fit(positions_np)
            score = sum(gmm.score(positions_np))
            bic[n] = score - lambda_c * 0.5 * math.log(n_data) * n
        for key in bic:
            print(" water bic", key, bic[key])
        if not bic:
            # fewer than two positions: nothing was scored, so fall back to
            # a single component instead of calling max() on an empty dict
            return 1
        return max(bic, key=bic.get)

    def dist_sq_from_origin(point):
        """Return the squared distance of *point* from the origin."""
        return point[0] * point[0] + point[1] * point[1] + point[2] * point[2]

    n_components = optimize_n(w_positions_np, len(w_positions_np))
    print("optimize_n for water:::::::::::::", n_components)

    dpgmm = mixture.GMM(n_components, covariance_type='spherical', n_iter=40)
    dpgmm.fit(w_positions_np)
    cluster_assignments = dpgmm.predict(w_positions_np)

    means = dpgmm.means_
    cvs = dpgmm._get_covars()

    # individual waters, coloured by cluster (object is created but its
    # display is not switched on here)
    obj = coot.new_generic_object_number(
        "CFC Site " + str(site_number) + " selected waters")
    for i, pos in enumerate(w_positions_np):
        cluster = cluster_assignments[i]
        # reject spheres at the origin - (from DPGMM strangeness)
        if dist_sq_from_origin(means[cluster]) > 1.0:
            # cycle through the palette rather than building a huge list
            col = palette[cluster % len(palette)]
            coot.to_generic_object_add_point(obj, col, 10, pos[0], pos[1], pos[2])
        else:
            print("reject prediction", i, "for cluster", cluster)

    # one star marker per cluster mean
    obj = coot.new_generic_object_number(
        "CFC Site " + str(site_number) + " water cluster means")
    for mean, cv in zip(means, cvs):
        v, _ = linalg.eigh(cv)  # eigenvectors are not needed
        if dist_sq_from_origin(mean) > 1.0:
            thick = 2
            cluster_star_obj(obj, mean, thick, v[0])
        else:
            print("reject", mean, v)

    coot.set_display_generic_object(obj, 1)

    cluster_assignments_as_list = [int(x) for x in cluster_assignments]
    return (dpgmm, cluster_assignments_as_list)
def cluster_and_display_chemical_features(site_number, type, chemical_features_list):
    """Fit a Gaussian mixture to one feature type and display the cluster means.

    Each feature position is accompanied by six "fake" points shifted by
    +/-0.25 along every axis before fitting. The component means are drawn
    as pentakis dodecahedra in a new Coot generic object.

    Args:
        site_number: Site identifier; only used (via ``str``) in the name
            of the generic object.
        type: Feature type name; selects the display colour and, for
            ``'Aromatic'``, a larger penalty in the component-count search.
        chemical_features_list: Sequence of features; ``item[0]`` of each
            must be a position indexable as ``[0]``, ``[1]``, ``[2]``.

    Returns:
        ``[type, features, means_as_list]`` - ``features`` pairs every input
        feature with its ``int`` cluster index, ``means_as_list`` holds the
        ``[x, y, z]`` cluster means - or ``False`` when the chosen number of
        components exceeds the number of features.
    """

    def optimize_n(type, positions_np, n_data):
        """Return the component count in 1..10 with the highest penalised score."""
        print("cluster_and_display_chemical_features.optimize_n called with n_data = ",
              n_data)
        penalties = {}
        for n in range(1, 11):
            # only try models with fewer components than real features
            if n >= n_data:
                continue
            model = mixture.GMM(n_components=n, covariance_type='spherical',
                                n_iter=20)
            model.fit(positions_np)
            score = sum(model.score(positions_np))
            lambda_c = 20 if type == 'Aromatic' else 15
            penalties[n] = score - lambda_c * 0.5 * math.log(n_data) * n
        if len(penalties) <= 1:
            return 1
        return max(penalties, key=penalties.get)

    def analyse_bic(type, positions_np, n_data):
        """Debugging aid: print score and penalised score for 1..14 components."""
        for n in range(1, 15):
            model = mixture.GMM(n_components=n, covariance_type='spherical',
                                n_iter=20)
            model.fit(positions_np)
            score = sum(model.score(positions_np))
            lambda_c = 3000 if type == 'Aromatic' else 3
            bic = score - lambda_c * 0.5 * n_data * n
            print(type, len(positions_np), n, "converged?", model.converged_,
                  "score:", score, "bic", bic)

    def get_cfc_col(type):
        """Return the display colour name for a feature type."""
        colour_by_type = {
            "Donor": "blue",
            "Acceptor": "red",
            "Hydrophobe": "yellow",
            "Aromatic": "orange",
        }
        return colour_by_type.get(type, "grey")

    # --- main line ----

    # Real positions first (so assignment i belongs to feature i), then the
    # six axis-shifted fake points of every feature, in input order.
    delta = 0.25
    padded_positions = [feature[0] for feature in chemical_features_list]
    for feature in chemical_features_list:
        centre = feature[0]
        padded_positions += [
            [centre[0], centre[1], centre[2] + delta],
            [centre[0], centre[1], centre[2] - delta],
            [centre[0], centre[1] + delta, centre[2]],
            [centre[0], centre[1] - delta, centre[2]],
            [centre[0] + delta, centre[1], centre[2]],
            [centre[0] - delta, centre[1], centre[2]],
        ]
    positions_np = np.array(padded_positions)

    # analyse_bic(type, positions_np, len(chemical_features_list))

    n_data = len(chemical_features_list)
    n = optimize_n(type, positions_np, n_data) if n_data > 1 else 1

    if n > len(chemical_features_list):
        # oops too many parameters for the model
        return False

    gmm = mixture.GMM(n_components=n, covariance_type='spherical', n_iter=20)
    gmm.fit(positions_np)
    print(type, len(positions_np), n, "converged? ", gmm.converged_, "score:",
          sum(gmm.score(positions_np)))
    cluster_assignments = gmm.predict(positions_np)

    # pair every real feature with the index of its cluster
    features = [[cf, int(cluster_assignments[idx])]
                for idx, cf in enumerate(chemical_features_list)]

    means_as_list = [[m[0], m[1], m[2]] for m in gmm.means_]

    cfc_obj = coot.new_generic_object_number(
        "CFC Site " + str(site_number) + " " + type + " pharmacophore-clusters")
    cfc_col = get_cfc_col(type)

    for cx, cy, cz in means_as_list:
        coot.to_generic_object_add_pentakis_dodecahedron(
            cfc_obj, cfc_col, 2.3, 0.1, cx, cy, cz)
    coot.set_display_generic_object(cfc_obj, 1)

    return [type, features, means_as_list]