def get_best_clustering(results_file):
    """
    Loads and returns the best clustering from a results file.

    The file contents are parsed as JSON and passed through convert_to_utf8.
    The entry of results["selected"] whose key is results["best_clustering"]
    is then looked up and its "clustering" item is handed to
    Clustering.from_dic.

    results_file -- Path of the JSON results file to read.

    Returns whatever Clustering.from_dic builds from the stored dictionary.
    A missing "best_clustering", "selected" or "clustering" key raises
    KeyError.
    """
    # Read inside a context manager so the handle is closed even when reading
    # or parsing fails (the bare open(...).read() left it to the garbage
    # collector).
    with open(results_file) as results_handle:
        results = convert_to_utf8(json.loads(results_handle.read()))

    best_clustering_id = results["best_clustering"]
    best_clustering_dic = results["selected"][best_clustering_id]
    return Clustering.from_dic(best_clustering_dic["clustering"])
def get_best_clustering(results_file):
    """
    Loads and returns the best clustering from a results file.

    The JSON document stored in 'results_file' is decoded, converted with
    convert_to_utf8, and the clustering referenced by its "best_clustering"
    key is fetched from the "selected" section.

    results_file -- Path of the JSON results file to read.

    Returns the object produced by Clustering.from_dic for the "clustering"
    item of the best clustering. KeyError is raised if any of the expected
    keys is absent.
    """
    # 'with' guarantees the file is closed; the previous one-liner never
    # closed the handle explicitly.
    with open(results_file) as results_handle:
        raw_contents = results_handle.read()

    results = convert_to_utf8(json.loads(raw_contents))
    best_clustering_id = results["best_clustering"]
    best_clustering_dic = results["selected"][best_clustering_id]
    return Clustering.from_dic(best_clustering_dic["clustering"])
def load_clustering(self, parameters):
    """
    Builds a clustering from the 'parameters' item of the generation section
    of the script.

    Returns a 4-tuple: the id "loaded_clustering", a dictionary mapping that
    id to {"clustering": <object built by Clustering.from_dic>}, an empty
    dictionary and None.
    """
    generation_parameters = parameters["clustering"]["generation"]["parameters"]
    clustering_info = {"clustering": Clustering.from_dic(generation_parameters)}
    return ("loaded_clustering", {"loaded_clustering": clustering_info}, {}, None)
def testSilhouetteSpecialCase(self):
    """
    Runs SilhouetteCoefficientCalculator.evaluate on the clustering described
    by data.clustering_01 and the matrix loaded from
    "data/example_clustering_1_matrix", and prints the result.

    The value is only printed; nothing is asserted about it.
    """
    clustering = Clustering.from_dic(data.clustering_01)

    matrix_description = {
        "method": "load",
        "parameters": {
            "path": "data/example_clustering_1_matrix"
        }
    }
    matrix_handler = MatrixHandler(matrix_description)

    calculator = SilhouetteCoefficientCalculator()
    matrix = matrix_handler.create_matrix(None)

    # A single parenthesised argument prints the same under the Python 2
    # print statement and the Python 3 print function.
    print(calculator.evaluate(clustering, matrix))
def get_best_clustering(self, parameters):
    """
    Obtains the clustering to work with, either by loading it from the script
    or by running the clustering protocol.

    parameters -- Script parameters. parameters["clustering"]["generation"]
    ["method"] selects the behaviour:
        - "load": the clustering is built with Clustering.from_dic from the
          generation section.
        - "generate": ClusteringProtocol is run with the handlers stored in
          this object.
        - anything else: nothing is done.

    Returns the tuple (best_clustering, clustering_results). 'best_clustering'
    is a dictionary for "load", the selected entry for "generate", or None.
    'clustering_results' is whatever the protocol returned, or None when the
    protocol was not run.

    If the protocol returns None or reports no best clustering id, a
    "SHUTDOWN" notification is sent, a message is printed and the program
    exits.
    """
    generation_method = parameters["clustering"]["generation"]["method"]

    best_clustering = None
    clustering_results = None

    ##############################
    # Load the clustering
    ##############################
    if generation_method == "load":
        # NOTE(review): load_clustering reads ["generation"]["parameters"]
        # while this branch passes the whole "generation" section; confirm
        # which layout Clustering.from_dic expects here.
        best_clustering = {"clustering": Clustering.from_dic(parameters["clustering"]["generation"])}

    ##############################
    # Or generate it
    ##############################
    elif generation_method == "generate":
        clustering_results = ClusteringProtocol(self.timer, self.observer).run(parameters,
                                                                               self.matrixHandler,
                                                                               self.workspaceHandler,
                                                                               self.trajectoryHandler)
        abort = False
        if clustering_results is not None:
            best_clustering_id, selected, not_selected, scores = clustering_results  # @UnusedVariable

            #################################
            # Abort if no clusters were found
            #################################
            if best_clustering_id is None:
                abort = True
            else:
                # Only index 'selected' with a real id. Looking up None raised
                # KeyError before the abort notification below could be sent.
                best_clustering = selected[best_clustering_id]
        else:
            abort = True

        if abort:
            self.notify("SHUTDOWN", "Improductive clustering search. Relax evaluation constraints.")
            # Single-argument parenthesised print works on Python 2 and 3.
            print("[FATAL Driver:get_best_clustering] Improductive clustering search. Exiting...")
            exit()

    return best_clustering, clustering_results
def load_clustering(self, parameters):
    """
    Creates a clustering from parameters["clustering"]["generation"]
    ["parameters"] using Clustering.from_dic.

    Returns a 4-tuple made of the id "loaded_clustering", a dictionary with
    that id as its only key (its value is {"clustering": <built object>}),
    an empty dictionary and None.
    """
    generation_section = parameters["clustering"]["generation"]
    loaded = Clustering.from_dic(generation_section["parameters"])

    clusterings_by_id = {
        "loaded_clustering": {
            "clustering": loaded
        }
    }
    return ("loaded_clustering", clusterings_by_id, {}, None)
"minimum_clusters"], parameters["clustering"]["evaluation"][ "maximum_clusters"] if dataset_name in data.criteria: parameters["clustering"]["evaluation"][ "evaluation_criteria"] = data.criteria[dataset_name] else: parameters["clustering"]["evaluation"][ "evaluation_criteria"] = data.criteria["default"] Driver(Observer()).run(parameters) for dataset_name in ['concentric_circles']: #data.all_datasets: results_file = os.path.join(os.path.abspath("./tmp/%s" % dataset_name), "results/results.json") results = convert_to_utf8(json.loads(open(results_file).read())) best = results["best_clustering"] clustering = Clustering.from_dic( results["selected"][best]["clustering"]) vtools.show_2D_dataset_clusters( all_observations[dataset_name], clustering, scale=20, margin=20).save("clustering_images/%s.jpg" % dataset_name, "JPEG") print dataset_name, results["selected"][best]["type"], results[ "selected"][best]["clustering"]["number_of_clusters"], results[ "selected"][best]["evaluation"][ "Noise level"], #results["selected"][best]["parameters"] # look for the best criteria criteria_scores = [] for criteria in results["scores"]: criteria_scores.append( (results["scores"][criteria][best], criteria)) print criteria_scores print "\nDone"
parameters = ProtocolParameters.get_params_from_json(script_str) # And change another hypothesis stuff parameters["clustering"]["evaluation"]["maximum_noise"] = data.noise[dataset_name] parameters["clustering"]["evaluation"]["minimum_cluster_size"] = data.minsize[dataset_name] parameters["clustering"]["evaluation"]["minimum_clusters"] = data.num_cluster_ranges[dataset_name][0] parameters["clustering"]["evaluation"]["maximum_clusters"] = data.num_cluster_ranges[dataset_name][1] print parameters["clustering"]["evaluation"]["minimum_clusters"], parameters["clustering"]["evaluation"]["maximum_clusters"] if dataset_name in data.criteria: parameters["clustering"]["evaluation"]["evaluation_criteria"] = data.criteria[dataset_name] else: parameters["clustering"]["evaluation"]["evaluation_criteria"] = data.criteria["default"] Driver(Observer()).run(parameters) for dataset_name in ['concentric_circles']: #data.all_datasets: results_file = os.path.join(os.path.abspath("./tmp/%s"%dataset_name),"results/results.json") results = convert_to_utf8(json.loads(open(results_file).read())) best = results["best_clustering"] clustering = Clustering.from_dic(results["selected"][best]["clustering"]) vtools.show_2D_dataset_clusters(all_observations[dataset_name], clustering, scale = 20, margin = 20).save("clustering_images/%s.jpg"%dataset_name, "JPEG") print dataset_name,results["selected"][best]["type"],results["selected"][best]["clustering"]["number_of_clusters"], results["selected"][best]["evaluation"]["Noise level"],#results["selected"][best]["parameters"] # look for the best criteria criteria_scores = [] for criteria in results["scores"]: criteria_scores.append((results["scores"][criteria][best],criteria)) print criteria_scores print "\nDone"
data[str(i)] = [] for j in range(0, N): if (i, j) in cluster.percents: data[str(i)].append(cluster.percents[(i, j)]) else: data[str(i)].append(0) return data if __name__ == '__main__': results = convert_to_utf8(json.loads(open(sys.argv[1]).read())) best_clustering_id = results["best_clustering"] best_clustering_dic = results["selected"][best_clustering_id] num_clusters = best_clustering_dic["clustering"]["number_of_clusters"] clustering = Clustering.from_dic(best_clustering_dic["clustering"]) file_frames = int(sys.argv[2]) # generate a map element -> interpolation index_to_interpolation = {} acc = 0 for i in range(0, file_frames - 1): for j in range(i + 1, file_frames): for k in range(20): index_to_interpolation[acc] = (i, j) acc += 1 for cluster in clustering.clusters: colors = iter(cm.rainbow(np.linspace(0, 1, N))) theta = radar_factory(N, frame='polygon')
fig = plt.figure() ax = fig.gca(projection='3d') # Plot protein pdb = prody.parsePDB(params["data"]["files"][0]) if options.show_protein: pdb_backbone = pdb.select("name CA").getCoordsets()[0] # "backbone not hetero" ax.plot(pdb_backbone.T[0], pdb_backbone.T[1], pdb_backbone.T[2]) # Get geometric centers and plot ligands ligand_coords = pdb.select(params["data"]["matrix"]["parameters"]["body_selection"]).getCoordsets() # Get clustering if options.clustering_to_see is None: options.clustering_to_see = results["best_clustering"] try: clustering = Clustering.from_dic(results["selected"][options.clustering_to_see]["clustering"]) # Print some info print_cluster_info("selected", options.clustering_to_see, results) except: clustering = Clustering.from_dic(results["not_selected"][options.clustering_to_see]["clustering"]) # Print some info print_cluster_info("not_selected", options.clustering_to_see, results) # Show all clusters colors = iter(cm.rainbow(numpy.linspace(0, 1, len(clustering.clusters)))) for cluster in clustering.clusters: centers = [] for i,element in enumerate(cluster.all_elements): if options.stride is None or i%options.stride == 0: coords = ligand_coords[element] centers.append(coords.mean(0))
#--------------------------------
# Prepare the clustering for this drug/protein pair
#--------------------------------
## Workspace and file locations, all placed under RESULTS_PATH
MAX_CLUSTERS = 10
CLUSTERING_PATH = os.path.join(RESULTS_PATH, "%s_%s_clustering" % (options.drug, options.protein))
SCRIPT_PATH = os.path.join(RESULTS_PATH, "clustering.json")
OUT_FILE = os.path.join(RESULTS_PATH, "clustering.out")

## Load the template script and adapt it to this case
script = load_dic_in_json(options.template)
script["global"]["workspace"]["base"] = CLUSTERING_PATH
script["data"]["files"].append(FILTERED_PDB_FILE)
script["clustering"]["evaluation"]["maximum_clusters"] = MAX_CLUSTERS
save_dic_in_json(script, SCRIPT_PATH)

## Run pyProCT on the saved script, redirecting its standard output to OUT_FILE
os.system("python -m pyproct.main %s > %s" % (SCRIPT_PATH, OUT_FILE))
best_clustering = Clustering.from_dic(get_best_clustering(CLUSTERING_PATH)["clustering"])

#--------------------------------
# Now calculate the values
#--------------------------------
# metrics[0] is used as the distances and metrics[1] as the energies; each is
# indexed with the elements of the cluster.
results = {}
for cluster in best_clustering.clusters:
    energies = metrics[1][cluster.all_elements]
    distances = metrics[0][cluster.all_elements]
    results[cluster.id] = {
        "max_energy": numpy.max(energies),
        "min_energy": numpy.min(energies),
        "mean_energy": numpy.mean(energies),
        "mean_distance": numpy.mean(distances),
        "population": len(cluster.all_elements),
    }
for j in range(0,N): if (i,j) in cluster.percents: data[str(i)].append( cluster.percents[(i,j)]) else: data[str(i)].append(0) return data if __name__ == '__main__': results = convert_to_utf8(json.loads(open(sys.argv[1]).read())) best_clustering_id =results["best_clustering"] best_clustering_dic = results["selected"][best_clustering_id] num_clusters = best_clustering_dic["clustering"]["number_of_clusters"] clustering = Clustering.from_dic(best_clustering_dic["clustering"]) file_frames = int(sys.argv[2]) # generate a map element -> interpolation index_to_interpolation = {} acc = 0 for i in range(0, file_frames-1): for j in range(i+1, file_frames): for k in range(20): index_to_interpolation[acc] = (i,j) acc += 1 for cluster in clustering.clusters: colors = iter(cm.rainbow(np.linspace(0, 1, N))) theta = radar_factory(N, frame='polygon')
#
# RCD_script = copy.deepcopy(template_script)
# RCD_script["global"]["workspace"]["base"] = os.path.join("RDCvsRMSD", "campari", "RDC", "clustering")
# RCD_script["data"]["matrix"]["method"] = "load"
# RCD_script["data"]["matrix"]["parameters"]["path"] = os.path.join("RDCvsRMSD", "campari", "RDC", "matrix")
# RCD_script["data"]["files"].append(os.path.join("RDCvsRMSD", "campari.pdb"))
#
# tools.save_dic_in_json(RCD_script, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json"))
# tools.save_dic_in_json(RMSD_script, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json"))
#
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json")))
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json")))

# Best clustering of the RDC_refined run, converted with gen_class_list.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RDC_refined", "clustering","results","results.json"))
RDC_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

# Best clustering of the RMSD_refined run.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RMSD_refined", "clustering","results","results.json"))
RMSD_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

# Dihedral run: the results file is parsed once and reused for the best
# clustering and for the three clusterings picked by id (it used to be
# reloaded from disk before each of them). 'results' still ends up holding
# the Dihedral results, as before.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)
Dihedral_bad_score = Clustering.from_dic(results["selected"]["clustering_0098"]["clustering"]).gen_class_list(number_of_elements = 5926)
Dihedral_medium_score = Clustering.from_dic(results["selected"]["clustering_0056"]["clustering"]).gen_class_list(number_of_elements = 5926)
Dihedral_fairly_good_score = Clustering.from_dic(results["selected"]["clustering_0212"]["clustering"]).gen_class_list(number_of_elements = 5926)
# Plot protein pdb = prody.parsePDB(params["data"]["files"][0]) if options.show_protein: pdb_backbone = pdb.select("name CA").getCoordsets()[ 0] # "backbone not hetero" ax.plot(pdb_backbone.T[0], pdb_backbone.T[1], pdb_backbone.T[2]) # Get geometric centers and plot ligands ligand_coords = pdb.select(params["data"]["matrix"]["parameters"] ["body_selection"]).getCoordsets() # Get clustering if options.clustering_to_see is None: options.clustering_to_see = results["best_clustering"] try: clustering = Clustering.from_dic( results["selected"][options.clustering_to_see]["clustering"]) # Print some info print_cluster_info("selected", options.clustering_to_see, results) except: clustering = Clustering.from_dic( results["not_selected"][options.clustering_to_see]["clustering"]) # Print some info print_cluster_info("not_selected", options.clustering_to_see, results) # Show all clusters colors = iter(cm.rainbow(numpy.linspace(0, 1, len(clustering.clusters)))) for cluster in clustering.clusters: centers = [] for i, element in enumerate(cluster.all_elements): if options.stride is None or i % options.stride == 0: coords = ligand_coords[element]