def process_results(this_folders):
    """
    Plot the number of clusters of the best clustering in two figures.

    For every folder in 'this_folders' and every entry of the module-level
    'subfolders' sequence, '<folder>/<subfolder>/results/results.json' is read
    and the "number_of_clusters" of its best clustering is collected. A
    missing results file counts as 0.

    The first figure has one line per folder (x axis: subfolder index; the
    original comments call this plotting "by temperature"). The second figure
    has one line per subfolder (x axis: folder index; "by trajectory length").

    @param this_folders: Sequence of folder paths. Each must have at least
    three '/'-separated components, as the third one is used as legend label.
    """
    clusters_by_folder = {}

    # First figure: one line per folder.
    for folder in this_folders:
        cluster_counts = []
        for subfolder in subfolders:
            results_file = os.path.join(folder, subfolder, "results", "results.json")
            if not os.path.exists(results_file):
                cluster_counts.append(0.)
                continue
            results = load_dic_in_json(results_file)
            best = results["selected"][results["best_clustering"]]
            number_of_clusters = best["clustering"]["number_of_clusters"]
            # Single-argument print: same space-separated output as the
            # print statement, and valid with or without print_function.
            print("%s %s %s" % (results_file, number_of_clusters, best["evaluation"]["Noise level"]))
            cluster_counts.append(number_of_clusters)
        clusters_by_folder[folder] = cluster_counts
        plt.plot(range(len(subfolders)), cluster_counts, linewidth=2, label=folder.split("/")[2])
    plt.legend(loc=2, prop={'size':6})
    plt.show()

    # Second figure: the same numbers transposed, one line per subfolder.
    for position, subfolder in enumerate(subfolders):
        plot_by_size = [clusters_by_folder[folder][position] for folder in this_folders]
        plt.plot(range(len(this_folders)), plot_by_size, linewidth=2, label=subfolder)
    plt.legend(prop={'size':6})
    plt.show()
def process_campari_vs_profasi(campari, profasi):
    """
    Plot the "overlap" value of each campari/profasi folder pair.

    Folders are paired by position (campari[i] with profasi[i]). The value is
    read from
    'comparisons/campari_vs_profasi/<A>vs<B>/results/conf_space_comp.json';
    when that file does not exist 0 is used instead and the missing path is
    printed.

    @param campari: Sequence of campari folder names.
    @param profasi: Sequence of profasi folder names; must be at least as long
    as 'campari'.
    """
    overlaps = []
    for index, A_folder in enumerate(campari):
        B_folder = profasi[index]
        comparison_name = "%svs%s" % (A_folder, B_folder)
        results_file = os.path.join("comparisons", "campari_vs_profasi", comparison_name, "results", "conf_space_comp.json")
        if not os.path.exists(results_file):
            overlaps.append(0.)
            # Single-argument print: same output as 'print a, b'.
            print("%s %s" % (results_file, "not found"))
            continue
        overlaps.append(load_dic_in_json(results_file)["overlap"])

    plt.plot(range(len(overlaps)), overlaps, linewidth=2)
    plt.show()
def process_matrix(folders, image_path, sim_type):
    """
    Render the pairwise "overlap" values of a set of folders as an image.

    For every pair (folders[i], folders[j]) with i < j, the value is read from
    'comparisons/<sim_type>/<A>vs<B>/results/conf_space_comp.json'; a missing
    file contributes 0. The values are collected row by row (i ascending, then
    j ascending), wrapped in a CondensedMatrix and handed to matrixToImage
    with a diagonal value of 1.

    @param folders: Sequence of folder names to compare against each other.
    @param image_path: Path passed to matrixToImage for the output image.
    @param sim_type: Name of the subfolder of 'comparisons' to read from.
    """
    data = []
    number_of_folders = len(folders)
    for i in range(number_of_folders - 1):
        A_folder = folders[i]
        # The inner bound must come from 'folders' itself. It previously used
        # the unrelated global 'campari_folders', which produced the wrong
        # number of pairs (or an IndexError / NameError) for any other list.
        for j in range(i + 1, number_of_folders):
            B_folder = folders[j]
            results_file = os.path.join("comparisons", sim_type, "%svs%s" % (A_folder, B_folder), "results", "conf_space_comp.json")
            print(results_file)
            if os.path.exists(results_file):
                data.append(load_dic_in_json(results_file)["overlap"])
            else:
                data.append(0.)
    print(data)
    matrixToImage(CondensedMatrix(data), image_path, diagonal_value=1.)
def process_matrix_stats(this_folders):
    """
    Plot the matrix mean of each folder with its standard deviation as error bar.

    The values are the "Mean" and "Std. Dev." entries of
    '<folder>/9000/matrix/statistics.json'. A folder without that file is
    plotted as mean 0 with deviation 0.

    @param this_folders: Iterable of folder paths, plotted in iteration order.
    """
    means = []
    deviations = []
    for folder in this_folders:
        stats_path = os.path.join(folder, "9000", "matrix", "statistics.json")
        if os.path.exists(stats_path):
            stats = load_dic_in_json(stats_path)
            folder_mean, folder_deviation = stats["Mean"], stats["Std. Dev."]
        else:
            folder_mean, folder_deviation = 0, 0
        means.append(folder_mean)
        deviations.append(folder_deviation)

    plt.errorbar(range(len(means)), means, yerr=deviations, linewidth=2)
    plt.show()
""" Created on 27/03/2014 @author: victor """ import copy import os from tools import load_dic_in_json, create_dir, save_dic_in_json script_template = load_dic_in_json("template.json") campari_trajs = [ "trajectories/campari/N_000_.pdb", "trajectories/campari/N_001_.pdb", "trajectories/campari/N_002_.pdb", "trajectories/campari/N_003_.pdb", "trajectories/campari/N_004_.pdb", "trajectories/campari/N_005_.pdb", "trajectories/campari/N_006_.pdb", "trajectories/campari/N_007_.pdb", "trajectories/campari/N_008_.pdb", "trajectories/campari/N_009_.pdb", "trajectories/campari/N_010_.pdb", "trajectories/campari/N_011_.pdb", "trajectories/campari/N_012_.pdb", "trajectories/campari/N_013_.pdb", "trajectories/campari/N_014_.pdb", "trajectories/campari/N_015_.pdb" ]
# Select the records that satisfy all four bounds of the filter expression
# (both metrics are bounded above and below), then write the selection out as
# a single trajectory file and as a metrics file.
selection = filterRecords("'L1 Binding Ene' < -226 and 'L1 Binding Ene' > -424 and 'L1(24.954.352.7)' < 14.1 and 'L1(24.954.352.7)' > 5.9", records)
genSingleTrajFast(FILTERED_PDB_FILE, records, selection)
genMetricsFile(METRICS_FILE, ["L1(24.954.352.7)","L1 Binding Ene"], selection)
# NOTE(review): the value computed here is overwritten by the next statement,
# which reloads the metrics from METRICS_FILE instead.
metrics = genMetrics(["L1(24.954.352.7)","L1 Binding Ene"], selection).T
# Transposed so that each metric is a row: metrics[0], metrics[1].
# Presumably row 0 is "L1(24.954.352.7)" and row 1 is "L1 Binding Ene" (the
# order passed to genMetricsFile) -- TODO confirm against the file layout.
metrics = numpy.loadtxt(METRICS_FILE).T

#--------------------------------
# Prepare the clustering for this guy
#--------------------------------
## Load template and modify its contents for this case
CLUSTERING_PATH = os.path.join(RESULTS_PATH,"%s_%s_clustering"%(options.drug, options.protein))
MAX_CLUSTERS = 10
SCRIPT_PATH = os.path.join(RESULTS_PATH,"clustering.json")
OUT_FILE = os.path.join(RESULTS_PATH, "clustering.out")
script = load_dic_in_json(options.template)
script["global"]["workspace"]["base"] = CLUSTERING_PATH
script["data"]["files"].append(FILTERED_PDB_FILE)
script["clustering"]["evaluation"]["maximum_clusters"] = MAX_CLUSTERS
save_dic_in_json(script, SCRIPT_PATH)
# Run pyProCT on the generated script, redirecting its stdout to OUT_FILE.
# NOTE(review): both paths are interpolated unquoted into a shell command, and
# the exit status of os.system is ignored.
os.system("python -m pyproct.main %s > %s"%(SCRIPT_PATH, OUT_FILE))
best_clustering = Clustering.from_dic(get_best_clustering(CLUSTERING_PATH)["clustering"])

#--------------------------------
# Now calculate the values
#--------------------------------
results = {}
for cluster in best_clustering.clusters:
    # Pick, for each metric row, the entries indexed by this cluster's elements.
    energies = metrics[1][cluster.all_elements]
    distances = metrics[0][cluster.all_elements]
    # One (initially empty) result dictionary per cluster, keyed by cluster id.
    results[cluster.id] = {}
# NOTE(review): the commented-out statements below are disabled code that
# generated the RDC and RMSD scripts and launched PYPROCT on them; the live
# code that follows only reads results.json files that already exist.
# RMSD_script["data"]["files"].append(os.path.join("RDCvsRMSD", "campari.pdb"))
#
# RCD_script = copy.deepcopy(template_script)
# RCD_script["global"]["workspace"]["base"] = os.path.join("RDCvsRMSD", "campari", "RDC", "clustering")
# RCD_script["data"]["matrix"]["method"] = "load"
# RCD_script["data"]["matrix"]["parameters"]["path"] = os.path.join("RDCvsRMSD", "campari", "RDC", "matrix")
# RCD_script["data"]["files"].append(os.path.join("RDCvsRMSD", "campari.pdb"))
#
# tools.save_dic_in_json(RCD_script, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json"))
# tools.save_dic_in_json(RMSD_script, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json"))
#
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json")))
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json")))

# Each pair of statements below loads one results.json and turns one of its
# "selected" clusterings into a class list via gen_class_list.
# number_of_elements = 5926 is hard-coded in every call -- presumably the
# number of conformations in the clustered data set; TODO confirm.

# Best clustering of the RDC_refined run.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RDC_refined", "clustering","results","results.json"))
RDC_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

# Best clustering of the RMSD_refined run.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RMSD_refined", "clustering","results","results.json"))
RMSD_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

# Best clustering of the Dihedral run.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

# Two fixed, non-best clusterings of the same Dihedral run, picked by id.
# NOTE(review): the same Dihedral results.json is reloaded before each lookup.
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_bad_score = Clustering.from_dic(results["selected"]["clustering_0098"]["clustering"]).gen_class_list(number_of_elements = 5926)
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", 
"campari", "Dihedral", "clustering","results","results.json"))
Dihedral_medium_score = Clustering.from_dic(results["selected"]["clustering_0056"]["clustering"]).gen_class_list(number_of_elements = 5926)
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
# Fill 'records' from the trajectory file and build the metrics selected by
# plots["totale_spawning"] for them.
records = []
processFile(traj_pdb, records, True)
all_metrics = genMetrics(plots["totale_spawning"], records)
# Pairwise euclidean distances between the normalized metric rows; pdist
# returns them in condensed form, which is what CondensedMatrix wraps here.
matrix_data = scipy.spatial.distance.pdist(normalize_metrics(all_metrics), 'euclidean')
m_handler = MatrixHandler()
m_handler.distance_matrix = CondensedMatrix(matrix_data)
# Save the matrix so the clustering script below can point at it by path.
matrix_file = os.path.join(base_dir, TENERGY_SPAWN_MATRIX)
m_handler.saveMatrix(matrix_file)

#######################################################################################################################
# Cluster by metrics
#######################################################################################################################
print "* Spawning - totalE clustering"
be_rmsd_clustering_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_SPAWN_TOTE_SCRIPT)
working_directory = os.path.join(base_dir, TOTALE_SPAWN_WORKSPACE)
# Load the clustering script, point it at this run's workspace, trajectory
# (as an absolute path built from the current working directory) and matrix.
params = load_dic_in_json(be_rmsd_clustering_script_path)
params['global']['workspace']['base'] = working_directory
params['data']['files'] = [os.path.join(os.getcwd(), traj_pdb)]
params['data']['matrix']['parameters']['path'] = matrix_file
# NOTE: the modified parameters are written back to the same file they were
# loaded from, so the script on disk is overwritten in place.
save_dic_in_json(params, be_rmsd_clustering_script_path)
use_pyproct(working_directory, be_rmsd_clustering_script_path)

#######################################################################################################################
# Get 5 representatives. 2 strategies.
#######################################################################################################################
#####################################################################
#####################################################################
# Work only with best clustering/ find best cluster
#####################################################################
#####################################################################