def do_convergence_test(trajectory, traj_type):
    """Runs pyProCT over growing prefixes of a trajectory (1000 to 9000
    frames, in steps of 1000) to check the convergence of the clustering."""
    path, file_name = os.path.split(trajectory["path"])
    traj_id = file_name.split(".")[0]
    base_path = os.path.join("convergence", traj_type, "%s" % traj_id)
    for n in range(1000, 10000, 1000):
        print "- Working with %s with %d frames" % (trajectory["path"], n)
        # One workspace per prefix length.
        this_path = os.path.join(base_path, "%d" % n)
        create_dir(this_path)
        pdb_path = os.path.join(this_path, "%d.pdb" % n)
        extract_first_n_frames(n, trajectory["path"], pdb_path)
        # Instantiate the control script template for this prefix.
        script = copy.deepcopy(script_template)
        script["global"]["workspace"]["base"] = this_path
        script["data"]["files"] = [pdb_path]
        # script["clustering"]["evaluation"]["maximum_noise"] = trajectory["noise"]
        # script["clustering"]["evaluation"]["minimum_cluster_size"] = int(n / trajectory["max"])
        # script["clustering"]["evaluation"]["minimum_clusters"] = trajectory["min"]
        # script["clustering"]["evaluation"]["maximum_clusters"] = trajectory["max"]
        script_path = os.path.join(this_path, "script.json")
        save_dic_in_json(script, script_path)
        os.system("python %s %s " % (PYPROCT, script_path))
        # The extracted prefix is only needed during the run; delete it to save space.
        os.system("rm %s" % pdb_path)
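
# --- Hedged sketch: one possible implementation of the helpers this script
# assumes (create_dir, save_dic_in_json, extract_first_n_frames). The project
# may define them differently elsewhere; this is only a minimal, self-contained
# version for reference, not the original code.
import os
import json

def create_dir(path):
    # Create the directory (and any missing parents) if it does not exist yet.
    if not os.path.exists(path):
        os.makedirs(path)

def save_dic_in_json(dic, json_path):
    # Serialize a pyProCT control script (a plain dict) as JSON.
    with open(json_path, "w") as handle:
        json.dump(dic, handle, indent=4)

def extract_first_n_frames(n, traj_path, out_path):
    # Copy the first n MODEL/ENDMDL blocks of a multi-model PDB trajectory
    # (assumes frames are delimited by ENDMDL records, as in standard PDB files).
    models_written = 0
    with open(traj_path) as src, open(out_path, "w") as dst:
        for line in src:
            dst.write(line)
            if line.startswith("ENDMDL"):
                models_written += 1
                if models_written == n:
                    break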
# Earlier Profasi-only comparison, left commented out:
#     working_dir = os.path.join("comparisons", "profasi", "%svs%s" % (A_traj_id, B_traj_id))
#     create_dir(working_dir)
#     script["global"]["workspace"]["base"] = working_dir
#     script["data"]["files"] = [A_traj, B_traj]
#     script_path = os.path.join(working_dir, "script.json")
#     save_dic_in_json(script, script_path)
#     os.system("python %s %s " % (PYPROCT, script_path))

# initial_j_offset = 0
# initial_i_offset = 0

# Campari vs Profasi
for i in [15]:  # range(0, len(profasi_trajs)):
    A_traj = campari_trajs[i]
    path, file_name = os.path.split(A_traj)
    A_traj_id = file_name.split(".")[0]
    B_traj = profasi_trajs[i]
    path, file_name = os.path.split(B_traj)
    B_traj_id = file_name.split(".")[0]
    script = copy.deepcopy(script_template)
    working_dir = os.path.join("comparisons", "campari_vs_profasi", "%svs%s" % (A_traj_id, B_traj_id))
    create_dir(working_dir)
    script["global"]["workspace"]["base"] = working_dir
    # Restrict both ensembles to the common residue range so they are comparable.
    script["data"]["files"] = [{"file": A_traj, "base_selection": "resnum 3to53"},
                               {"file": B_traj, "base_selection": "resnum 3to53"}]
    script_path = os.path.join(working_dir, "script.json")
    save_dic_in_json(script, script_path)
    os.system("python %s %s " % (PYPROCT, script_path))
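
# --- Hedged sketch: the minimal shape of the script_template dict mutated by
# the loops above. Only the keys this file actually touches are shown; the
# real template almost certainly carries more pyProCT settings (algorithms,
# matrix method, queries, ...), so treat this as a placeholder.
script_template = {
    "global": {"workspace": {"base": ""}},   # overwritten per comparison
    "data": {"files": []},                   # filled with the trajectories
    "clustering": {"evaluation": {}},        # evaluation criteria go here
}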
metrics = numpy.loadtxt(METRICS_FILE).T

#--------------------------------
# Prepare the clustering for this drug/protein pair
#--------------------------------
# Load the template and modify its contents for this case.
CLUSTERING_PATH = os.path.join(RESULTS_PATH, "%s_%s_clustering" % (options.drug, options.protein))
MAX_CLUSTERS = 10
SCRIPT_PATH = os.path.join(RESULTS_PATH, "clustering.json")
OUT_FILE = os.path.join(RESULTS_PATH, "clustering.out")
script = load_dic_in_json(options.template)
script["global"]["workspace"]["base"] = CLUSTERING_PATH
script["data"]["files"].append(FILTERED_PDB_FILE)
script["clustering"]["evaluation"]["maximum_clusters"] = MAX_CLUSTERS
save_dic_in_json(script, SCRIPT_PATH)
os.system("python -m pyproct.main %s > %s" % (SCRIPT_PATH, OUT_FILE))
best_clustering = Clustering.from_dic(get_best_clustering(CLUSTERING_PATH)["clustering"])

#--------------------------------
# Now calculate the values
#--------------------------------
# metrics[0] holds the distances and metrics[1] the energies (one value per frame).
results = {}
for cluster in best_clustering.clusters:
    energies = metrics[1][cluster.all_elements]
    distances = metrics[0][cluster.all_elements]
    results[cluster.id] = {}
    results[cluster.id]["max_energy"] = numpy.max(energies)
    results[cluster.id]["min_energy"] = numpy.min(energies)
    results[cluster.id]["mean_energy"] = numpy.mean(energies)
    results[cluster.id]["mean_distance"] = numpy.mean(distances)
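
# --- Hedged sketch: one way to persist the per-cluster statistics computed
# above. The output file name is an assumption for illustration; the original
# script may consume 'results' differently.
import json

stats_path = os.path.join(RESULTS_PATH, "cluster_energy_stats.json")  # hypothetical name
with open(stats_path, "w") as handle:
    # Cast numpy scalars to plain floats so the dict is JSON-serializable.
    json.dump(dict((c_id, dict((k, float(v)) for k, v in stats.items()))
                   for c_id, stats in results.items()),
              handle, indent=4)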
m_handler = MatrixHandler()
m_handler.distance_matrix = CondensedMatrix(matrix_data)
matrix_file = os.path.join(base_dir, TENERGY_SPAWN_MATRIX)
m_handler.saveMatrix(matrix_file)

#######################################################################
# Cluster by metrics
#######################################################################
print "* Spawning - totalE clustering"
be_rmsd_clustering_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_SPAWN_TOTE_SCRIPT)
working_directory = os.path.join(base_dir, TOTALE_SPAWN_WORKSPACE)
params = load_dic_in_json(be_rmsd_clustering_script_path)
params['global']['workspace']['base'] = working_directory
params['data']['files'] = [os.path.join(os.getcwd(), traj_pdb)]
# Cluster using the precalculated total-energy matrix instead of recomputing it.
params['data']['matrix']['parameters']['path'] = matrix_file
save_dic_in_json(params, be_rmsd_clustering_script_path)
use_pyproct(working_directory, be_rmsd_clustering_script_path)

#######################################################################
# Get 5 representatives, using 2 strategies.
#######################################################################

#####################################################################
# Work only with the best clustering / find the best cluster
#####################################################################
best_clustering = get_best_clustering(working_directory)
best_clustering = Clustering.from_dic(best_clustering["clustering"])

scores = []
for a_cluster in best_clustering.clusters:
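
# --- Hedged sketch: a minimal use_pyproct helper consistent with the call
# above, mirroring the "python -m pyproct.main <script.json>" invocation used
# elsewhere in these scripts. The real helper may differ; the log file name
# is an assumption.
def use_pyproct(workspace, script_path):
    # Run pyProCT on the given control script; results land in 'workspace'.
    create_dir(workspace)
    os.system("python -m pyproct.main %s > %s" % (script_path,
                                                  os.path.join(workspace, "pyproct.out")))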