import os

import prody

import pdb_tools  # project helper module; the exact import path depends on the package layout


def save_representatives(representatives,
                         pdb_name,
                         workspace_handler,
                         trajectory_holder,
                         do_merged_files_have_correlative_models,
                         write_frame_number_instead_of_correlative_model_number,
                         keep_remarks=False):
    """
    Saves a pdb file containing the most representative elements of the clustering.

    @param representatives: A list of the representative elements of the clustering we want to extract.

    @param pdb_name: Name (without extension) of the pdb file to be written into the results folder.

    @param workspace_handler: The workspace handler of this run.

    @param trajectory_holder: The trajectory handler for this run or an array with pdb file paths.

    @param do_merged_files_have_correlative_models: When merging, the output file will have models from 0 to M,
    where M is the total number of frames of the merged file.

    @param write_frame_number_instead_of_correlative_model_number: When extracting frames, extract those models
    whose number coincides with the frame numbers in 'representatives'. Otherwise, extract those models whose
    position coincides with the frame numbers in 'representatives'.

    @param keep_remarks: If True, REMARK lines of the extracted models are preserved in the output file.
    """
    results_directory = workspace_handler["results"]

    # Merge pdbs (in order)
    temporary_merged_trajectory_path = os.path.join(workspace_handler["tmp"],
                                                    "tmp_merged_trajectory.pdb")

    #===========================================================
    # THIS DOES NOT WORK IF USING DCD FILES
    # merge_pdbs(trajectory_holder,
    #            temporary_merged_trajectory_path,
    #            do_merged_files_have_correlative_models)
    # TEMPORARY HACK TO OVERCOME DCD MERGING BUG
    merged_pdb = trajectory_holder.getMergedStructure()
    prody.writePDB(temporary_merged_trajectory_path, merged_pdb)
    #===========================================================

    # Extract frames from the merged pdb
    file_handler_in = open(temporary_merged_trajectory_path, "r")
    file_handler_out = open(os.path.join(results_directory, "%s.pdb" % pdb_name), "w")

    pdb_tools.extract_frames_from_trajectory_sequentially(
        file_handler_in=file_handler_in,
        number_of_frames=pdb_tools.get_number_of_frames(temporary_merged_trajectory_path),
        file_handler_out=file_handler_out,
        frames_to_save=representatives,
        write_frame_number_instead_of_correlative_model_number=write_frame_number_instead_of_correlative_model_number,
        keep_header=keep_remarks)

    file_handler_in.close()
    file_handler_out.close()

    return os.path.join(results_directory, "%s.pdb" % pdb_name)
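
# The helper below is NOT the project's pdb_tools implementation; it is a
# minimal sketch of what extract_frames_from_trajectory_sequentially is
# assumed to do, given how it is called above: stream a multi-model pdb once,
# copy only the selected frames, and renumber MODEL records. The real helper
# also receives number_of_frames (e.g. for progress reporting), which this
# sketch omits.
def _extract_frames_sketch(file_handler_in, file_handler_out, frames_to_save,
                           write_frame_number_instead_of_correlative_model_number=False,
                           keep_header=False):
    frames_to_save = set(frames_to_save)
    current_frame = -1    # frames are numbered from 0, as in the docstring above
    inside_model = False
    models_written = 0
    for line in file_handler_in:
        record = line[:6].strip()
        if record == "MODEL":
            current_frame += 1
            inside_model = current_frame in frames_to_save
            if inside_model:
                # Either keep the original frame number or use a correlative one
                if write_frame_number_instead_of_correlative_model_number:
                    model_number = current_frame
                else:
                    model_number = models_written
                file_handler_out.write("MODEL %8d\n" % model_number)
        elif record == "ENDMDL":
            if inside_model:
                file_handler_out.write("ENDMDL\n")
                models_written += 1
            inside_model = False
        elif record == "REMARK":
            # REMARK lines are only copied when the caller asked to keep them
            if keep_header and inside_model:
                file_handler_out.write(line)
        elif inside_model:
            file_handler_out.write(line)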
import os
import tarfile

import prody

# These are the pdb_tools helpers used in save_representatives above;
# the exact import path depends on the package layout.
from pdb_tools import extract_frames_from_trajectory_sequentially, get_number_of_frames


def save_all_clusters(my_params, pdb_params, workspaceHandler, trajectoryHandler,
                      clustering, generatedFiles, timer):
    timer.start("Save clusters")

    # Parameters
    keep_remarks = my_params["keep_remarks"] if "keep_remarks" in my_params else False
    keep_frame_number = my_params["keep_frame_number"] if "keep_frame_number" in my_params else False

    # Places
    results_place = workspaceHandler["results"]
    clusters_place = workspaceHandler["clusters"]
    tmp_place = workspaceHandler["tmp"]

    #===========================================================
    # THIS DOES NOT WORK IF USING DCD OR STRUCTS
    # The real job
    # input_path = os.path.join(tmp_place, "tmp_merged_trajectory.pdb")
    # merge_pdbs(pdb_params, input_path)
    # TEMPORARY HACK TO OVERCOME DCD MERGING BUG
    merged_pdb = trajectoryHandler.getMergedStructure()
    input_path = os.path.join(tmp_place, "tmp_merged_trajectory.pdb")
    prody.writePDB(input_path, merged_pdb)
    #===========================================================

    number_of_frames = get_number_of_frames(input_path)

    cluster_files = []
    for cluster in clustering.clusters:
        output_path = os.path.join(clusters_place, "%s.pdb" % (cluster.id))
        cluster_files.append(output_path)
        file_handler_in = open(input_path, "r")
        file_handler_out = open(output_path, "w")
        extract_frames_from_trajectory_sequentially(
            file_handler_in,
            number_of_frames,
            file_handler_out,
            cluster.all_elements,
            keep_header=keep_remarks,
            write_frame_number_instead_of_correlative_model_number=keep_frame_number)
        file_handler_in.close()
        file_handler_out.close()

    # Pack all cluster pdb files into a gzipped tar file
    tar_path = os.path.join(results_place, "clusters.tar.gz")
    tar = tarfile.open(tar_path, "w:gz")
    for comp_file in cluster_files:
        tar.add(comp_file, os.path.basename(comp_file))
    tar.close()

    timer.stop("Save clusters")

    generatedFiles.append({"description": "Clusters",
                           "path": os.path.abspath(tar_path),
                           "type": "compressed_pdb"})
def save_all_clusters(clustering, my_params, workspaceHandler, trajectoryHandler,
                      generatedFiles):
    # Parameters
    keep_remarks = my_params.get_value("keep_remarks", default_value=False)
    keep_frame_number = my_params.get_value("keep_frame_number", default_value=False)

    # Places
    results_place = workspaceHandler["results"]
    clusters_place = workspaceHandler["clusters"]
    tmp_place = workspaceHandler["tmp"]

    # Write the merged structure to a temporary pdb (workaround for the DCD merging bug)
    merged_pdb = trajectoryHandler.getMergedStructure()
    input_path = os.path.join(tmp_place, "tmp_merged_trajectory.pdb")
    prody.writePDB(input_path, merged_pdb)

    number_of_frames = get_number_of_frames(input_path)

    cluster_files = []
    for cluster in clustering.clusters:
        output_path = os.path.join(clusters_place, "%s.pdb" % (cluster.id))
        cluster_files.append(output_path)
        file_handler_in = open(input_path, "r")
        file_handler_out = open(output_path, "w")
        extract_frames_from_trajectory_sequentially(
            file_handler_in,
            number_of_frames,
            file_handler_out,
            cluster.all_elements,
            keep_header=keep_remarks,
            write_frame_number_instead_of_correlative_model_number=keep_frame_number)
        file_handler_in.close()
        file_handler_out.close()

    # Pack all cluster pdb files into a gzipped tar file
    tar_path = os.path.join(results_place, "clusters.tar.gz")
    tar = tarfile.open(tar_path, "w:gz")
    for comp_file in cluster_files:
        tar.add(comp_file, os.path.basename(comp_file))
    tar.close()

    generatedFiles.append({
        "description": "Clusters",
        "path": os.path.abspath(tar_path),
        "type": "compressed_pdb"
    })
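
# A minimal sketch (not project code) of how several pdb trajectories could be
# merged with prody, the same library used by the getMergedStructure()
# workaround above. File names below are illustrative; all files are assumed
# to contain the same atoms in the same order.
def merge_pdb_trajectories_sketch(pdb_paths):
    merged = prody.parsePDB(pdb_paths[0])
    for path in pdb_paths[1:]:
        # Append every model of the next trajectory as new coordinate sets
        merged.addCoordset(prody.parsePDB(path).getCoordsets())
    return merged

# Usage sketch:
#   merged = merge_pdb_trajectories_sketch(["traj_1.pdb", "traj_2.pdb"])
#   prody.writePDB("tmp_merged_trajectory.pdb", merged)  # one MODEL per coordset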
def save_cluster_handler(self, data):
    data = convert_to_utf8(json.loads(data))

    path = os.path.join(data["paths"]["tmp"], "tmp_merged_trajectory.pdb")

    file_handler_in = open(path, "r")
    file_handler_out = open("results/tmp/cluster.pdb", "w")
    extract_frames_from_trajectory_sequentially(
        file_handler_in,
        get_number_of_frames(path),
        file_handler_out,
        data["elements"],
        keep_header=True,
        write_frame_number_instead_of_correlative_model_number=True)
    file_handler_in.close()
    file_handler_out.close()

    self.wfile.write('{"path":"results/tmp/cluster.pdb"}')
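
# For reference, a request payload this handler can consume, inferred from the
# keys it accesses above (the tmp path and element indices are illustrative):
#
#   {
#       "paths": {"tmp": "results/tmp"},
#       "elements": [0, 3, 7]
#   }
#
# The handler answers with the location of the extracted cluster pdb:
#   {"path": "results/tmp/cluster.pdb"}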
#######################################################################################################################
# Plot clusters
#######################################################################################################################
plot_clusters(os.path.join(base_dir, "clusters.svg"),
              all_metrics,
              scores,
              scores.index(most_negative))

#######################################################################################################################
# Store the elements of the most negative cluster
#######################################################################################################################
# This works because we have only one trajectory; merging beforehand should be mandatory.
trajectory_path = os.path.join(os.getcwd(), traj_pdb)
in_handler = open(trajectory_path, "r")
most_negative_path = os.path.join(base_dir, "most_negative_cluster.pdb")
out_handler = open(most_negative_path, "w")
extract_frames_from_trajectory_sequentially(file_handler_in=in_handler,
                                            number_of_frames=get_number_of_frames(trajectory_path),
                                            file_handler_out=out_handler,
                                            frames_to_save=most_negative_cluster.all_elements,
                                            keep_header=True,
                                            write_frame_number_instead_of_correlative_model_number=True)
in_handler.close()
out_handler.close()

#######################################################################################################################
# Pick 5 well-separated representatives
#######################################################################################################################
working_directory = os.path.join(base_dir, "cluster_to_5")
cluster_to_5_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_TO_5_SCRIPT)
params = load_dic_in_json(cluster_to_5_script_path)
params['global']['workspace']['base'] = working_directory
params['data']['files'] = [most_negative_path]
params['data']['matrix']['parameters']['dist_fit_selection'] = "name CA"
params['data']['matrix']['parameters']['body_selection'] = ligand_description
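
#######################################################################################################################
# get_number_of_frames is used throughout but not defined in these snippets; below is a minimal sketch under the
# assumption that it simply counts MODEL records in a multi-model pdb (not necessarily the project's implementation).
#######################################################################################################################
def get_number_of_frames_sketch(pdb_path):
    number_of_models = 0
    handler = open(pdb_path, "r")
    for line in handler:
        # Each frame of a multi-model pdb starts with a MODEL record
        if line[:6].strip() == "MODEL":
            number_of_models += 1
    handler.close()
    return number_of_models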