Example #1
0
def save_representatives(
        representatives,
        pdb_name,
        workspace_handler,
        trajectory_holder,
        do_merged_files_have_correlative_models,
        write_frame_number_instead_of_correlative_model_number,
        keep_remarks=False):
    """
    Saves a pdb file containing the most representative elements of the clustering.

    @param representatives: A list of the representative elements of the clustering we want to extract.

    @param pdb_name: Base name (without extension) of the resulting pdb file.

    @param workspace_handler: The workspace handler of this run.

    @param trajectory_holder: The trajectory handler for this run or an array with pdb file paths.

    @param do_merged_files_have_correlative_models: When merging, output file will have models from 0 to M, where M is the total number
    of frames of the merged file. (Currently unused while the prody merging hack below is in place.)

    @param write_frame_number_instead_of_correlative_model_number: When extracting frames, extract those models which number coincides
    with the frame numbers in 'representatives'. Otherwise, extract those models which position coincide with the frame number in
    'representatives'.

    @param keep_remarks: If True, header/REMARK lines are preserved in the extracted frames.

    @return: The path of the generated pdb file.
    """
    results_directory = workspace_handler["results"]
    output_path = os.path.join(results_directory, "%s.pdb" % pdb_name)

    # Merge pdbs (in order)
    temporary_merged_trajectory_path = os.path.join(
        workspace_handler["tmp"], "tmp_merged_trajectory.pdb")

    # NOTE: merge_pdbs(trajectory_holder, temporary_merged_trajectory_path,
    # do_merged_files_have_correlative_models) does not work when using DCD
    # files, so we merge through prody instead (temporary hack to overcome
    # the DCD merging bug).
    merged_pdb = trajectory_holder.getMergedStructure()
    prody.writePDB(temporary_merged_trajectory_path, merged_pdb)

    # Extract frames from the merged pdb. Context managers guarantee both
    # handlers are closed even if the extraction raises.
    with open(temporary_merged_trajectory_path, "r") as file_handler_in, \
            open(output_path, "w") as file_handler_out:
        pdb_tools.extract_frames_from_trajectory_sequentially(
            file_handler_in=file_handler_in,
            number_of_frames=pdb_tools.get_number_of_frames(
                temporary_merged_trajectory_path),
            file_handler_out=file_handler_out,
            frames_to_save=representatives,
            write_frame_number_instead_of_correlative_model_number=
            write_frame_number_instead_of_correlative_model_number,
            keep_header=keep_remarks)

    return output_path
Example #2
0
File: clusters.py  Project: lowks/pyProCT
def save_all_clusters(my_params, pdb_params, workspaceHandler, trajectoryHandler, clustering, generatedFiles, timer):
    """
    Saves one pdb file per cluster (holding that cluster's frames) into the
    'clusters' workspace folder, then packs all of them into a single
    'clusters.tar.gz' archive in the 'results' folder and registers it in
    'generatedFiles'.

    @param my_params: Parameter dict; may define "keep_remarks" and
    "keep_frame_number" (both default to False).
    @param pdb_params: Parameters for pdb merging. (Currently unused while the
    prody merging hack below is in place.)
    @param workspaceHandler: The workspace handler of this run.
    @param trajectoryHandler: The trajectory handler of this run.
    @param clustering: The clustering whose clusters will be saved.
    @param generatedFiles: List where a descriptor of the generated archive is appended.
    @param timer: Timer used to account the "Save clusters" step.
    """
    timer.start("Save clusters")

    # Parameters (dict.get replaces the "key in d"-then-index double lookup)
    keep_remarks = my_params.get("keep_remarks", False)
    keep_frame_number = my_params.get("keep_frame_number", False)

    # Places
    results_place = workspaceHandler["results"]
    clusters_place = workspaceHandler["clusters"]
    tmp_place = workspaceHandler["tmp"]

    # NOTE: merge_pdbs(pdb_params, input_path) does not work when using DCD
    # files or structs, so we merge through prody instead (temporary hack to
    # overcome the DCD merging bug).
    merged_pdb = trajectoryHandler.getMergedStructure()
    input_path = os.path.join(tmp_place, "tmp_merged_trajectory.pdb")
    prody.writePDB(input_path, merged_pdb)

    number_of_frames = get_number_of_frames(input_path)
    cluster_files = []
    for cluster in clustering.clusters:
        output_path = os.path.join(clusters_place, "%s.pdb" % (cluster.id))
        cluster_files.append(output_path)
        # The merged file is re-opened per cluster because the sequential
        # extraction consumes the input stream. 'with' guarantees both
        # handlers are closed even if the extraction raises.
        with open(input_path, "r") as file_handler_in, \
                open(output_path, "w") as file_handler_out:
            extract_frames_from_trajectory_sequentially(
                file_handler_in,
                number_of_frames,
                file_handler_out,
                cluster.all_elements,
                keep_header=keep_remarks,
                write_frame_number_instead_of_correlative_model_number=keep_frame_number)

    # Pack all per-cluster pdb files into one gzip-compressed tar archive.
    tar_path = os.path.join(results_place, "clusters.tar.gz")
    tar = tarfile.open(tar_path, "w:gz")
    try:
        for comp_file in cluster_files:
            tar.add(comp_file, os.path.basename(comp_file))
    finally:
        tar.close()
    timer.stop("Save clusters")

    generatedFiles.append({"description": "Clusters",
                           "path": os.path.abspath(tar_path),
                           "type": "compressed_pdb"})
Example #3
0
def save_all_clusters(clustering, my_params, workspaceHandler,
                      trajectoryHandler, generatedFiles):
    """
    Saves one pdb file per cluster (holding that cluster's frames) into the
    'clusters' workspace folder, then packs all of them into a single
    'clusters.tar.gz' archive in the 'results' folder and registers it in
    'generatedFiles'.

    @param clustering: The clustering whose clusters will be saved.
    @param my_params: Parameter holder; may define "keep_remarks" and
    "keep_frame_number" (both default to False).
    @param workspaceHandler: The workspace handler of this run.
    @param trajectoryHandler: The trajectory handler of this run.
    @param generatedFiles: List where a descriptor of the generated archive is appended.
    """
    # Parameters
    keep_remarks = my_params.get_value("keep_remarks", default_value=False)
    keep_frame_number = my_params.get_value("keep_frame_number",
                                            default_value=False)

    # Places
    results_place = workspaceHandler["results"]
    clusters_place = workspaceHandler["clusters"]
    tmp_place = workspaceHandler["tmp"]

    # Merge all source trajectories into one temporary pdb through prody.
    merged_pdb = trajectoryHandler.getMergedStructure()
    input_path = os.path.join(tmp_place, "tmp_merged_trajectory.pdb")
    prody.writePDB(input_path, merged_pdb)

    number_of_frames = get_number_of_frames(input_path)
    cluster_files = []
    for cluster in clustering.clusters:
        output_path = os.path.join(clusters_place, "%s.pdb" % (cluster.id))
        cluster_files.append(output_path)
        # The merged file is re-opened per cluster because the sequential
        # extraction consumes the input stream. 'with' guarantees both
        # handlers are closed even if the extraction raises.
        with open(input_path, "r") as file_handler_in, \
                open(output_path, "w") as file_handler_out:
            extract_frames_from_trajectory_sequentially(
                file_handler_in,
                number_of_frames,
                file_handler_out,
                cluster.all_elements,
                keep_header=keep_remarks,
                write_frame_number_instead_of_correlative_model_number=
                keep_frame_number)

    # Pack all per-cluster pdb files into one gzip-compressed tar archive.
    tar_path = os.path.join(results_place, "clusters.tar.gz")
    tar = tarfile.open(tar_path, "w:gz")
    try:
        for comp_file in cluster_files:
            tar.add(comp_file, os.path.basename(comp_file))
    finally:
        tar.close()

    generatedFiles.append({
        "description": "Clusters",
        "path": os.path.abspath(tar_path),
        "type": "compressed_pdb"
    })
Example #4
0
    def save_cluster_handler(self, data):
        """
        Request handler: extracts the frames listed in data["elements"] from
        the temporary merged trajectory and writes them to
        'results/tmp/cluster.pdb', then answers with a JSON payload holding
        that path.

        @param data: JSON string with at least "paths"->"tmp" (workspace tmp
        folder) and "elements" (frame numbers to extract).
        """
        data = convert_to_utf8(json.loads(data))
        path = os.path.join(data["paths"]["tmp"], "tmp_merged_trajectory.pdb")

        # 'with' guarantees both handlers are closed even if extraction raises.
        # NOTE(review): output path is hard-coded and relative to the process
        # working directory — confirm this matches the server's layout.
        with open(path, "r") as file_handler_in, \
                open("results/tmp/cluster.pdb", "w") as file_handler_out:
            extract_frames_from_trajectory_sequentially(
                file_handler_in,
                get_number_of_frames(path),
                file_handler_out,
                data["elements"],
                keep_header=True,
                write_frame_number_instead_of_correlative_model_number=True)
        self.wfile.write('{"path":"results/tmp/cluster.pdb"}')
    #######################################################################################################################
    # Plot clusters
    #######################################################################################################################
    # NOTE(review): fragment of a larger script — base_dir, all_metrics, scores,
    # most_negative, traj_pdb, most_negative_cluster, ligand_description and
    # CLUSTERING_TO_5_SCRIPT are defined outside the visible span.
    plot_clusters(os.path.join(base_dir, "clusters.svg"), all_metrics, scores, scores.index(most_negative))

    #######################################################################################################################
    # Store the elements of the most negative cluster
    #######################################################################################################################
    # This works because we have only one traj. Merging before should be mandatory.
    trajectory_path = os.path.join(os.getcwd(), traj_pdb)
    in_handler = open(trajectory_path,"r")
    most_negative_path = os.path.join(base_dir,"most_negative_cluster.pdb")
    out_handler = open(most_negative_path,"w")
    # Extract the frames belonging to the most negative cluster into its own pdb.
    extract_frames_from_trajectory_sequentially(file_handler_in = in_handler,
                                               number_of_frames = get_number_of_frames(trajectory_path),
                                               file_handler_out = out_handler,
                                               frames_to_save = most_negative_cluster.all_elements,
                                               keep_header = True,
                                               write_frame_number_instead_of_correlative_model_number = True)
    in_handler.close()
    out_handler.close()

    #######################################################################################################################
    # Pick 5 well-separated representatives
    #######################################################################################################################
    # Build the parameter dict for a nested pyProCT run that re-clusters the
    # most negative cluster into 5 groups, feeding it the pdb written above.
    working_directory = os.path.join(base_dir, "cluster_to_5")
    cluster_to_5_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_TO_5_SCRIPT)
    params = load_dic_in_json(cluster_to_5_script_path)
    params['global']['workspace']['base'] = working_directory
    params['data']['files'] = [most_negative_path]
    # "name CA" restricts the distance fit to alpha carbons (ProDy selection syntax).
    params['data']['matrix']['parameters']['dist_fit_selection'] = "name CA"
    params['data']['matrix']['parameters']['body_selection'] = ligand_description