Esempio n. 1
0
def grab_results_from_task(task_id, user, output_peptide_directory,
                           output_psm_directory, params_obj,
                           folder_for_results):
    """Export the PSMs and peptides of one task and report their counts.

    The task's PSM result file is looked up in ``folder_for_results``. Only
    when exactly one file is found does the function:

    1. pass it, together with the task's "params" and "mergedResult" files,
       to ``name_demangle_filenames_and_instrument_collision`` with
       ``<output_psm_directory>/<task_id>.psms`` as the output path;
    2. load that path as a ``PSMset``, filter it with
       ``filter_psms_with_params`` and ``filter_to_fdr_by_length(0.05)``,
       and tag every remaining PSM with the task id;
    3. pickle the filtered set to the same ``.psms`` path;
    4. call ``save_psms_as_peptides`` with
       ``<output_peptide_directory>/<task_id>.peptides``.

    Args:
        task_id: Task identifier; also used as the output file stem.
        user: User passed to the result-file lookups.
        output_peptide_directory: Directory for the ``.peptides`` output.
        output_psm_directory: Directory for the ``.psms`` output.
        params_obj: Forwarded unchanged to ``filter_psms_with_params``.
        folder_for_results: Result folder name queried for the PSM file.

    Returns:
        dict with keys ``"task_id"``, ``"number_psms"`` and
        ``"number_peptides"``. Both counts stay 0 when the PSM lookup does
        not yield exactly one file.

    Raises:
        IndexError: If the "params" or "mergedResult" lookup comes back
            empty (the first element of each is used unconditionally).
    """
    return_dict = {"number_psms": 0, "number_peptides": 0, "task_id": task_id}

    path_to_psm_files_list = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, folder_for_results)
    if len(path_to_psm_files_list) != 1:
        # Zero or several candidate files: nothing is exported.
        return return_dict

    output_psm_path = os.path.join(output_psm_directory, task_id + ".psms")
    path_to_param_file = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "params")[0]

    # These are original results that are from MSGF+ that includes the
    # fragmentation method. Looked up once and reused for both the log line
    # and the path.
    merged_result_paths = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "mergedResult")
    print(task_id, user, merged_result_paths)
    path_to_merged_results = merged_result_paths[0]

    print(path_to_psm_files_list[0] + " to " + output_psm_path)
    name_demangle_filenames_and_instrument_collision(
        path_to_psm_files_list[0], output_psm_path, path_to_param_file,
        path_to_merged_results, "filename", "filename")

    # Now generate the peptide list from the psm list.
    psm_set = ming_psm_library.PSMset("task results")
    psm_set.load_PSM_tsvfile(output_psm_path, True)
    print("PSM Count", len(psm_set.psms))
    psm_set.psms = filter_psms_with_params(params_obj, psm_set.psms)

    # Record on each PSM which task it came from.
    for psm in psm_set.psms:
        psm.extra_metadata["proteosafe_task"] = task_id

    print("PSM Count Filtered", len(psm_set.psms))
    # 0.05 is presumably the FDR threshold -- TODO confirm against PSMset.
    psm_set.filter_to_fdr_by_length(0.05)

    # NOTE: this replaces the file at output_psm_path (loaded as TSV above)
    # with a pickle of the filtered set. The context manager guarantees the
    # handle is closed even if pickling fails.
    with open(output_psm_path, 'wb') as output_pickle:
        pickle.dump(psm_set, output_pickle, pickle.HIGHEST_PROTOCOL)

    output_peptide_path = os.path.join(output_peptide_directory,
                                       task_id + ".peptides")
    peptide_variant_set = save_psms_as_peptides(psm_set, output_peptide_path,
                                                0.05)

    return_dict["number_psms"] = len(psm_set.psms)
    return_dict["number_peptides"] = len(peptide_variant_set.peptide_list)

    return return_dict
def get_molecular_network_obj(job_obj):
    """Load the molecular network for a job, or return None on failure.

    Prints ``job_obj``, looks up the job's
    "clusterinfosummarygroup_attributes_withIDs" and "networkedges_selfloop"
    result files (passing ``"continuous"`` as the second lookup argument),
    and loads the first match of each into a
    ``molecular_network_library.MolecularNetwork``.

    Args:
        job_obj: Mapping that provides the task id under the key ``"task"``.

    Returns:
        The loaded ``MolecularNetwork``, or ``None`` if any step raised an
        ordinary exception (missing key, empty lookup result, load error).

    Interpreter-level signals (``KeyboardInterrupt``, ``SystemExit``) are
    deliberately not swallowed and propagate to the caller.
    """
    try:
        print(job_obj)
        task = job_obj["task"]
        path_to_clusterinfosummary = ming_proteosafe_library.get_proteosafe_result_file_path(
            task, "continuous",
            "clusterinfosummarygroup_attributes_withIDs")[0]
        path_to_pairs = ming_proteosafe_library.get_proteosafe_result_file_path(
            task, "continuous", "networkedges_selfloop")[0]

        molecular_network = molecular_network_library.MolecularNetwork()
        molecular_network.load_network(path_to_clusterinfosummary,
                                       path_to_pairs)
        return molecular_network
    except Exception:
        # Best effort: a job without a loadable network yields None.
        # `Exception` (not a bare except) keeps SystemExit and
        # KeyboardInterrupt propagating.
        return None
def grab_single_result(task_id, output_peptide_directory,
                       output_psm_directory):
    """Export the results of one task, choosing the matching export routine.

    Task information is fetched with
    ``ming_proteosafe_library.get_task_information("proteomics2.ucsd.edu",
    task_id)``. The number of "updated_eval_psms_with_kl_with_ambiguity"
    result files then selects the path:

    * none  -> ``grab_results_from_MSGFDB``
    * one   -> ``grab_results_from_multipass``
    * more  -> nothing is exported

    Args:
        task_id: Identifier of the task to export.
        output_peptide_directory: Forwarded to the export routine.
        output_psm_directory: Forwarded to the export routine.

    Returns:
        The dict returned by the selected export routine. When the task
        status is ``"FAILED"`` or more than one second-pass file is found,
        a dict with ``"task_id"`` and zeroed ``"number_psms"`` /
        ``"number_peptides"`` is returned instead.

    Raises:
        KeyError: If the task information lacks ``"user"`` or ``"status"``.
    """
    return_dict = {"number_psms": 0, "number_peptides": 0, "task_id": task_id}

    task_info = ming_proteosafe_library.get_task_information(
        "proteomics2.ucsd.edu", task_id)
    user = task_info["user"]
    if task_info["status"] == "FAILED":
        return return_dict

    # Check whether this task has the second-pass peptide output; if not,
    # it can be created from the first-pass results.
    path_to_secondpass_peptides_files_list = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "updated_eval_psms_with_kl_with_ambiguity")
    secondpass_file_count = len(path_to_secondpass_peptides_files_list)

    if secondpass_file_count == 0:
        return grab_results_from_MSGFDB(task_id, user,
                                        output_peptide_directory,
                                        output_psm_directory)

    if secondpass_file_count == 1:
        return grab_results_from_multipass(task_id, user,
                                           output_peptide_directory,
                                           output_psm_directory)

    # Several candidate files: ambiguous, so export nothing. Return the
    # zero-count summary rather than falling through to an implicit None,
    # keeping the return type the same on every path.
    return return_dict
Esempio n. 4
0
def get_molecular_network_obj(job_obj):
    """Load the molecular network for a job, or return None on failure.

    Prints ``job_obj``, looks up the job's
    "clusterinfosummarygroup_attributes_withIDs" and "networkedges_selfloop"
    result files (passing ``"continuous"`` as the second lookup argument),
    and loads the first match of each into a
    ``molecular_network_library.MolecularNetwork``.

    Args:
        job_obj: Mapping that provides the task id under the key ``"task"``.

    Returns:
        The loaded ``MolecularNetwork``, or ``None`` if any step raised an
        ordinary exception (missing key, empty lookup result, load error).

    Interpreter-level signals (``KeyboardInterrupt``, ``SystemExit``) are
    deliberately not swallowed and propagate to the caller.
    """
    try:
        print(job_obj)
        task = job_obj["task"]
        path_to_clusterinfosummary = ming_proteosafe_library.get_proteosafe_result_file_path(
            task, "continuous",
            "clusterinfosummarygroup_attributes_withIDs")[0]
        path_to_pairs = ming_proteosafe_library.get_proteosafe_result_file_path(
            task, "continuous", "networkedges_selfloop")[0]

        molecular_network = molecular_network_library.MolecularNetwork()
        molecular_network.load_network(path_to_clusterinfosummary,
                                       path_to_pairs)
        return molecular_network
    except Exception:
        # Best effort: a job without a loadable network yields None.
        # `Exception` (not a bare except) keeps SystemExit and
        # KeyboardInterrupt propagating.
        return None
def grab_results_from_multipass(task_id, user, output_peptide_directory,
                                output_psm_directory):
    """Export the second-pass PSMs and peptides of one task.

    The task's "updated_eval_psms_with_kl_with_ambiguity" result file is
    looked up. Only when exactly one file is found does the function:

    1. pass it, together with the task's "params" and "mergedResult" files,
       to ``name_demangle_filenames_and_instrument_collision`` with
       ``<output_psm_directory>/<task_id>.psms`` as the output path;
    2. load that path as a ``PSMset``;
    3. build a peptide set with
       ``library_creation.create_library_unique_peptides_filtered([psm_set],
       0.01)`` and write it to
       ``<output_peptide_directory>/<task_id>.peptides``.

    Args:
        task_id: Task identifier; also used as the output file stem.
        user: User passed to the result-file lookups.
        output_peptide_directory: Directory for the ``.peptides`` output.
        output_psm_directory: Directory for the ``.psms`` output.

    Returns:
        dict with keys ``"task_id"``, ``"number_psms"`` and
        ``"number_peptides"``. Both counts stay 0 when the PSM lookup does
        not yield exactly one file.

    Raises:
        IndexError: If the "params" or "mergedResult" lookup comes back
            empty (the first element of each is used unconditionally).
    """
    return_dict = {"number_psms": 0, "number_peptides": 0, "task_id": task_id}

    path_to_psm_files_list = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "updated_eval_psms_with_kl_with_ambiguity")
    if len(path_to_psm_files_list) != 1:
        # Zero or several candidate files: nothing is exported.
        return return_dict

    output_psm_path = os.path.join(output_psm_directory, task_id + ".psms")
    path_to_param_file = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "params")[0]

    # Looked up once and reused for both the log line and the path.
    merged_result_paths = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "mergedResult")
    print(merged_result_paths)
    path_to_merged_results = merged_result_paths[0]

    print(path_to_psm_files_list[0] + " to " + output_psm_path)
    name_demangle_filenames_and_instrument_collision(
        path_to_psm_files_list[0], output_psm_path, path_to_param_file,
        path_to_merged_results, "filename", "filename")

    # Now generate the peptide list from the psm list.
    psm_set = ming_psm_library.PSMset("task results")
    psm_set.load_PSM_tsvfile(output_psm_path)

    output_peptide_path = os.path.join(output_peptide_directory,
                                       task_id + ".peptides")

    # 0.01 is presumably an FDR threshold -- TODO confirm against
    # library_creation.
    peptide_variant_set = library_creation.create_library_unique_peptides_filtered(
        [psm_set], 0.01)

    # The context manager flushes and closes the output file; the handle
    # was previously opened inline and never closed.
    with open(output_peptide_path, "w") as output_peptide_file:
        peptide_variant_set.write_output(output_peptide_file)

    return_dict["number_psms"] = len(psm_set.psms)
    return_dict["number_peptides"] = len(peptide_variant_set.peptide_list)

    return return_dict