def _load_deduplicated_msgf_psms(results_filename):
    """Load an MSGF+ TSV results file into a PSMset and drop duplicated rows."""
    psm_set = ming_psm_library.PSMset(results_filename)
    psm_set.load_MSGF_Plus_tsvfile(results_filename)
    psm_set.remove_duplicated_rows()
    return psm_set


def _write_output_to_path(writable, output_path):
    """Call ``writable.write_output`` on a file that is reliably closed."""
    with open(output_path, "w") as output_file:
        writable.write_output(output_file)


def main():
    """Export first/second pass PSM and peptide tables.

    Command-line arguments (positional, all required):
        1  first pass MSGF+ results (TSV)
        2  second pass MSGF+ results (TSV)
        3  FASTA database filename (accepted but not used here)
        4  second pass proteins filename (accepted but not used here)
        5  output: first pass peptides
        6  output: second pass peptides
        7  output: first pass PSMs after filter_to_fdr_by_length(0.01)
        8  output: PSMs with updated e-values
        9  output: "original high FDR" PSMs
        10 output: "updated high FDR" PSMs
    """
    first_pass_results_filename = sys.argv[1]
    second_pass_results_filename = sys.argv[2]
    output_first_pass_peptides = sys.argv[5]
    output_second_pass_peptides = sys.argv[6]
    output_psms_first_pass = sys.argv[7]
    output_psms_updated_evalues = sys.argv[8]
    output_original_high_FDR_psms = sys.argv[9]
    output_updated_high_FDR_psms = sys.argv[10]

    # Low FDR: original and updated e-values.
    psm_list_first_pass = _load_deduplicated_msgf_psms(
        first_pass_results_filename)
    psm_list_first_pass.filter_to_fdr_by_length(0.01)
    _write_output_to_path(psm_list_first_pass, output_psms_first_pass)
    update_psm_set_with_second_pass_psms(
        first_pass_results_filename,
        second_pass_results_filename,
        output_psms_updated_evalues)

    # High FDR output, kept for display purposes.
    # NOTE(review): this "high FDR" export filters at 0.01 while the matching
    # update call below passes 0.05 -- possibly a copy/paste slip. The value
    # is left unchanged here; confirm the intended threshold.
    psm_list_first_pass = _load_deduplicated_msgf_psms(
        first_pass_results_filename)
    psm_list_first_pass.filter_to_fdr_by_length(0.01)
    _write_output_to_path(psm_list_first_pass, output_original_high_FDR_psms)
    update_psm_set_with_second_pass_psms(
        first_pass_results_filename,
        second_pass_results_filename,
        output_updated_high_FDR_psms,
        0.05)

    # Precursor level: unique peptides from each pass.
    psm_list_first_pass = _load_deduplicated_msgf_psms(
        first_pass_results_filename)
    full_peptides_list_first_pass = (
        library_creation.create_library_unique_peptides_filtered(
            [psm_list_first_pass], filter_by_length=True))
    _write_output_to_path(
        full_peptides_list_first_pass, output_first_pass_peptides)

    psm_list_second_pass = _load_deduplicated_msgf_psms(
        second_pass_results_filename)
    full_peptides_list_second_pass = (
        library_creation.create_library_unique_peptides_filtered(
            [psm_list_second_pass], filter_by_length=True))
    _write_output_to_path(
        full_peptides_list_second_pass, output_second_pass_peptides)
def get_first_pass_variant_set(first_pass_results_filename):
    """Build the unique-peptide variant set for a first pass search.

    Loads the MSGF+ TSV at ``first_pass_results_filename`` into a PSMset,
    applies ``filter_to_fdr_by_length(0.05)``, and hands the result to
    ``library_creation.create_library_unique_peptides_filtered`` with
    ``filter_by_length=True``. The PSM and variant counts are printed
    along the way.

    Returns the object produced by ``create_library_unique_peptides_filtered``.
    """
    first_pass_psms = ming_psm_library.PSMset(first_pass_results_filename)
    first_pass_psms.load_MSGF_Plus_tsvfile(first_pass_results_filename)
    first_pass_psms.filter_to_fdr_by_length(0.05)

    psm_count = len(first_pass_psms)
    print("First Pass PSMs: " + str(psm_count))

    variant_set = library_creation.create_library_unique_peptides_filtered(
        [first_pass_psms], filter_by_length=True)

    variant_count = len(variant_set)
    print("First Pass Variants: " + str(variant_count))

    return variant_set
def save_psms_as_peptides(psm_set, output_peptide_path, fdr):
    """Pickle the best PSM of every peptide variant derived from ``psm_set``.

    ``psm_set`` is collapsed into peptide variants with
    ``library_creation.create_library_unique_peptides_filtered`` (passing
    ``fdr`` and ``filter_by_length=True``). Each variant's
    ``get_best_psm()`` is collected into a fresh ``PSMset`` named
    ``"task results"``, which is pickled to ``output_peptide_path`` using
    ``pickle.HIGHEST_PROTOCOL``.

    Returns the peptide variant set (not the pickled PSMset).
    """
    peptide_variant_set = library_creation.create_library_unique_peptides_filtered(
        [psm_set], fdr, filter_by_length=True)

    # Use a distinct name rather than rebinding the ``psm_set`` parameter.
    best_psm_set = ming_psm_library.PSMset("task results")
    for peptide in peptide_variant_set.peptide_list:
        best_psm_set.psms.append(peptide.get_best_psm())

    # ``with`` guarantees the handle is closed even if pickling raises.
    with open(output_peptide_path, 'wb') as output_pickle:
        pickle.dump(best_psm_set, output_pickle, pickle.HIGHEST_PROTOCOL)

    return peptide_variant_set
def main():
    """Export stripped peptide sequences and a full peptide table.

    Command-line arguments (positional):
        1  input MSGF+ search results (TSV)
        2  output: one unique stripped peptide sequence per line, built from
           PSMs after ``filter_to_fdr_by_length(0.01)`` and
           ``create_library_unique_peptides_filtered(fdr=0.01,
           filter_by_length=True)``
        3  output: peptide variant table (header from
           ``PeptideVariant.output_header()``) built from the unfiltered
           PSMs with a threshold argument of 1.0
    """
    input_searchresults_filename = sys.argv[1]
    output_peptide_list = sys.argv[2]
    output_peptide_list_with_decoy_filename = sys.argv[3]

    psm_list = ming_psm_library.PSMset(input_searchresults_filename)
    psm_list.load_MSGF_Plus_tsvfile(input_searchresults_filename)
    psm_list.filter_to_fdr_by_length(0.01)
    print(len(psm_list))

    full_peptides_list = library_creation.create_library_unique_peptides_filtered(
        [psm_list], fdr=0.01, filter_by_length=True)

    # De-duplicate stripped sequences; output order follows set iteration,
    # exactly as before.
    unique_sequences = set(
        peptide.get_stripped_sequence()
        for peptide in full_peptides_list.peptide_list)
    with open(output_peptide_list, "w") as output_file:
        for sequence in unique_sequences:
            output_file.write(sequence + "\n")

    # Now reload the PSMs, keep all variants, and output them with the
    # decoys present.
    print("GIVING US FULL RESULT SET")
    psm_list = ming_psm_library.PSMset(input_searchresults_filename)
    psm_list.load_MSGF_Plus_tsvfile(input_searchresults_filename)
    full_peptides_list = library_creation.create_library_unique_peptides_filtered(
        [psm_list], 1.0)

    with open(output_peptide_list_with_decoy_filename, "w") as decoy_file:
        decoy_file.write(
            ming_psm_library.PeptideVariant.output_header() + "\n")
        for peptide in full_peptides_list.peptide_list:
            decoy_file.write(str(peptide) + "\n")
def main():
    """Write a per-protein table of target/decoy peptide coverage counts.

    Command-line arguments (positional):
        1  input FASTA proteome
        2  input MSGF+ search results (TSV)
        3  output: tab-separated table with columns
           protein, decoy_count, target_count, total_count, length

    Peptides come from ``create_library_unique_peptides_filtered`` called
    with 0.01 and ``filter_by_length=True``. Decoy peptide sequences are
    reversed before being matched against the proteome.
    """
    input_fasta_filename = sys.argv[1]
    input_searchresults_filename = sys.argv[2]
    output_proteins_as_list = sys.argv[3]

    proteome = ming_protein_library.parse_fasta_proteome_file(
        input_fasta_filename)

    psm_list = ming_psm_library.PSMset(input_searchresults_filename)
    psm_list.load_MSGF_Plus_tsvfile(input_searchresults_filename)
    full_peptides_list = library_creation.create_library_unique_peptides_filtered(
        [psm_list], 0.01, filter_by_length=True)

    target_peptide_strings = []
    decoy_peptide_strings = []
    for peptide_obj in full_peptides_list.peptide_list:
        peptide_to_search = peptide_obj.get_stripped_sequence()
        if peptide_obj.is_decoy():
            # Reverse the decoy sequence before searching the proteome.
            decoy_peptide_strings.append(peptide_to_search[::-1])
        else:
            target_peptide_strings.append(peptide_to_search)

    protein_coverage_of_targets = (
        proteome.get_proteins_with_number_of_peptides_covered_map(
            target_peptide_strings))
    protein_coverage_of_decoys = (
        proteome.get_proteins_with_number_of_peptides_covered_map(
            decoy_peptide_strings))

    # ``with`` closes the table even if a lookup below raises.
    with open(output_proteins_as_list, "w") as output_file:
        output_file.write(
            "protein\tdecoy_count\ttarget_count\ttotal_count\tlength\n")
        for protein in protein_coverage_of_targets:
            # NOTE(review): assumes every protein in the target map is also
            # a key of the decoy map -- confirm against the proteome helper.
            decoy_count = protein_coverage_of_decoys[protein]
            target_count = protein_coverage_of_targets[protein]
            protein_length = len(proteome.protein_map[protein].sequence)
            output_string = protein + "\t"
            output_string += str(decoy_count) + "\t"
            output_string += str(target_count) + "\t"
            output_string += str(target_count + decoy_count) + "\t"
            output_string += str(protein_length) + "\n"
            output_file.write(output_string)
def grab_results_from_multipass(task_id, user, output_peptide_directory, output_psm_directory):
    """Copy a multipass task's PSMs locally and derive its peptide list.

    Looks up the task's "updated_eval_psms_with_kl_with_ambiguity" result
    files. Only when exactly one such file exists does it:
      * rewrite that file to ``<output_psm_directory>/<task_id>.psms`` via
        ``name_demangle_filenames_and_instrument_collision`` (using the
        task's "params" and "mergedResult" files),
      * load the rewritten PSMs and write the peptide variants produced by
        ``create_library_unique_peptides_filtered(..., 0.01)`` to
        ``<output_peptide_directory>/<task_id>.peptides``.

    Returns a dict with keys ``task_id``, ``number_psms`` and
    ``number_peptides``; both counts stay 0 when the task does not have
    exactly one PSM result file.
    """
    return_dict = {
        "number_psms": 0,
        "number_peptides": 0,
        "task_id": task_id,
    }

    # Locate the PSM file to copy.
    path_to_psm_files_list = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "updated_eval_psms_with_kl_with_ambiguity")
    if len(path_to_psm_files_list) != 1:
        return return_dict

    output_psm_path = os.path.join(output_psm_directory, task_id + ".psms")
    path_to_param_file = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "params")[0]

    # Look the merged result up once; it is printed and then used.
    merged_result_paths = ming_proteosafe_library.get_proteosafe_result_file_path(
        task_id, user, "mergedResult")
    print(merged_result_paths)
    path_to_merged_results = merged_result_paths[0]

    print(path_to_psm_files_list[0] + " to " + output_psm_path)
    name_demangle_filenames_and_instrument_collision(
        path_to_psm_files_list[0], output_psm_path, path_to_param_file,
        path_to_merged_results, "filename", "filename")

    # Now generate the peptide list from the PSM list.
    psm_set = ming_psm_library.PSMset("task results")
    psm_set.load_PSM_tsvfile(output_psm_path)

    # The original chained assignment also overwrote output_psm_path here;
    # only the peptide path is needed.
    output_peptide_path = os.path.join(
        output_peptide_directory, task_id + ".peptides")
    peptide_variant_set = library_creation.create_library_unique_peptides_filtered(
        [psm_set], 0.01)
    with open(output_peptide_path, "w") as output_peptide_file:
        peptide_variant_set.write_output(output_peptide_file)

    return_dict["number_psms"] = len(psm_set.psms)
    return_dict["number_peptides"] = len(peptide_variant_set.peptide_list)

    return return_dict
def main():
    """Export the names of proteins selected by peptide coverage.

    Command-line arguments (positional):
        1  input FASTA proteome
        2  input MSGF+ search results (TSV)
        3  output: JSON list of protein names (despite the "fasta" name of
           the variable holding this path)
        4  output: plain-text list, a "Protein" header then one name per line

    PSMs go through ``filter_to_fdr_by_length(0.01)`` and then
    ``create_library_unique_peptides_filtered(fdr=0.01,
    filter_by_length=True)``. The stripped peptide sequences are passed to
    ``proteome.get_proteins_covered_by_k_peptides(..., 2, True)``.

    Exits the process with status 0 when done.
    """
    input_fasta_filename = sys.argv[1]
    input_searchresults_filename = sys.argv[2]
    output_fasta_filename = sys.argv[3]
    output_proteins_as_list = sys.argv[4]

    proteome = ming_protein_library.parse_fasta_proteome_file(input_fasta_filename)

    psm_list = ming_psm_library.PSMset(input_searchresults_filename)
    psm_list.load_MSGF_Plus_tsvfile(input_searchresults_filename)
    psm_list.filter_to_fdr_by_length(0.01)
    print(len(psm_list))

    full_peptides_list = library_creation.create_library_unique_peptides_filtered(
        [psm_list], fdr=0.01, filter_by_length=True)

    all_peptide_strings = [
        peptide_obj.get_stripped_sequence()
        for peptide_obj in full_peptides_list.peptide_list
    ]
    all_proteins = proteome.get_proteins_covered_by_k_peptides(
        all_peptide_strings, 2, True)
    all_protein_names = [protein.protein for protein in all_proteins]

    # Close both outputs explicitly before exiting so buffered data is
    # flushed deterministically.
    with open(output_fasta_filename, "w") as output_protein_file:
        output_protein_file.write(json.dumps(all_protein_names))

    # Output the list of proteins.
    with open(output_proteins_as_list, "w") as output_protein_list_file:
        output_protein_list_file.write("Protein\n")
        for protein_name in all_protein_names:
            output_protein_list_file.write(protein_name + "\n")

    # sys.exit is always available; the bare ``exit`` builtin is injected by
    # the ``site`` module and is intended for interactive use.
    sys.exit(0)