def find_matches_in_dataset(dataset_id, input_spectrum_collection):
    """Search every input spectrum against a dataset's clustered MGF file.

    NOTE(review): this file defines ``find_matches_in_dataset`` a second time
    further down with the same logic; the later definition rebinds the name,
    so this copy is shadowed at import time.

    Args:
        dataset_id: Dataset identifier. It is used both as the folder name
            under ``PATH_TO_DATASET_UPLOADS`` and as the prefix of the
            ``<dataset_id>_specs_ms.mgf`` file name.
        input_spectrum_collection: Object exposing ``spectrum_list``, the
            query spectra to search with.

    Returns:
        A list with the matches of all query spectra. Each match has its
        ``filename`` attribute set to the dataset-relative path of the
        clustered MGF. The list is empty when that MGF file is not present.
    """
    mgf_relative_path = os.path.join(
        dataset_id, "clustered", dataset_id + "_specs_ms.mgf")
    mgf_absolute_path = os.path.join(PATH_TO_DATASET_UPLOADS, mgf_relative_path)

    # A dataset without clustered spectra simply contributes no matches.
    if not ming_fileio_library.is_path_present(mgf_absolute_path):
        return []

    clustered_spectra = ming_spectrum_library.SpectrumCollection(mgf_absolute_path)
    clustered_spectra.load_from_file()

    dataset_match_list = []
    for query_spectrum in input_spectrum_collection.spectrum_list:
        # Positional arguments follow the order used by find_matches_in_file:
        # precursor tolerance, fragment tolerance, minimum matched peaks and
        # minimum cosine. NOTE(review): the meaning of the trailing ``1``
        # depends on search_spectrum's signature -- confirm.
        match_list = clustered_spectra.search_spectrum(
            query_spectrum, 1.0, 1.0, 6, 0.7, 1)
        for match in match_list:
            # NOTE(review): find_matches_in_file stores this value with
            # match["filename"]; confirm which kind of match object
            # search_spectrum returns.
            match.filename = mgf_relative_path
        dataset_match_list += match_list

    # Call form of print: valid on Python 2 and 3 with a single argument.
    print("Dataset matches: " + str(len(dataset_match_list)))
    return dataset_match_list
def get_proteosafe_result_file_path(task_id, username, source_folder_name):
    """List the files in one result folder of a user's ProteoSAFe task.

    The folder inspected is
    ``/data/ccms-data/tasks/<username>/<task_id>/<source_folder_name>``.

    Args:
        task_id: Task identifier (folder name below the user's folder).
        username: Owner of the task (folder name below the tasks root).
        source_folder_name: Name of the result folder inside the task.

    Returns:
        The result of ``ming_fileio_library.list_files_in_dir`` for that
        folder, or an empty list when the folder is not present.
    """
    task_folder = os.path.join(
        "/data/ccms-data/tasks/", username, task_id, source_folder_name)

    if ming_fileio_library.is_path_present(task_folder):
        return ming_fileio_library.list_files_in_dir(task_folder)
    return []
def get_proteosafe_backend_result_file_path(task_id, source_folder_name, site):
    """List the files in one result folder of a task on the backend storage.

    The folder inspected is ``<root>/tasks/<task_id>/<source_folder_name>``,
    where ``<root>`` is ``/data/beta-proteomics2`` when ``site`` equals
    ``"proteomics2"`` and ``/data/`` for every other site.

    Args:
        task_id: Task identifier (folder name below ``tasks``).
        source_folder_name: Name of the result folder inside the task.
        site: Site name used to select the data root.

    Returns:
        The result of ``ming_fileio_library.list_files_in_dir`` for that
        folder, or an empty list when the folder is not present.
    """
    site_suffix = "beta-proteomics2" if site == "proteomics2" else ""
    data_root = "/data/" + site_suffix
    task_folder = os.path.join(data_root, "tasks", task_id, source_folder_name)

    if ming_fileio_library.is_path_present(task_folder):
        return ming_fileio_library.list_files_in_dir(task_folder)
    return []
def _apply_peak_filters(spectrum, match_parameters):
    """Run the peak filters enabled in ``match_parameters`` on ``spectrum``.

    The return values of the filter methods are ignored, so they presumably
    modify the spectrum in place -- confirm against the spectrum class.
    """
    if match_parameters["FILTER_WINDOW"]:
        # NOTE(review): 50 and 6 are passed through unchanged; their meaning
        # is defined by window_filter_peaks.
        spectrum.window_filter_peaks(50, 6)
    if match_parameters["FILTER_PRECURSOR"]:
        spectrum.filter_precursor_peaks()


def find_matches_in_file(input_spectrum_collection, dataset_filepath,
                         relative_dataset_filepath, match_parameters, top_k=1):
    """Search every input spectrum against the spectra stored in one file.

    Args:
        input_spectrum_collection: Object exposing ``spectrum_list``, the
            query spectra. The enabled peak filters are applied to these
            spectra on every call.
        dataset_filepath: Path of the spectrum file to load and search.
        relative_dataset_filepath: Value stored under ``"filename"`` in every
            returned match.
        match_parameters: Mapping read for the keys ``FILTER_WINDOW``,
            ``FILTER_PRECURSOR``, ``PM_TOLERANCE``, ``FRAGMENT_TOLERANCE``,
            ``MIN_MATCHED_PEAKS``, ``MIN_COSINE`` and ``ANALOG_SEARCH``.
        top_k: Forwarded to ``search_spectrum`` as ``top_k``.

    Returns:
        A list with the matches of all query spectra. It is empty when the
        file is missing or cannot be loaded; a query spectrum whose search
        fails is skipped and contributes no matches.
    """
    dataset_match_list = []

    if not ming_fileio_library.is_path_present(dataset_filepath):
        print("Cant find", dataset_filepath)
        return dataset_match_list

    dataset_query_spectra = ming_spectrum_library.SpectrumCollection(
        dataset_filepath)
    try:
        dataset_query_spectra.load_from_file()
    except Exception as error:
        # Best effort: an unreadable file yields no matches, but the failure
        # is reported. KeyboardInterrupt/SystemExit are allowed to propagate.
        print("Cant load " + str(dataset_filepath) + ": " + repr(error))
        return dataset_match_list

    for repo_spectrum in dataset_query_spectra.spectrum_list:
        _apply_peak_filters(repo_spectrum, match_parameters)

    for myspectrum in input_spectrum_collection.spectrum_list:
        _apply_peak_filters(myspectrum, match_parameters)

        try:
            match_list = dataset_query_spectra.search_spectrum(
                myspectrum,
                match_parameters["PM_TOLERANCE"],
                match_parameters["FRAGMENT_TOLERANCE"],
                match_parameters["MIN_MATCHED_PEAKS"],
                match_parameters["MIN_COSINE"],
                analog_search=match_parameters["ANALOG_SEARCH"],
                top_k=top_k)
            for match in match_list:
                match["filename"] = relative_dataset_filepath
            dataset_match_list += match_list
        except Exception as error:
            # Keep searching the remaining spectra, but say what went wrong.
            print("Error in Matching: " + repr(error))

    print("Dataset matches: " + str(len(dataset_match_list)))
    return dataset_match_list
def find_matches_in_dataset(dataset_id, input_spectrum_collection):
    """Search every input spectrum against a dataset's clustered MGF file.

    NOTE(review): an earlier definition of ``find_matches_in_dataset`` with
    the same logic exists higher up in this file; this later definition is
    the one the name ends up bound to.

    Args:
        dataset_id: Dataset identifier. It is used both as the folder name
            under ``PATH_TO_DATASET_UPLOADS`` and as the prefix of the
            ``<dataset_id>_specs_ms.mgf`` file name.
        input_spectrum_collection: Object exposing ``spectrum_list``, the
            query spectra to search with.

    Returns:
        A list with the matches of all query spectra. Each match has its
        ``filename`` attribute set to the dataset-relative path of the
        clustered MGF. The list is empty when that MGF file is not present.
    """
    mgf_relative_path = os.path.join(
        dataset_id, "clustered", dataset_id + "_specs_ms.mgf")
    mgf_absolute_path = os.path.join(PATH_TO_DATASET_UPLOADS, mgf_relative_path)

    # A dataset without clustered spectra simply contributes no matches.
    if not ming_fileio_library.is_path_present(mgf_absolute_path):
        return []

    clustered_spectra = ming_spectrum_library.SpectrumCollection(mgf_absolute_path)
    clustered_spectra.load_from_file()

    dataset_match_list = []
    for query_spectrum in input_spectrum_collection.spectrum_list:
        # Positional arguments follow the order used by find_matches_in_file:
        # precursor tolerance, fragment tolerance, minimum matched peaks and
        # minimum cosine. NOTE(review): the meaning of the trailing ``1``
        # depends on search_spectrum's signature -- confirm.
        match_list = clustered_spectra.search_spectrum(
            query_spectrum, 1.0, 1.0, 6, 0.7, 1)
        for match in match_list:
            # NOTE(review): find_matches_in_file stores this value with
            # match["filename"]; confirm which kind of match object
            # search_spectrum returns.
            match.filename = mgf_relative_path
        dataset_match_list += match_list

    # Call form of print: valid on Python 2 and 3 with a single argument.
    print("Dataset matches: " + str(len(dataset_match_list)))
    return dataset_match_list