def post_process_normal(
    spectrum_in: SpectrumType,
    min_peaks: int = 10,
) -> Union[SpectrumType, None]:
    """Apply the normal Spec2Vec post-processing chain to one spectrum.

    The filters operate on ``spectrum_in.clone()``, not on the object that
    was passed in.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is returned unchanged as ``None``.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).

    Returns
    -------
    The processed spectrum with losses added, or ``None`` if the spectrum
    was rejected along the way.
    """
    if spectrum_in is None:
        return None

    spectrum = normalize_intensities(spectrum_in.clone())
    # NaN intensities after normalization: reject spectra that have all
    # intensities 0.
    if any(np.isnan(spectrum.peaks[1])):
        return None

    spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
    spectrum = require_minimum_number_of_peaks(spectrum, n_required=min_peaks)
    spectrum = reduce_to_number_of_peaks(
        spectrum, n_required=min_peaks, ratio_desired=0.5)
    if spectrum is None:
        return None

    # Low peaks are only dropped when at least 10 peaks remain afterwards;
    # otherwise the unpruned spectrum is kept.
    pruned = select_by_relative_intensity(spectrum, intensity_from=0.001)
    spectrum = pruned if len(pruned.peaks) >= 10 else spectrum

    # Add losses to the normally processed spectrum.
    return add_losses(spectrum, loss_mz_from=5.0, loss_mz_to=200.0)
def test_require_minimum_number_of_peaks_required_4_ratio_none(spectrum_in: SpectrumType):
    """Test if parent_mass scaling is properly ignored when not passing ratio_required.

    ``spectrum_in`` is supplied from outside this function (presumably a
    pytest fixture holding a 4-peak spectrum -- confirm against the fixture
    definition).
    """
    spectrum_in.set("parent_mass", 100)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4)

    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "the required" from running together.
    assert spectrum == spectrum_in, (
        "Expected the spectrum to qualify because the number of peaks (4) is equal to the "
        "required number (4)."
    )
def apply_my_filters(s):
    """Run spectrum ``s`` through a fixed chain of filters and return the result.

    Every filter receives the output of the previous one. The keyword
    settings used for each filter are listed next to it below.
    """
    filter_chain = (
        (default_filters, {}),
        (add_parent_mass, {}),
        (normalize_intensities, {}),
        (select_by_relative_intensity, {"intensity_from": 0.0, "intensity_to": 1.0}),
        (select_by_mz, {"mz_from": 0, "mz_to": 1000}),
        (require_minimum_number_of_peaks, {"n_required": 5}),
    )
    for apply_filter, settings in filter_chain:
        s = apply_filter(s, **settings)
    return s
def test_require_minimum_number_of_peaks_no_params():
    """A 4-peak spectrum must be rejected when the filter runs with its defaults."""
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
    )

    result = require_minimum_number_of_peaks(spectrum_in)

    assert result is None, \
        "Expected None because the number of peaks (4) is less than the default threshold (10)."
def test_require_minimum_number_of_peaks_required_4():
    """A 4-peak spectrum must pass unchanged when exactly 4 peaks are required."""
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    spectrum_in = Spectrum(mz=mz, intensities=intensities)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4)

    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "the required" from running together.
    assert spectrum == spectrum_in, (
        "Expected the spectrum to qualify because the number of peaks (4) is equal to the "
        "required number (4)."
    )
def apply_my_filters(s):
    """Example of a user-defined pre- and post-processing pipeline.

    The spectrum ``s`` is handed from one filter to the next in the order
    written below, and the output of the last filter is returned.
    """
    cleaned = default_filters(s)
    with_parent_mass = add_parent_mass(cleaned)
    normalized = normalize_intensities(with_parent_mass)
    reduced = reduce_to_number_of_peaks(normalized, n_required=10, ratio_desired=0.5)
    mz_selected = select_by_mz(reduced, mz_from=0, mz_to=1000)
    with_losses = add_losses(mz_selected, loss_mz_from=10.0, loss_mz_to=200.0)
    return require_minimum_number_of_peaks(with_losses, n_required=5)
def spectrum_processing(s):
    """Example of a typical pre- and post-processing pipeline for one spectrum.

    Each filter below is applied to the output of the previous one, using
    the keyword settings listed next to it. The final result is returned.
    """
    processing_steps = (
        (default_filters, {}),
        (add_precursor_mz, {}),
        (normalize_intensities, {}),
        (reduce_to_number_of_peaks, {"n_required": 5, "ratio_desired": 0.5, "n_max": 500}),
        (select_by_mz, {"mz_from": 0, "mz_to": 1000}),
        (add_losses, {"loss_mz_from": 10.0, "loss_mz_to": 200.0}),
        (require_minimum_number_of_peaks, {"n_required": 5}),
    )
    for step, options in processing_steps:
        s = step(s, **options)
    return s
def test_require_minimum_number_of_peaks_required_4_ratio_none():
    """Test if parent_mass scaling is properly ignored when not passing ratio_required."""
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    metadata = dict(parent_mass=100)
    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4)

    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "the required" from running together.
    assert spectrum == spectrum_in, (
        "Expected the spectrum to qualify because the number of peaks (4) is equal to the "
        "required number (4)."
    )
def test_require_minimum_number_of_peaks_required_5_or_1():
    """A 4-peak spectrum must be rejected when 5 peaks are required.

    The spectrum carries ``parent_mass=10`` and the filter is called with
    ``ratio_required=0.1``; the expected outcome is still a rejection.
    """
    spectrum_in = Spectrum(
        mz=numpy.array([10, 20, 30, 40], dtype="float"),
        intensities=numpy.array([0, 1, 10, 100], dtype="float"),
        metadata={"parent_mass": 10},
    )

    result = require_minimum_number_of_peaks(spectrum_in, n_required=5, ratio_required=0.1)

    assert result is None, (
        "Did not expect the spectrum to qualify because the number of peaks (4) is less "
        "than the required number (5)."
    )
def test_require_minimum_number_of_peaks_required_4_or_1():
    """A 4-peak spectrum must pass when 4 peaks are required.

    The spectrum carries ``parent_mass=10`` and the filter is called with
    ``ratio_required=0.1``; the spectrum is still expected to qualify.
    """
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    metadata = dict(parent_mass=10)
    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)

    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4, ratio_required=0.1)

    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "the required" from running together.
    assert spectrum == spectrum_in, (
        "Expected the spectrum to qualify because the number of peaks (4) is equal to the "
        "required number (4)."
    )
def post_process(s):
    """Post-process spectrum ``s`` and return it with losses added.

    The spectrum is normalized, restricted to m/z 0-1000 and required to
    have at least 10 peaks. Reducing the number of peaks is best-effort: if
    that step fails, processing continues with the unreduced spectrum.

    Returns ``None`` when the spectrum is rejected by one of the filters.
    """
    import logging

    s = normalize_intensities(s)
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=10)
    if s is None:
        # Already rejected; there is nothing left to reduce.
        return None

    try:
        s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
    except Exception:  # deliberately broad: this step is best-effort
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate, and leave a trace
        # of the failure instead of dropping it silently.
        logging.getLogger(__name__).debug(
            "reduce_to_number_of_peaks failed; keeping unreduced spectrum",
            exc_info=True,
        )
    if s is None:
        return None

    # Low peaks are only dropped when at least 10 peaks remain afterwards.
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= 10:
        s = s_remove_low_peaks

    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
def post_process_md(
    spectrum_in: SpectrumType,
    low_int_cutoff: float = 0.05,
    min_peaks: int = 10,
    max_peaks: int = 30,
) -> Union[SpectrumType, None]:
    """Prepare a spectrum for mass difference (MD) extraction.

    The filters operate on ``spectrum_in.clone()``, not on the object that
    was passed in.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is returned unchanged as ``None``.
    low_int_cutoff:
        Lower intensity cutoff for the peaks selected for MD.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).
    max_peaks:
        Maximum number of peaks allowed in the spectrum (ranked on intensity).

    Returns
    -------
    The processed spectrum, or ``None`` if the spectrum was rejected along
    the way.
    """
    if spectrum_in is None:
        return None

    # Remove the precursor_mz peak first so neutral losses don't end up in MDs.
    spectrum = remove_precursor_mz_peak(spectrum_in.clone())
    spectrum = normalize_intensities(spectrum)
    # NaN intensities after normalization: reject spectra that have all
    # intensities 0.
    if any(np.isnan(spectrum.peaks[1])):
        return None

    spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
    spectrum = require_minimum_number_of_peaks(spectrum, n_required=min_peaks)
    spectrum = reduce_to_number_of_peaks(
        spectrum, n_required=min_peaks, ratio_desired=0.5)
    if spectrum is None:
        return None

    # Two successive low-intensity removal passes: a fixed 0.001 cutoff, then
    # the caller-supplied one. Each pass is only accepted when at least 10
    # peaks are left afterwards.
    for cutoff in (0.001, low_int_cutoff):
        candidate = select_by_relative_intensity(spectrum, intensity_from=cutoff)
        if len(candidate.peaks) >= 10:
            spectrum = candidate

    # Final reduction with max_peaks as the upper limit.
    return reduce_to_number_of_peaks(spectrum, n_required=min_peaks, n_max=max_peaks)
def post_process_classical(
    spectrum_in: SpectrumType,
    min_peaks: int = 10,
) -> Union[SpectrumType, None]:
    """Prepare a spectrum for calculating classical scores.

    The filters operate on ``spectrum_in.clone()``, not on the object that
    was passed in.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is returned unchanged as ``None``.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None).

    Returns
    -------
    The output of the final relative-intensity filter, or ``None`` when the
    input is ``None`` or contains NaN intensities after normalization.
    """
    if spectrum_in is None:
        return None

    spectrum = normalize_intensities(spectrum_in.clone())
    # NaN intensities after normalization: reject spectra that have all
    # intensities 0.
    if any(np.isnan(spectrum.peaks[1])):
        return None

    spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
    spectrum = require_minimum_number_of_peaks(spectrum, n_required=min_peaks)
    return select_by_relative_intensity(spectrum, intensity_from=0.01, intensity_to=1.0)
def test_require_minimum_number_of_peaks_required_5_or_10(spectrum_in: SpectrumType):
    """The spectrum must be rejected with parent_mass=100 and ratio_required=0.1.

    ``spectrum_in`` is supplied from outside this function (presumably a
    pytest fixture holding a 4-peak spectrum -- confirm against the fixture
    definition).
    """
    spectrum_in.set("parent_mass", 100)

    result = require_minimum_number_of_peaks(spectrum_in, n_required=5, ratio_required=0.1)

    assert result is None, (
        "Did not expect the spectrum to qualify because the number of peaks (4) is less "
        "than the required number (10)."
    )
def test_require_minimum_number_of_peaks_no_params(spectrum_in: SpectrumType):
    """The spectrum must be rejected when the filter runs with its defaults.

    ``spectrum_in`` is supplied from outside this function (presumably a
    pytest fixture holding a 4-peak spectrum -- confirm against the fixture
    definition).
    """
    result = require_minimum_number_of_peaks(spectrum_in)

    assert result is None, \
        "Expected None because the number of peaks (4) is less than the default threshold (10)."
def test_empty_spectrum():
    """A ``None`` input must come back from the filter as ``None``."""
    result = require_minimum_number_of_peaks(None)

    assert result is None, "Expected different handling of None spectrum."
def test_require_minimum_number_of_peaks_required_4_or_1(spectrum_in: SpectrumType):
    """The spectrum must pass unchanged with n_required=4 and ratio_required=0.1.

    ``spectrum_in`` is supplied from outside this function (presumably a
    pytest fixture holding a 4-peak spectrum -- confirm against the fixture
    definition).
    """
    spectrum = require_minimum_number_of_peaks(spectrum_in, n_required=4, ratio_required=0.1)

    # The two literals are concatenated implicitly, so the first one must end
    # with a space to keep "the required" from running together.
    assert spectrum == spectrum_in, (
        "Expected the spectrum to qualify because the number of peaks (4) is equal to the "
        "required number (4)."
    )