def test_add_losses_without_precursor_mz_parameterized(mz, intensities):
    """Check that add_losses leaves a spectrum without precursor_mz untouched.

    The filter is expected to return an equal but distinct spectrum object
    and to log a warning pointing at the 'add_precursor_mz' filter.

    Parameters
    ----------
    mz:
        m/z values handed to the spectrum builder.
    intensities:
        Intensity values handed to the spectrum builder.
    """
    spectrum_in = (
        SpectrumBuilder().with_mz(mz).with_intensities(intensities).build()
    )

    # Run the filter exactly once, inside the capture: a second call outside
    # of it would only repeat the work and have its result overwritten.
    with LogCapture() as log:
        spectrum = add_losses(spectrum_in)

    assert spectrum == spectrum_in and spectrum is not spectrum_in
    log.check((
        "matchms",
        "WARNING",
        "No precursor_mz found. Consider applying 'add_precursor_mz' filter first."
    ))
def post_process_normal(spectrum_in: SpectrumType, min_peaks: int = 10) \
        -> Union[SpectrumType, None]:
    """Normal processing of spectra for Spec2Vec

    Works on a clone of the input, so the spectrum passed in is not the
    object that gets filtered.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is returned unchanged.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None)

    Returns
    -------
    The processed spectrum with losses added, or ``None`` when the spectrum
    is rejected by one of the steps below.
    """
    if spectrum_in is None:
        return None

    processed = normalize_intensities(spectrum_in.clone())

    # remove spectra that have all intensities 0
    nan_flags = np.isnan(processed.peaks[1])
    if any(nan_flags):
        return None

    processed = select_by_mz(processed, mz_from=0, mz_to=1000)
    processed = require_minimum_number_of_peaks(processed,
                                                n_required=min_peaks)
    processed = reduce_to_number_of_peaks(processed,
                                          n_required=min_peaks,
                                          ratio_desired=0.5)
    if processed is None:
        return None

    # remove low peaks unless less than 10 peaks are left
    without_low_peaks = select_by_relative_intensity(processed,
                                                     intensity_from=0.001)
    kept = without_low_peaks if len(without_low_peaks.peaks) >= 10 else processed

    # add losses to normally processed spectra
    return add_losses(kept, loss_mz_from=5.0, loss_mz_to=200.0)
def test_add_losses_returns_new_spectrum_instance():
    """For an empty spectrum, add_losses returns an equal but new object."""
    empty_mz = numpy.array([], dtype="float")
    empty_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=empty_mz, intensities=empty_intensities)

    result = add_losses(spectrum_in)

    # Same content ...
    assert result == spectrum_in
    # ... but not the very same object.
    assert result is not spectrum_in
def test_add_losses_returns_new_spectrum_instance():
    """Test if no change is done to empty spectrum.

    The returned spectrum must compare equal to the input while being a
    different object.
    """
    no_values = {"dtype": "float"}
    spectrum_in = Spectrum(
        mz=numpy.array([], **no_values),
        intensities=numpy.array([], **no_values),
    )

    spectrum_out = add_losses(spectrum_in)

    assert spectrum_out == spectrum_in
    assert spectrum_out is not spectrum_in
def test_add_losses_without_precursor_mz():
    """Without precursor_mz metadata, add_losses returns an equal new spectrum."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    # No metadata on purpose: there is no precursor_mz to compute losses from.
    spectrum_in = Spectrum(mz=peak_mz, intensities=peak_intensities)

    result = add_losses(spectrum_in)

    assert result == spectrum_in
    assert result is not spectrum_in
def test_add_losses_without_precursor_mz():
    """Test if no changes are done without having a precursor-m/z.

    The output must equal the input spectrum and still be a separate object.
    """
    spectrum_in = Spectrum(
        mz=numpy.array([100, 150, 200, 300], dtype="float"),
        intensities=numpy.array([700, 200, 100, 1000], dtype="float"),
    )

    spectrum_out = add_losses(spectrum_in)

    unchanged = spectrum_out == spectrum_in
    assert unchanged and spectrum_out is not spectrum_in
def test_add_losses():
    """Expect loss m/z values equal to precursor_mz minus each peak m/z."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(
        mz=peak_mz,
        intensities=peak_intensities,
        metadata={"precursor_mz": 445.0},
    )

    spectrum = add_losses(spectrum_in)

    # 445 - [300, 200, 150, 100]
    expected_loss_mz = numpy.array([145, 245, 295, 345], "float")
    assert numpy.allclose(spectrum.losses.mz, expected_loss_mz)
def test_add_losses_with_precursor_mz_wrong_type():
    """Test if correct assert error is raised for precursor-mz as string."""
    # precursor_mz is deliberately a string instead of a number.
    bad_metadata = {"precursor_mz": "445.0"}
    spectrum_in = Spectrum(
        mz=numpy.array([100, 150, 200, 300], dtype="float"),
        intensities=numpy.array([700, 200, 100, 1000], dtype="float"),
        metadata=bad_metadata,
    )

    with pytest.raises(AssertionError) as raised:
        add_losses(spectrum_in)

    # Checked after the context manager: code following the raising call
    # inside the `with` body would never run.
    assert "Expected 'precursor_mz' to be a scalar number." in str(raised.value)
def apply_my_filters(s):
    """This is how a user would typically design their own pre- and post-
    processing pipeline.

    Applies the filters listed below in order, feeding each one the result
    of the previous step, and returns the final result.
    """
    steps = (
        default_filters,
        add_parent_mass,
        normalize_intensities,
        lambda spec: reduce_to_number_of_peaks(spec, n_required=10,
                                               ratio_desired=0.5),
        lambda spec: select_by_mz(spec, mz_from=0, mz_to=1000),
        lambda spec: add_losses(spec, loss_mz_from=10.0, loss_mz_to=200.0),
        lambda spec: require_minimum_number_of_peaks(spec, n_required=5),
    )
    for step in steps:
        s = step(s)
    return s
def apply_my_filters(s):
    """Run a fixed chain of filters on `s` and return the outcome.

    Order: default filters, parent mass, losses, intensity normalization,
    relative-intensity selection (0.01 to 1.0), m/z selection (0 to 1000)
    and finally the minimum-peak requirement (5 peaks).
    """
    with_defaults = default_filters(s)
    with_parent_mass = add_parent_mass(with_defaults)
    with_losses = add_losses(with_parent_mass)
    normalized = normalize_intensities(with_losses)
    intensity_selected = select_by_relative_intensity(
        normalized, intensity_from=0.01, intensity_to=1.0)
    mz_selected = select_by_mz(intensity_selected, mz_from=0, mz_to=1000)
    return require_minimum_number_of_peaks(mz_selected, n_required=5)
def spectrum_processing(s):
    """This is how one would typically design a desired pre- and post-
    processing pipeline.

    Each entry of the table below is a filter together with its keyword
    arguments; the filters are applied top to bottom and the last result
    is returned.
    """
    pipeline = [
        (default_filters, {}),
        (add_precursor_mz, {}),
        (normalize_intensities, {}),
        (reduce_to_number_of_peaks,
         {"n_required": 5, "ratio_desired": 0.5, "n_max": 500}),
        (select_by_mz, {"mz_from": 0, "mz_to": 1000}),
        (add_losses, {"loss_mz_from": 10.0, "loss_mz_to": 200.0}),
        (require_minimum_number_of_peaks, {"n_required": 5}),
    ]
    for filter_function, keyword_args in pipeline:
        s = filter_function(s, **keyword_args)
    return s
def test_add_losses_with_max_loss_mz_250():
    """Test if losses are correctly generated and losses with mz > 250 are
    discarded."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": 445.0})

    spectrum = add_losses(spectrum_in, loss_mz_to=250)

    # Only the losses of the 300 and 200 m/z peaks fall below the limit.
    assert numpy.allclose(
        spectrum.losses.mz,
        numpy.array([145, 245], "float")), "Expected different loss m/z."
    assert numpy.allclose(
        spectrum.losses.intensities,
        numpy.array([1000, 100], "float")), "Expected different intensities."
def test_add_losses():
    """Test if all losses are correctly generated from mz values and
    precursor-m/z."""
    metadata = {"precursor_mz": 445.0}
    spectrum_in = Spectrum(
        mz=numpy.array([100, 150, 200, 300], dtype="float"),
        intensities=numpy.array([700, 200, 100, 1000], dtype="float"),
        metadata=metadata,
    )

    spectrum = add_losses(spectrum_in)

    # Losses are expected in ascending m/z order, each carrying the
    # intensity of the peak it was derived from.
    want_mz = numpy.array([145, 245, 295, 345], "float")
    want_intensities = numpy.array([1000, 100, 200, 700], "float")
    assert numpy.allclose(spectrum.losses.mz, want_mz), \
        "Expected different loss m/z."
    assert numpy.allclose(spectrum.losses.intensities, want_intensities), \
        "Expected different intensities."
def test_add_losses_with_peakmz_larger_precursormz():
    """Test if losses are correctly generated and loss < 0 is discarded."""
    # The 450 m/z peak lies above the precursor (445) and must yield no loss.
    peak_mz = numpy.array([100, 150, 200, 450], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": 445.0})

    spectrum = add_losses(spectrum_in)

    want_mz = numpy.array([245, 295, 345], "float")
    want_intensities = numpy.array([100, 200, 700], "float")
    assert numpy.allclose(spectrum.losses.mz, want_mz), \
        "Expected different loss m/z."
    assert numpy.allclose(spectrum.losses.intensities, want_intensities), \
        "Expected different intensities."
def test_add_losses_parameterized(mz, loss_mz_to, expected_mz, expected_intensities):
    """Check loss m/z and intensities produced by add_losses.

    Parameters
    ----------
    mz:
        Peak m/z values of the input spectrum.
    loss_mz_to:
        Upper loss m/z limit forwarded to add_losses.
    expected_mz:
        Loss m/z values the filter should produce.
    expected_intensities:
        Loss intensities the filter should produce.
    """
    builder = SpectrumBuilder().with_mz(mz)
    builder = builder.with_intensities(
        numpy.array([700, 200, 100, 1000], "float"))
    builder = builder.with_metadata({"precursor_mz": 445.0})
    spectrum_in = builder.build()

    spectrum = add_losses(spectrum_in, loss_mz_to=loss_mz_to)

    assert numpy.allclose(spectrum.losses.mz, expected_mz), \
        "Expected different loss m/z."
    assert numpy.allclose(spectrum.losses.intensities, expected_intensities), \
        "Expected different intensities."
def test_spectrum_document_init_n_decimals_2():
    """Test if SpectrumDocument builds its words with two decimals.

    Four peaks plus four losses (precursor_mz 100 minus each peak m/z) are
    expected, i.e. eight words in total.
    """
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    metadata = dict(precursor_mz=100.0)
    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
    spectrum = add_losses(spectrum_in)
    spectrum_document = SpectrumDocument(spectrum, n_decimals=2)

    assert spectrum_document.n_decimals == 2
    assert len(spectrum_document) == 8
    # The expected words had been replaced by an e-mail obfuscation
    # placeholder; they are restored here from the m/z values above.
    # NOTE(review): assumes the "peak@<mz>" / "loss@<mz>" word format of
    # SpectrumDocument - confirm against the spec2vec version in use.
    assert spectrum_document.words == [
        "peak@10.00", "peak@20.00", "peak@30.00", "peak@40.00",
        "loss@60.00", "loss@70.00", "loss@80.00", "loss@90.00"
    ]
    assert next(spectrum_document) == "peak@10.00"
def post_process(s):
    """Post-process a spectrum and add losses to it.

    Steps: normalize intensities, keep m/z 0-1000, require at least 10
    peaks, reduce the number of peaks (best effort), drop low-intensity
    peaks if at least 10 entries remain afterwards, then add losses in the
    5.0-200.0 m/z range.

    Parameters
    ----------
    s:
        Spectrum to process.

    Returns
    -------
    The processed spectrum, or ``None`` if the spectrum was rejected by one
    of the filters.
    """
    s = normalize_intensities(s)
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=10)
    try:
        s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
    except Exception:
        # Best effort: on failure continue with the un-reduced spectrum.
        # Only ordinary errors are ignored, so KeyboardInterrupt and
        # SystemExit still propagate.
        pass
    if s is None:
        return None

    # remove low peaks unless fewer than 10 would be left
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= 10:
        s = s_remove_low_peaks

    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
def test_normalize_intensities_losses_present(mz, intensities, metadata, expected_losses):
    """Test if also losses (if present) are normalized correctly.

    Parameters
    ----------
    mz:
        Peak m/z values of the input spectrum.
    intensities:
        Peak intensities of the input spectrum.
    metadata:
        Metadata handed to the spectrum builder.
    expected_losses:
        Loss intensities expected after normalization.
    """
    builder = SpectrumBuilder().with_mz(mz).with_intensities(intensities)
    spectrum_in = builder.with_metadata(metadata).build()

    # Losses first, so normalization has losses to scale as well.
    spectrum = normalize_intensities(add_losses(spectrum_in))

    assert max(spectrum.peaks.intensities) == 1.0, \
        "Expected the spectrum to be scaled to 1.0."
    assert numpy.array_equal(spectrum.peaks.intensities, intensities / 100), \
        "Expected different intensities"
    assert max(spectrum.losses.intensities) == 1.0, \
        "Expected the losses to be scaled to 1.0."
    assert numpy.all(spectrum.losses.intensities == expected_losses), \
        "Expected different loss intensities"
def test_normalize_intensities_losses_present():
    """Test if also losses (if present) are normalized correctly."""
    mz = numpy.array([10, 20, 30, 40], dtype='float')
    intensities = numpy.array([0, 1, 10, 100], dtype='float')
    spectrum_in = Spectrum(mz=mz,
                           intensities=intensities,
                           metadata={"precursor_mz": 45.0})

    # Losses first, so normalization has losses to scale as well.
    spectrum = normalize_intensities(add_losses(spectrum_in))

    want_loss_intensities = numpy.array([1., 0.1, 0.01, 0.], dtype='float')
    assert max(spectrum.peaks.intensities) == 1.0, \
        "Expected the spectrum to be scaled to 1.0."
    assert numpy.array_equal(spectrum.peaks.intensities, intensities / 100), \
        "Expected different intensities"
    assert max(spectrum.losses.intensities) == 1.0, \
        "Expected the losses to be scaled to 1.0."
    assert numpy.all(spectrum.losses.intensities == want_loss_intensities), \
        "Expected different loss intensities"
def spectrum_processing_s2v(
        spectrum: SpectrumType,
        **settings: Union[int, float]) -> Union[SpectrumType]:
    """Spectrum processing required for computing Spec2Vec scores.

    The given settings are completed via ``set_spec2vec_defaults`` and then
    used to select peaks by m/z, limit the number of peaks and add losses.

    Args:
    ----------
    spectrum:
        Spectrum to process
    mz_from:
        Peaks below this value are removed. Default = 10.0
    mz_to:
        Peaks above this value are removed. Default = 1000.0
    n_required:
        Number of minimal required peaks for a spectrum to be considered.
    n_max:
        Maximum number of peaks to be kept per spectrum. Default is 1000.
    loss_mz_from:
        Minimum allowed m/z value for losses. Default is 0.0.
    loss_mz_to:
        Maximum allowed m/z value for losses. Default is 1000.0.

    Returns:
    ----------
    The processed spectrum. An assertion fails if processing yields None.
    """
    config = set_spec2vec_defaults(**settings)

    mz_selected = select_by_mz(spectrum,
                               mz_from=config["mz_from"],
                               mz_to=config["mz_to"])
    peak_limited = reduce_to_number_of_peaks(mz_selected,
                                             n_required=config["n_required"],
                                             n_max=config["n_max"])
    processed = add_losses(peak_limited,
                           loss_mz_from=config["loss_mz_from"],
                           loss_mz_to=config["loss_mz_to"])

    assert processed is not None, \
        "Expects Spectrum that has high enough quality and is not None"
    return processed
def test_add_losses_with_input_none():
    """Test if input spectrum is None.

    add_losses is expected to hand ``None`` back unchanged.
    """
    result = add_losses(None)

    assert result is None
def test_add_losses_with_input_none():
    """add_losses must return None when it is given None."""
    assert add_losses(None) is None