Ejemplo n.º 1
0
def test_add_losses_without_precursor_mz_parameterized(mz, intensities):
    """Check add_losses on a spectrum that carries no precursor_mz.

    The filter must hand back an equal but distinct spectrum, and the
    captured log must consist of the missing-precursor warning.

    Parameters
    ----------
    mz:
        Peak m/z values used to build the input spectrum.
    intensities:
        Peak intensities used to build the input spectrum.
    """
    spectrum_in = SpectrumBuilder().with_mz(mz).with_intensities(
        intensities).build()

    # Run the filter only inside the capture context, so every record it
    # emits is seen by log.check() and none leaks out uncaptured.
    with LogCapture() as log:
        spectrum = add_losses(spectrum_in)

    assert spectrum == spectrum_in and spectrum is not spectrum_in
    log.check((
        "matchms", "WARNING",
        "No precursor_mz found. Consider applying 'add_precursor_mz' filter first."
    ))
Ejemplo n.º 2
0
def post_process_normal(spectrum_in: SpectrumType, min_peaks: int = 10) \
        -> Union[SpectrumType, None]:
    """Run the standard Spec2Vec post-processing chain on one spectrum.

    Parameters
    ----------
    spectrum_in:
        Input spectrum. ``None`` is passed straight through.
    min_peaks:
        Minimum number of peaks to pass the spectrum (otherwise -> None)

    Returns
    -------
    The processed spectrum, or ``None`` when the input is ``None``, when
    NaN values show up after normalization, or when the peak-count filters
    return ``None``.
    """
    if spectrum_in is None:
        return None

    # Work on a copy so the caller's spectrum is left untouched.
    spectrum = normalize_intensities(spectrum_in.clone())

    # peaks[1] is presumably the intensity array - TODO confirm. Per the
    # original note, NaN appears here when all intensities were 0.
    contains_nan = any(np.isnan(spectrum.peaks[1]))
    if contains_nan:
        return None

    spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
    spectrum = require_minimum_number_of_peaks(spectrum,
                                               n_required=min_peaks)
    spectrum = reduce_to_number_of_peaks(spectrum,
                                         n_required=min_peaks,
                                         ratio_desired=0.5)
    if spectrum is None:
        return None

    # Drop low-intensity peaks, but only if at least 10 peaks survive.
    # NOTE(review): this limit is fixed at 10 and does not follow min_peaks.
    without_low_peaks = select_by_relative_intensity(spectrum,
                                                     intensity_from=0.001)
    if len(without_low_peaks.peaks) >= 10:
        spectrum = without_low_peaks

    # Losses are added last, to the fully filtered spectrum.
    return add_losses(spectrum, loss_mz_from=5.0, loss_mz_to=200.0)
Ejemplo n.º 3
0
def test_add_losses_returns_new_spectrum_instance():
    """An empty spectrum must come back equal to, but distinct from, the input."""
    no_mz = numpy.array([], dtype="float")
    no_intensities = numpy.array([], dtype="float")
    spectrum_in = Spectrum(mz=no_mz, intensities=no_intensities)

    result = add_losses(spectrum_in)

    assert result == spectrum_in
    assert result is not spectrum_in
Ejemplo n.º 4
0
def test_add_losses_returns_new_spectrum_instance():
    """Verify an empty spectrum is returned unchanged as a new object."""
    empty_spectrum = Spectrum(
        mz=numpy.array([], dtype="float"),
        intensities=numpy.array([], dtype="float"),
    )

    processed = add_losses(empty_spectrum)

    is_equal = processed == empty_spectrum
    assert is_equal and processed is not empty_spectrum
Ejemplo n.º 5
0
def test_add_losses_without_precursor_mz():
    """Without precursor_mz metadata the output must equal the input."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz, intensities=peak_intensities)

    result = add_losses(spectrum_in)

    assert result == spectrum_in
    assert result is not spectrum_in
Ejemplo n.º 6
0
def test_add_losses_without_precursor_mz():
    """Verify a spectrum lacking a precursor-m/z is returned unchanged."""
    spectrum_without_precursor = Spectrum(
        mz=numpy.array([100, 150, 200, 300], dtype="float"),
        intensities=numpy.array([700, 200, 100, 1000], dtype="float"),
    )

    processed = add_losses(spectrum_without_precursor)

    is_equal = processed == spectrum_without_precursor
    assert is_equal and processed is not spectrum_without_precursor
Ejemplo n.º 7
0
def test_add_losses():
    """Loss m/z values for precursor_mz 445 must be 145, 245, 295 and 345."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": 445.0})

    result = add_losses(spectrum_in)

    expected_loss_mz = numpy.array([145, 245, 295, 345], "float")
    assert numpy.allclose(result.losses.mz, expected_loss_mz)
Ejemplo n.º 8
0
def test_add_losses_with_precursor_mz_wrong_type():
    """A string-valued precursor_mz must raise the scalar-number assertion."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": "445.0"})

    with pytest.raises(AssertionError) as raised:
        add_losses(spectrum_in)

    expected_message = "Expected 'precursor_mz' to be a scalar number."
    assert expected_message in str(raised.value)
Ejemplo n.º 9
0
 def apply_my_filters(s):
     """Example of a user-defined pre- and post-processing pipeline.

     Feeds the spectrum through each filter in turn and returns the result
     of the last one.
     """
     pipeline = (
         default_filters,
         add_parent_mass,
         normalize_intensities,
         lambda spec: reduce_to_number_of_peaks(spec, n_required=10,
                                                ratio_desired=0.5),
         lambda spec: select_by_mz(spec, mz_from=0, mz_to=1000),
         lambda spec: add_losses(spec, loss_mz_from=10.0, loss_mz_to=200.0),
         lambda spec: require_minimum_number_of_peaks(spec, n_required=5),
     )
     # Each step receives the output of the previous one.
     for step in pipeline:
         s = step(s)
     return s
Ejemplo n.º 10
0
 def apply_my_filters(s):
     """Apply a fixed chain of filters to one spectrum and return the result.

     Order: default filters, parent mass, losses, intensity normalization,
     relative-intensity selection, m/z selection, minimum peak count.
     """
     spectrum = add_losses(add_parent_mass(default_filters(s)))
     spectrum = normalize_intensities(spectrum)
     spectrum = select_by_relative_intensity(spectrum,
                                             intensity_from=0.01,
                                             intensity_to=1.0)
     spectrum = select_by_mz(spectrum, mz_from=0, mz_to=1000)
     return require_minimum_number_of_peaks(spectrum, n_required=5)
Ejemplo n.º 11
0
def spectrum_processing(s):
    """Example of a typical pre- and post-processing pipeline.

    Runs the spectrum through the filters below in order and returns the
    output of the final filter.
    """
    cleaned = default_filters(s)
    with_precursor = add_precursor_mz(cleaned)
    normalized = normalize_intensities(with_precursor)
    reduced = reduce_to_number_of_peaks(normalized,
                                        n_required=5,
                                        ratio_desired=0.5,
                                        n_max=500)
    in_mz_window = select_by_mz(reduced, mz_from=0, mz_to=1000)
    with_losses = add_losses(in_mz_window,
                             loss_mz_from=10.0,
                             loss_mz_to=200.0)
    return require_minimum_number_of_peaks(with_losses, n_required=5)
Ejemplo n.º 12
0
def test_add_losses_with_max_loss_mz_250():
    """With loss_mz_to=250 only the losses at m/z 145 and 245 must remain."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": 445.0})

    losses = add_losses(spectrum_in, loss_mz_to=250).losses

    assert numpy.allclose(losses.mz, numpy.array([145, 245], "float")), \
        "Expected different loss m/z."
    assert numpy.allclose(losses.intensities,
                          numpy.array([1000, 100], "float")), \
        "Expected different intensities."
Ejemplo n.º 13
0
def test_add_losses():
    """Test if all losses are generated from the peak m/z and precursor-m/z."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": 445.0})

    losses = add_losses(spectrum_in).losses

    assert numpy.allclose(losses.mz,
                          numpy.array([145, 245, 295, 345], "float")), \
        "Expected different loss m/z."
    assert numpy.allclose(losses.intensities,
                          numpy.array([1000, 100, 200, 700], "float")), \
        "Expected different intensities."
Ejemplo n.º 14
0
def test_add_losses_with_peakmz_larger_precursormz():
    """A peak above the precursor-m/z (450 > 445) must not produce a loss."""
    peak_mz = numpy.array([100, 150, 200, 450], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    spectrum_in = Spectrum(mz=peak_mz,
                           intensities=peak_intensities,
                           metadata={"precursor_mz": 445.0})

    losses = add_losses(spectrum_in).losses

    assert numpy.allclose(losses.mz,
                          numpy.array([245, 295, 345], "float")), \
        "Expected different loss m/z."
    assert numpy.allclose(losses.intensities,
                          numpy.array([100, 200, 700], "float")), \
        "Expected different intensities."
Ejemplo n.º 15
0
def test_add_losses_parameterized(mz, loss_mz_to, expected_mz,
                                  expected_intensities):
    """Check loss m/z and intensities for the given peaks and upper loss limit.

    The spectrum always uses intensities [700, 200, 100, 1000] and a
    precursor_mz of 445.0; only the peak m/z values and ``loss_mz_to`` vary.
    """
    builder = SpectrumBuilder().with_mz(mz)
    builder = builder.with_intensities(
        numpy.array([700, 200, 100, 1000], "float"))
    builder = builder.with_metadata({"precursor_mz": 445.0})
    spectrum_in = builder.build()

    losses = add_losses(spectrum_in, loss_mz_to=loss_mz_to).losses

    mz_matches = numpy.allclose(losses.mz, expected_mz)
    assert mz_matches, "Expected different loss m/z."
    intensities_match = numpy.allclose(losses.intensities,
                                       expected_intensities)
    assert intensities_match, "Expected different intensities."
Ejemplo n.º 16
0
def test_spectrum_document_init_n_decimals_2():
    """Check the words a SpectrumDocument builds when n_decimals is 2.

    The spectrum has four peaks and, after add_losses with precursor_mz
    100.0, four losses, so the document is expected to hold eight words.
    """
    mz = numpy.array([10, 20, 30, 40], dtype="float")
    intensities = numpy.array([0, 1, 10, 100], dtype="float")
    metadata = dict(precursor_mz=100.0)
    spectrum_in = Spectrum(mz=mz, intensities=intensities, metadata=metadata)
    spectrum = add_losses(spectrum_in)
    spectrum_document = SpectrumDocument(spectrum, n_decimals=2)

    assert spectrum_document.n_decimals == 2
    assert len(spectrum_document) == 8
    # Peak words come first, then loss words (precursor_mz minus peak m/z),
    # each formatted with two decimals.
    assert spectrum_document.words == [
        "peak@10.00", "peak@20.00", "peak@30.00", "peak@40.00", "loss@60.00",
        "loss@70.00", "loss@80.00", "loss@90.00"
    ]
    assert next(spectrum_document) == "peak@10.00"
Ejemplo n.º 17
0
def post_process(s):
    """Post-process one spectrum and add losses to it.

    Steps, in order: normalize intensities, keep peaks with m/z between 0
    and 1000, require at least 10 peaks, try to reduce the number of peaks,
    optionally drop low-intensity peaks, then add losses between 5.0 and
    200.0.

    Parameters
    ----------
    s:
        Spectrum to process.

    Returns
    -------
    The processed spectrum, or ``None`` if the filters returned ``None``.
    """
    s = normalize_intensities(s)
    s = select_by_mz(s, mz_from=0, mz_to=1000)
    s = require_minimum_number_of_peaks(s, n_required=10)
    try:
        s = reduce_to_number_of_peaks(s, n_required=10, ratio_desired=0.5)
    except Exception:
        # Best effort: if the reduction fails, continue with the spectrum
        # as it was. Unlike a bare ``except``, this no longer swallows
        # KeyboardInterrupt or SystemExit.
        pass
    if s is None:
        return None
    # Drop low-intensity peaks only if at least 10 peaks would remain.
    s_remove_low_peaks = select_by_relative_intensity(s, intensity_from=0.001)
    if len(s_remove_low_peaks.peaks) >= 10:
        s = s_remove_low_peaks

    s = add_losses(s, loss_mz_from=5.0, loss_mz_to=200.0)
    return s
Ejemplo n.º 18
0
def test_normalize_intensities_losses_present(mz, intensities, metadata,
                                              expected_losses):
    """Check that normalization rescales losses as well as peaks.

    Losses are added first, then both peaks and losses are expected to be
    scaled so that their maximum intensity is 1.0.
    """
    builder = SpectrumBuilder().with_mz(mz)
    builder = builder.with_intensities(intensities)
    spectrum_in = builder.with_metadata(metadata).build()

    normalized = normalize_intensities(add_losses(spectrum_in))
    peak_intensities = normalized.peaks.intensities

    assert max(peak_intensities) == 1.0, \
        "Expected the spectrum to be scaled to 1.0."
    assert numpy.array_equal(peak_intensities, intensities / 100), \
        "Expected different intensities"

    loss_intensities = normalized.losses.intensities
    assert max(loss_intensities) == 1.0, \
        "Expected the losses to be scaled to 1.0."
    assert numpy.all(loss_intensities == expected_losses), \
        "Expected different loss intensities"
def test_normalize_intensities_losses_present():
    """Check that normalization rescales losses as well as peaks.

    Uses four peaks with intensities 0, 1, 10 and 100 and a precursor_mz
    of 45.0; both peaks and losses must end up with a maximum of 1.0.
    """
    mz = numpy.array([10, 20, 30, 40], dtype='float')
    intensities = numpy.array([0, 1, 10, 100], dtype='float')
    expected_loss_intensities = numpy.array([1., 0.1, 0.01, 0.], dtype='float')
    spectrum_in = Spectrum(mz=mz,
                           intensities=intensities,
                           metadata={"precursor_mz": 45.0})

    normalized = normalize_intensities(add_losses(spectrum_in))
    peak_intensities = normalized.peaks.intensities

    assert max(peak_intensities) == 1.0, \
        "Expected the spectrum to be scaled to 1.0."
    assert numpy.array_equal(peak_intensities, intensities / 100), \
        "Expected different intensities"

    loss_intensities = normalized.losses.intensities
    assert max(loss_intensities) == 1.0, \
        "Expected the losses to be scaled to 1.0."
    assert numpy.all(loss_intensities == expected_loss_intensities), \
        "Expected different loss intensities"
Ejemplo n.º 20
0
def spectrum_processing_s2v(
        spectrum: SpectrumType,
        **settings: Union[int, float]) -> Union[SpectrumType]:
    """Process a spectrum as required for computing Spec2Vec scores.

    Keyword settings are completed by ``set_spec2vec_defaults``. The
    spectrum is then restricted to an m/z window, reduced in peak count,
    and extended with losses.

    Parameters
    ----------
    spectrum:
        Spectrum to process
    mz_from:
        Peaks below this value are removed. Default = 10.0
    mz_to:
        Peaks above this value are removed. Default = 1000.0
    n_required
        Number of minimal required peaks for a spectrum to be considered.
    n_max
        Maximum number of peaks to be kept per spectrum. Default is 1000.
    loss_mz_from
        Minimum allowed m/z value for losses. Default is 0.0.
    loss_mz_to
        Maximum allowed m/z value for losses. Default is 1000.0.

    Raises
    ------
    AssertionError
        If the processed spectrum is ``None``.
    """
    config = set_spec2vec_defaults(**settings)

    processed = select_by_mz(spectrum,
                             mz_from=config["mz_from"],
                             mz_to=config["mz_to"])
    processed = reduce_to_number_of_peaks(processed,
                                          n_required=config["n_required"],
                                          n_max=config["n_max"])
    processed = add_losses(processed,
                           loss_mz_from=config["loss_mz_from"],
                           loss_mz_to=config["loss_mz_to"])

    # This function never returns None; a None result fails the assertion.
    assert processed is not None, \
        "Expects Spectrum that has high enough quality and is not None"
    return processed
Ejemplo n.º 21
0
def test_add_losses_with_input_none():
    """Passing None instead of a spectrum must yield None."""
    result = add_losses(None)
    assert result is None
Ejemplo n.º 22
0
def test_add_losses_with_input_none():
    """Verify that add_losses returns None when given None."""
    missing_spectrum = None
    assert add_losses(missing_spectrum) is None