Example #1
0
def test_cosine_hungarian_with_tolerance_2_0():
    """Check the score obtained with tolerance=2.0 against a hand-derived value.

    The reference score is the sum of intensity products over the peak pairs
    expected to match, divided by the product of the Euclidean norms of the
    two full intensity vectors.
    """
    mz_1 = numpy.array([100, 299, 300, 301, 510], dtype="float")
    intensities_1 = numpy.array([0.1, 1.0, 0.2, 0.3, 0.4], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1)

    mz_2 = numpy.array([100, 300, 301, 511], dtype="float")
    intensities_2 = numpy.array([0.1, 1.0, 0.3, 0.4], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2)

    similarity = CosineHungarian(tolerance=2.0)
    score, n_matches = similarity.pair(spectrum_1, spectrum_2)

    # Peak indices (per spectrum) whose m/z values lie within the tolerance
    # of each other: position k in one list is paired with position k in
    # the other.
    matched_in_1 = [0, 1, 3, 4]
    matched_in_2 = [0, 1, 2, 3]

    peaks_1 = spectrum_1.peaks.intensities
    peaks_2 = spectrum_2.peaks.intensities
    matched_products = peaks_1[matched_in_1] * peaks_2[matched_in_2]
    norm_product = numpy.sqrt((peaks_1 ** 2).sum()) \
        * numpy.sqrt((peaks_2 ** 2).sum())
    expected_score = matched_products.sum() / norm_product

    # Scores are compared with a relative tolerance of 1e-4.
    assert score == pytest.approx(expected_score, rel=0.0001), \
        "Expected different cosine score."
    assert n_matches == len(matched_in_1), \
        "Expected different number of matching peaks."
Example #2
0
def test_cosine_hungarian_without_parameters():
    """Check the score from a default-constructed CosineHungarian.

    The reference score is the sum of intensity products over the peaks
    expected to match, divided by the product of the Euclidean norms of the
    two full intensity vectors.

    NOTE(review): the result of ``pair`` is read by field name (``"score"``
    and ``"matches"``) here, so this test presumably targets an API version
    that returns a keyed/structured result -- confirm against the installed
    library.
    """
    # Both spectra carry the same intensity values; only the m/z axes differ.
    mz_1 = numpy.array([100, 200, 300, 500, 510], dtype="float")
    intensities_1 = numpy.array([0.1, 0.2, 1.0, 0.3, 0.4], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1)

    mz_2 = numpy.array([100, 200, 290, 490, 510], dtype="float")
    intensities_2 = numpy.array([0.1, 0.2, 1.0, 0.3, 0.4], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2)

    similarity = CosineHungarian()
    result = similarity.pair(spectrum_1, spectrum_2)

    # Indices of the peaks with identical m/z in both spectra (100, 200, 510);
    # the same index list applies to both because the positions coincide.
    matched = [0, 1, 4]

    peaks_1 = spectrum_1.peaks.intensities
    peaks_2 = spectrum_2.peaks.intensities
    matched_products = peaks_1[matched] * peaks_2[matched]
    norm_product = numpy.sqrt((peaks_1 ** 2).sum()) \
        * numpy.sqrt((peaks_2 ** 2).sum())
    expected_score = matched_products.sum() / norm_product

    # Scores are compared with a relative tolerance of 1e-4.
    assert result["score"] == pytest.approx(expected_score, rel=0.0001), \
        "Expected different cosine score."
    assert result["matches"] == len(matched), \
        "Expected different number of matching peaks."
Example #3
0
def test_cosine_hungarian_case_without_matches():
    """Check that spectra with no matching peaks give score 0.0 and 0 matches.

    Each peak of the second spectrum sits 10 m/z units above its counterpart
    in the first one, and the scorer is built with its default settings.
    """
    mz_1 = numpy.array([100, 200], dtype="float")
    intensities_1 = numpy.array([1.0, 0.1], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1, metadata={})

    mz_2 = numpy.array([110, 210], dtype="float")
    intensities_2 = numpy.array([1.0, 0.1], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2, metadata={})

    similarity = CosineHungarian()
    score, n_matches = similarity.pair(spectrum_1, spectrum_2)

    # With nothing paired, the score is expected to be exactly zero.
    assert score == 0.0, "Expected different cosine score."
    assert n_matches == 0, "Expected different number of matching peaks."
Example #4
0
def test_cosine_hungarian_case_where_greedy_would_fail():
    """Check a peak layout where the best single pair is not in the best set.

    With tolerance 0.01 the candidate pairs (spectrum_1 -> spectrum_2) are:

    * 100.005 -> 100.005, intensity product 1.0 * 0.9 = 0.9
    * 100.005 -> 100.01,  intensity product 1.0 * 1.0 = 1.0
    * 100.016 -> 100.01,  intensity product 0.9 * 1.0 = 0.9

    (100.016 -> 100.005 is 0.011 apart and therefore out of tolerance.)
    Taking the largest product first would leave 100.016 without a partner,
    whereas pairing both peaks gives 0.9 + 0.9 = 1.8. Divided by the norm
    product sqrt(1.81) * sqrt(1.81) = 1.81 this yields about 0.994475.
    """
    mz_1 = numpy.array([100.005, 100.016], dtype="float")
    intensities_1 = numpy.array([1.0, 0.9], dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1, metadata={})

    mz_2 = numpy.array([100.005, 100.01], dtype="float")
    intensities_2 = numpy.array([0.9, 1.0], dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2, metadata={})

    similarity = CosineHungarian(tolerance=0.01)
    score, n_matches = similarity.pair(spectrum_1, spectrum_2)

    # Scores are compared with a relative tolerance of 1e-4.
    assert score == pytest.approx(0.994475, rel=0.0001), \
        "Expected different cosine score."
    assert n_matches == 2, "Expected different number of matching peaks."
Example #5
0
def test_cosine_hungarian_order_of_arguments():
    """Check that pair(A, B) and pair(B, A) give the same score and matches.

    The two spectra have different numbers of peaks (7 versus 6); the scorer
    is called once in each argument order and the results must be equal.
    """
    mz_1 = numpy.array([100, 200, 299, 300, 301, 500, 510], dtype="float")
    intensities_1 = numpy.array([0.02, 0.02, 1.0, 0.2, 0.4, 0.04, 0.2],
                                dtype="float")
    spectrum_1 = Spectrum(mz=mz_1, intensities=intensities_1, metadata={})

    mz_2 = numpy.array([100, 200, 300, 301, 500, 512], dtype="float")
    intensities_2 = numpy.array([0.02, 0.02, 1.0, 0.2, 0.04, 0.2],
                                dtype="float")
    spectrum_2 = Spectrum(mz=mz_2, intensities=intensities_2, metadata={})

    similarity = CosineHungarian(tolerance=2.0)
    forward_score, forward_matches = similarity.pair(spectrum_1, spectrum_2)
    reverse_score, reverse_matches = similarity.pair(spectrum_2, spectrum_1)

    # Exact equality is intentional: swapping the arguments must not change
    # the outcome at all.
    assert forward_score == reverse_score, \
        "Expected that the order of the arguments would not matter."
    assert forward_matches == reverse_matches, \
        "Expected that the order of the arguments would not matter."
Example #6
0
def test_cosine_hungarian_with_peak_powers():
    """Check the score when ``mz_power`` and ``intensity_power`` are given.

    The reference value weights every peak as
    ``mz ** mz_power * intensity ** intensity_power``, sums the products of
    those weights over the peaks expected to match, and divides by the
    product of the Euclidean norms of the two full weight vectors.
    """
    mz_power = 0.5
    intensity_power = 2.0

    # Both spectra carry the same intensity values; only the m/z axes differ.
    spectrum_1 = Spectrum(
        mz=numpy.array([100, 200, 300, 500, 510], dtype="float"),
        intensities=numpy.array([0.1, 0.2, 1.0, 0.3, 0.4], dtype="float"))
    spectrum_2 = Spectrum(
        mz=numpy.array([100, 200, 290, 490, 510], dtype="float"),
        intensities=numpy.array([0.1, 0.2, 1.0, 0.3, 0.4], dtype="float"))

    similarity = CosineHungarian(tolerance=1.0,
                                 mz_power=mz_power,
                                 intensity_power=intensity_power)
    score, n_matches = similarity.pair(spectrum_1, spectrum_2)

    # Indices of the peaks with identical m/z in both spectra (100, 200, 510);
    # the same index list applies to both because the positions coincide.
    matches = [0, 1, 4]

    intensity1 = spectrum_1.peaks.intensities
    mz1 = spectrum_1.peaks.mz
    intensity2 = spectrum_2.peaks.intensities
    mz2 = spectrum_2.peaks.mz

    # Numerator: products of the weighted matched peaks.
    mz_term_1 = mz1[matches] ** mz_power
    intensity_term_1 = intensity1[matches] ** intensity_power
    mz_term_2 = mz2[matches] ** mz_power
    intensity_term_2 = intensity2[matches] ** intensity_power
    matched_products = mz_term_1 * intensity_term_1 \
        * mz_term_2 * intensity_term_2

    # Denominator: norms of the complete weighted peak vectors.
    weighted_1 = (mz1 ** mz_power) * (intensity1 ** intensity_power)
    weighted_2 = (mz2 ** mz_power) * (intensity2 ** intensity_power)
    norm_product = numpy.sqrt((weighted_1 ** 2).sum()) \
        * numpy.sqrt((weighted_2 ** 2).sum())

    expected_score = matched_products.sum() / norm_product

    # Scores are compared with a relative tolerance of 1e-4.
    assert score == pytest.approx(expected_score, rel=0.0001), \
        "Expected different cosine score."
    assert n_matches == len(matches), \
        "Expected different number of matching peaks."