def test_query_analog(db):
    """Shift a spectrum by 50 *m/z* and check the original is still found.

    For every spectrum of the `db` fixture, an analog is built by
    subtracting 50 from the parent mass and from every peak *m/z*. The
    database is queried with this analog, and the hit whose id matches the
    original spectrum must carry the score returned by `cosine_score` for
    the original/analog pair.
    """
    db_path, (mzs, spectra) = db

    for idx, (orig_mz, orig_data) in enumerate(zip(mzs, spectra)):
        # Build the analog on a copy so the fixture data is left untouched.
        analog_mz = orig_mz - 50
        analog_data = orig_data.copy()
        analog_data[:, MZ] = analog_data[:, MZ] - 50

        filtered_analog = filter_data(analog_mz, analog_data, 0, 17, 50, 6)
        filtered_orig = filter_data(orig_mz, orig_data, 0, 17, 50, 6)
        expected_score = cosine_score(orig_mz, filtered_orig,
                                      analog_mz, filtered_analog, 0.02, 0)

        results = query(str(db_path), [idx], [analog_mz], [filtered_analog], [],
                        0.02, 0, 0, 17, 50, 6, 0., 100.)
        assert idx in results

        found = False
        for hit in results[idx]:
            # Every hit must expose the full set of result fields.
            for key in ('id', 'bank_id', 'name', 'score'):
                assert key in hit
            if hit['id'] == idx:
                found = True
                assert hit['score'] == pytest.approx(expected_score)
        assert found
def test_filter_data_reversed(random_spectrum, min_intensity, parent_filter_tolerance,
                              matched_peaks_window, min_matched_peaks_search):
    """Filtering must not depend on the order of the input rows.

    The same spectrum is filtered as given and with its rows reversed;
    once sorted, both outputs have to be approximately equal.
    """
    parent, data = random_spectrum
    params = (min_intensity, parent_filter_tolerance,
              matched_peaks_window, min_matched_peaks_search)

    forward = filter_data(parent, data, *params)
    filtered = np.sort(forward, axis=0)

    backward = filter_data(parent, data[::-1], *params)
    filtered_r = np.sort(backward, axis=0)

    assert filtered == pytest.approx(filtered_r)
def test_filter_data_parent(random_spectrum, parent_filter_tolerance):
    """Peaks close to the parent mass should be filtered out.

    Three peaks of a copy of the spectrum are moved around the parent mass:
    one at a random offset inside the tolerance range and one on each
    boundary of that range. After filtering, every remaining *m/z* must be
    outside the range or (approximately) on one of its boundaries.
    """
    parent, data = random_spectrum

    # Work on a copy so the fixture data is not modified.
    data = data.copy()

    # Make sure we have an *m/z* in the parent+/-parent_filter_tolerance range.
    # A scalar draw is used on purpose: assigning a size-1 array to a single
    # element is deprecated since NumPy 1.25.
    data[0, MZ] = parent + np.random.random() * parent_filter_tolerance

    # Make sure that the excluded range is strict
    data[1, MZ] = parent + parent_filter_tolerance
    data[2, MZ] = parent - parent_filter_tolerance

    filtered = np.sort(filter_data(parent, data, 0, parent_filter_tolerance, 50, 6), axis=0)

    if parent_filter_tolerance == 0:
        assert filtered.shape == data.shape
    elif parent_filter_tolerance > 0:
        assert filtered.shape < data.shape

    lower = parent - parent_filter_tolerance
    upper = parent + parent_filter_tolerance
    for mz in filtered[:, MZ]:
        assert (mz < lower
                or mz == pytest.approx(lower)
                or mz > upper
                or mz == pytest.approx(upper))
def test_query_in_bank(db, bank):
    """Querying a specific bank for a spectrum known to be in it should
    return that spectrum.

    The ids this test expects to find are: `i` when bank 0 is searched, and
    `len(mzs) + i` when bank 1 is searched and `i` is odd. Each expected id
    must appear among the hits for query `i` with a score of about 1.0.
    When no id is expected for the requested banks, nothing is asserted.
    """
    p, (mzs, spectra) = db
    for i, (mz, data) in enumerate(zip(mzs, spectra)):
        filtered = filter_data(mz, data, 0, 17, 50, 6)
        results = query(str(p), [i], [mz], [filtered], bank, 0.02, 0, 0, 17, 50, 6, 0.)

        expected_ids = []
        if 0 in bank:
            expected_ids.append(i)
        if 1 in bank and i % 2:
            expected_ids.append(len(mzs) + i)

        # `expected_ids` is only populated when one of the conditions above
        # holds, so no further bank check is needed inside the loop.
        for expected_id in expected_ids:
            assert i in results
            seen = False
            for r in results[i]:
                assert 'id' in r
                assert 'bank_id' in r
                assert 'name' in r
                assert 'score' in r
                if r['id'] == expected_id:
                    seen = True
                    assert pytest.approx(r['score']) == 1.0
            assert seen
def test_filter_data_non_contiguous(random_spectrum, min_intensity, parent_filter_tolerance,
                                    matched_peaks_window, min_matched_peaks_search):
    """Memory layout of the input must not change the filtering result.

    The spectrum is filtered as provided by the fixture and again after
    conversion to a Fortran-ordered array; once sorted, both outputs have
    to be approximately equal.
    """
    parent, data = random_spectrum
    params = (min_intensity, parent_filter_tolerance,
              matched_peaks_window, min_matched_peaks_search)

    filtered = np.sort(filter_data(parent, data, *params), axis=0)

    fortran_data = np.asfortranarray(data, dtype=data.dtype)
    filtered_nc = np.sort(filter_data(parent, fortran_data, *params), axis=0)

    assert filtered == pytest.approx(filtered_nc)
def test_filter_data_already_filtered(random_spectrum, min_intensity, parent_filter_tolerance,
                                      matched_peaks_window, min_matched_peaks_search):
    """Filtering twice with the same parameters must keep the same *m/z*.

    Only the *m/z* column is compared between the first and the second
    pass.
    """
    parent, data = random_spectrum
    params = (min_intensity, parent_filter_tolerance,
              matched_peaks_window, min_matched_peaks_search)

    first_pass = filter_data(parent, data, *params)
    expected = np.sort(first_pass, axis=0)

    second_pass = filter_data(parent, first_pass, *params)
    filtered = np.sort(second_pass, axis=0)

    assert filtered[:, MZ] == pytest.approx(expected[:, MZ])
def test_filter_data_empty(min_intensity, parent_filter_tolerance,
                           matched_peaks_window, min_matched_peaks_search):
    """Filtering a spectrum without any peak should give an empty result.
    """
    empty_spectrum = np.empty((0, 2), dtype=np.float32)

    filtered = filter_data(152.569, empty_spectrum,
                           min_intensity, parent_filter_tolerance,
                           matched_peaks_window, min_matched_peaks_search)

    assert filtered.size == 0
def test_filter_data_no_filtering(random_spectrum):
    """With every filtering parameter set to zero, all *m/z* are kept.

    The shape must be unchanged and the sorted *m/z* values must match the
    input within a relative tolerance of 1e-4.
    """
    parent, data = random_spectrum

    expected = np.sort(data, axis=0)
    unfiltered = filter_data(parent, data, 0, 0, 0, 0)
    filtered = np.sort(unfiltered, axis=0)

    assert filtered.shape == expected.shape
    assert filtered[:, MZ] == pytest.approx(expected[:, MZ], rel=1e-4)
def test_filter_data_known(known_spectrum_filter_comparison):
    """Compare `filter_data` output with a reference result.

    The fixture provides the parent mass, the raw spectrum and the expected
    filtered spectrum; both results are sorted before comparison.
    """
    parent, data, reference = known_spectrum_filter_comparison

    expected = np.sort(reference, axis=0)
    computed = filter_data(parent, data, 0, 17, 50, 6)
    filtered = np.sort(computed, axis=0)

    assert filtered == pytest.approx(expected)
def test_filter_data_window(random_spectrum, matched_peaks_window, min_matched_peaks_search):
    """If `matched_peaks_window` or `min_matched_peaks_search` is zero, no
    peaks should be filtered; otherwise the result cannot be larger than
    the input.
    """
    parent, data = random_spectrum

    result = filter_data(parent, data, 0, 0, matched_peaks_window, min_matched_peaks_search)
    filtered = np.sort(result, axis=0)

    if matched_peaks_window != 0 and min_matched_peaks_search != 0:
        # Window filtering is active: peaks may be removed, never added.
        assert filtered.shape <= data.shape
    else:
        assert filtered.shape == data.shape
        sorted_mz = np.sort(data[:, MZ], axis=0)
        assert filtered[:, MZ] == pytest.approx(sorted_mz)
def test_filter_data_norm(random_spectrum, min_intensity, parent_filter_tolerance,
                          matched_peaks_window, min_matched_peaks_search):
    """Filtered intensities should always have a squared norm of 1.

    Intensities are scaled by 100 on a copy of the spectrum before
    filtering; the check is skipped when nothing survives the filter.
    """
    parent, data = random_spectrum

    scaled = data.copy()
    scaled[:, INTENSITY] = scaled[:, INTENSITY] * 100

    result = filter_data(parent, scaled, min_intensity, parent_filter_tolerance,
                         matched_peaks_window, min_matched_peaks_search)
    filtered = np.sort(result, axis=0)

    if filtered.size > 0:
        intensities = filtered[:, INTENSITY]
        assert pytest.approx(intensities @ intensities) == 1.
def test_filter_data_python_cython(random_spectrum, min_intensity, parent_filter_tolerance,
                                   matched_peaks_window, min_matched_peaks_search):
    """Cythonized `filter_data` and its fallback Python version should give
    the same results.

    The fallback is reached through `filter_data.__wrapped__`.
    """
    parent, data = random_spectrum
    params = (min_intensity, parent_filter_tolerance,
              matched_peaks_window, min_matched_peaks_search)

    python_result = filter_data.__wrapped__(parent, data, *params)
    filtered_p = np.sort(python_result, axis=0)

    cython_result = filter_data(parent, data, *params)
    filtered_c = np.sort(cython_result, axis=0)

    assert filtered_p.shape == filtered_c.shape
    assert filtered_p == pytest.approx(filtered_c)
def test_human_readable_data_random(random_spectrum):
    """Check `human_readable_data` on a filtered random spectrum.

    The test expects each output intensity to equal the squared filtered
    intensity expressed as a percentage of the largest squared intensity,
    so that values lie in [0, 100]. *m/z* values and the intensity ranking
    must be left unchanged.
    """
    parent, data = random_spectrum
    filtered = filter_data(parent, data, 0, 17, 50, 6)
    data = human_readable_data(filtered)

    assert data.shape == filtered.shape

    max_ = filtered[:, INTENSITY].max()**2
    for row, filtered_row in zip(data, filtered):
        assert row[INTENSITY] == pytest.approx(filtered_row[INTENSITY]**2 / max_ * 100)

    assert data[:, INTENSITY].max() == 100
    # Index the intensity *column*: `data[:INTENSITY]` would slice rows and
    # never actually check the intensities.
    assert data[:, INTENSITY].min() >= 0
    assert np.array_equal(data[:, MZ], filtered[:, MZ])
    assert np.array_equal(np.argsort(data[:, INTENSITY]),
                          np.argsort(filtered[:, INTENSITY]))
def test_filter_data_low_mass(random_spectrum, min_intensity, parent_filter_tolerance,
                              matched_peaks_window, min_matched_peaks_search):
    """Low mass peaks should be filtered out.

    One peak of a copy of the spectrum is forced to a random *m/z* below 50.
    The filtered result must then be smaller than the input and contain
    only *m/z* values above 50.
    """
    parent, data = random_spectrum

    # Make sure we have an *m/z* below 50. A scalar draw is used on purpose:
    # assigning a size-1 array to a single element is deprecated since
    # NumPy 1.25.
    data = data.copy()
    data[0, MZ] = np.random.random() * 50

    filtered = np.sort(filter_data(parent, data, min_intensity, parent_filter_tolerance,
                                   matched_peaks_window, min_matched_peaks_search), axis=0)

    assert filtered.shape < data.shape
    if filtered.size > 0:
        # `min()` raises on an empty array, hence the guard.
        assert filtered[:, MZ].min() > 50
    for mz in filtered[:, MZ]:
        assert mz > 50
def test_query_python_cython(db):
    """Cythonized `query` and its fallback Python version should give the
    same results.

    The fallback is reached through `query.__wrapped__`. Both versions must
    return the same keys and, for each key, the same number of hits with
    identical ids, bank ids and names and approximately equal scores.
    """
    p, (mzs, spectra) = db
    for i, (mz, data) in enumerate(zip(mzs, spectra)):
        filtered = filter_data(mz, data, 0, 17, 50, 6)
        results_p = query.__wrapped__(str(p), [i], [mz], [filtered], [], 0.02, 0, 0, 17, 50, 6, 0.)
        results_c = query(str(p), [i], [mz], [filtered], [], 0.02, 0, 0, 17, 50, 6, 0.)

        assert results_p.keys() == results_c.keys()
        for k in results_c:
            # `zip` stops at the shorter sequence, so missing or extra hits
            # would go unnoticed without this explicit length check.
            assert len(results_c[k]) == len(results_p[k])
            for r_c, r_p in zip(results_c[k], results_p[k]):
                assert r_c['id'] == r_p['id']
                assert r_c['bank_id'] == r_p['bank_id']
                # Names from the cythonized version are decoded before being
                # compared with the names from the Python version.
                assert r_c['name'].decode() == r_p['name']
                assert r_c['score'] == pytest.approx(r_p['score'])
def test_filter_data_min_intensity(random_spectrum, min_intensity):
    """Peaks lower than `min_intensity` % of maximum intensity should be
    filtered out.

    Expectations encoded here: nothing is removed at 0, a single peak
    remains at 100, no peak remains above 100, and for any other value the
    result is not larger than the input.
    """
    parent, data = random_spectrum

    filtered = np.sort(filter_data(parent, data, min_intensity, 0, 0, 0), axis=0)

    if min_intensity == 0:
        assert filtered.shape == data.shape
        assert filtered[:, MZ] == pytest.approx(np.sort(data[:, MZ], axis=0))
    elif min_intensity == 100:
        assert filtered.shape[0] == 1
    elif min_intensity > 100:
        assert filtered.shape[0] == 0
    else:
        assert filtered.shape <= data.shape
        # NOTE(review): comparing each intensity with the maximum of the same
        # array holds trivially; a check against the `min_intensity`
        # threshold was probably intended -- confirm and tighten.
        max_intensity = filtered[:, INTENSITY].max()
        for intensity in filtered[:, INTENSITY]:
            assert intensity <= max_intensity
def test_query_random_spectra(db):
    """Querying with a spectrum stored in the database must return it.

    Each spectrum of the `db` fixture is filtered and used as a query; the
    hit whose id equals the spectrum index must have a score of about 1.0.
    """
    db_path, (mzs, spectra) = db

    for idx, (parent, peaks) in enumerate(zip(mzs, spectra)):
        filtered = filter_data(parent, peaks, 0, 17, 50, 6)
        results = query(str(db_path), [idx], [parent], [filtered], [],
                        0.02, 0, 0, 17, 50, 6, 0.)
        assert idx in results

        found = False
        for hit in results[idx]:
            # Every hit must expose the full set of result fields.
            for key in ('id', 'bank_id', 'name', 'score'):
                assert key in hit
            if hit['id'] == idx:
                found = True
                assert pytest.approx(hit['score']) == 1.0
        assert found