def safe_read_key(spectrum: Spectrum, key: str) -> Optional[float]:
    """ Read key from spectrum and convert to float or return 'None'.

    Tries to read the given key from the spectrum metadata and convert it to
    a float. If the key is not present, or its value cannot be converted to a
    float (unparsable string or unsupported type), returns 'None'.

    Parameters
    ----------
    spectrum:
        Spectrum from which to read the key.
    key:
        Key to be read from the spectrum metadata.

    Returns
    -------
    Either the key's value converted to float or 'None'.
    """
    value = spectrum.get(key, default=None)
    if value is None:
        return None
    try:
        return float(value)
    except (ValueError, TypeError):
        # ValueError: a string that does not represent a number.
        # TypeError: a value float() cannot handle at all (e.g. tuple or list).
        return None
def test_clean_inchis_harmonize_strings():
    """Check that clean_inchis brings differently written InChIs into one style."""
    raw_inchis = ['InChI=1S/C6H12', '1S/C6H12']
    inputs = [
        Spectrum(mz=np.array([], dtype='float'),
                 intensities=np.array([], dtype='float'),
                 metadata={"inchi": raw_inchi})
        for raw_inchi in raw_inchis
    ]

    with_prefix, without_prefix = (clean_inchis(spectrum_in) for spectrum_in in inputs)

    # The cleaned InChI is expected to start with a quote followed by "InChI=".
    cleaned_inchi = with_prefix.get("inchi")
    assert cleaned_inchi.startswith('"InChI='), "InChI style not as expected"
    assert with_prefix == without_prefix, 'after cleaning both spectra should be equal'
def test_train_new_word2vec_model_with_logger_and_saving(tmp_path):
    """Train a dummy model with the progress logger enabled, save it and inspect the saved file."""
    # Fake corpus: 100 documents with 10 peaks each, shifted by one m/z unit per document
    documents = [
        SpectrumDocument(
            Spectrum(mz=numpy.linspace(shift, 9 + shift, 10),
                     intensities=numpy.ones(10).astype("float"),
                     metadata={}),
            n_decimals=1)
        for shift in range(100)
    ]

    # Train the model and let it write itself to file
    filename = os.path.join(tmp_path, "test.model")
    train_new_word2vec_model(documents, iterations=20, filename=filename,
                             size=20, progress_logger=True)
    assert os.path.isfile(filename), "Could not find saved model file."

    # Reload from disk and check that the stored settings look as expected
    loaded_model = gensim.models.Word2Vec.load(filename)
    expected_defaults = {
        "sg": 0,
        "negative": 5,
        "window": 500,
        "alpha": 0.025,
        "min_alpha": 0.02,
    }
    for attribute, expected_value in expected_defaults.items():
        assert getattr(loaded_model, attribute) == expected_value, "Expected different default value."

    assert loaded_model.epochs == 20, "Expected differnt number of epochs."
    word_vectors = loaded_model.wv
    assert word_vectors.vector_size == 20, "Expected differnt vector size."
    assert len(word_vectors.vocab) == 109, "Expected different number of words in vocab."
    example_word = documents[0].words[1]
    assert word_vectors.get_vector(example_word).shape[0] == 20, "Expected differnt vector size."
def test_clean_compound_name_removing_known_non_name_parts():
    """Run clean_compound_name on a set of difficult but representative names."""
    # Each entry is (raw compound name, name expected after cleaning).
    name_cases = (
        ("MLS000863588-01!2-methoxy-3-methyl-9H-carbazole",
         "2-methoxy-3-methyl-9H-carbazole"),
        ("NCGC00160217-01!SOPHOCARPINE", "SOPHOCARPINE"),
        ("0072_2-Mercaptobenzothiaz", "2-Mercaptobenzothiaz"),
        (r"MassbankEU:ET110206 NPE_327.1704_12.2|N-succinylnorpheniramine",
         "N-succinylnorpheniramine"),
        ("Massbank:CE000307 Trans-Zeatin-[d5]", "Trans-Zeatin-[d5]"),
        ("HMDB:HMDB00500-718 4-Hydroxybenzoic acid", "4-Hydroxybenzoic acid"),
        ("MoNA:2346734 Piroxicam (Feldene)", "Piroxicam (Feldene)"),
        ("ReSpect:PS013405 option1|option2|option3", "option3"),
        ("ReSpect:PS013405 option1name", "option1name"),
        ("4,4-Dimethylcholest-8(9),24-dien-3.beta.-ol 231.2",
         "4,4-Dimethylcholest-8(9),24-dien-3.beta.-ol"),
    )

    for raw_name, expected_name in name_cases:
        spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                               intensities=numpy.array([], dtype="float"),
                               metadata={"compound_name": raw_name})
        cleaned_name = clean_compound_name(spectrum_in).get("compound_name")
        assert cleaned_name == expected_name, "Expected different cleaned name."
def test_modified_cosine_with_mass_shift_5():
    """Modified cosine for two spectra whose precursor m/z values differ by 5."""
    peak_intensities = [700, 200, 100, 1000, 200, 5, 500]
    reference = Spectrum(
        mz=numpy.array([100, 150, 200, 300, 500, 510, 1100], dtype="float"),
        intensities=numpy.array(peak_intensities, dtype="float"),
        metadata={"precursor_mz": 1000.0})
    query = Spectrum(
        mz=numpy.array([55, 105, 205, 304.5, 494.5, 515.5, 1045], dtype="float"),
        intensities=numpy.array(peak_intensities, dtype="float"),
        metadata={"precursor_mz": 1005.0})

    reference_normalized = normalize_intensities(reference)
    query_normalized = normalize_intensities(query)

    # Default ModifiedCosine settings are used on purpose here.
    result = ModifiedCosine().pair(reference_normalized, query_normalized)

    assert result["score"] == pytest.approx(0.081966, 0.0001), "Expected different cosine score."
    assert result["matches"] == 2, "Expected 2 matching peaks."
def test_select_by_relative_intensity_with_to_parameter_too_large():
    """select_by_relative_intensity must raise AssertionError for intensity_to=10.0."""
    spectrum_in = Spectrum(mz=numpy.array([10, 20, 30, 40], dtype="float"),
                           intensities=numpy.array([1, 10, 100, 1000], dtype="float"))

    with pytest.raises(AssertionError):
        select_by_relative_intensity(spectrum_in, intensity_to=10.0)
def test_add_fingerprint_no_smiles_no_inchi():
    """With empty metadata, add_fingerprint is expected to leave 'fingerprint' unset."""
    empty_spectrum = Spectrum(mz=numpy.array([], dtype="float"),
                              intensities=numpy.array([], dtype="float"),
                              metadata={})

    fingerprint = add_fingerprint(empty_spectrum).get("fingerprint", None)

    assert fingerprint is None, "Expected None."
def test_add_losses_returns_new_spectrum_instance():
    """add_losses on an empty spectrum must return an equal, but distinct, object."""
    no_peaks_mz = numpy.array([], dtype="float")
    no_peaks_intensities = numpy.array([], dtype="float")
    original = Spectrum(mz=no_peaks_mz, intensities=no_peaks_intensities)

    processed = add_losses(original)

    # Equality first, identity second: same order as a combined `and` check.
    assert processed == original
    assert processed is not original
def test_add_losses_without_precursor_mz():
    """Without a precursor-m/z in the metadata, add_losses should return an equal copy."""
    peak_mz = numpy.array([100, 150, 200, 300], dtype="float")
    peak_intensities = numpy.array([700, 200, 100, 1000], dtype="float")
    original = Spectrum(mz=peak_mz, intensities=peak_intensities)

    result = add_losses(original)

    assert result == original and result is not original
def test_cosine_score_greedy_with_tolerance_2_0():
    """Score two normalized spectra with CosineGreedyVectorial at tolerance=2.0."""
    reference = Spectrum(
        mz=numpy.array([100, 200, 299, 300, 301, 500, 510], dtype="float"),
        intensities=numpy.array([10, 10, 500, 100, 200, 20, 100], dtype="float"),
        metadata={})
    query = Spectrum(
        mz=numpy.array([100, 200, 300, 301, 500, 512], dtype="float"),
        intensities=numpy.array([10, 10, 500, 100, 20, 100], dtype="float"),
        metadata={})

    reference_normalized = normalize_intensities(reference)
    query_normalized = normalize_intensities(query)

    # The similarity object is called directly and yields (score, number of matches).
    scorer = CosineGreedyVectorial(tolerance=2.0)
    cosine_score, matched_peaks = scorer(reference_normalized, query_normalized)

    assert cosine_score == pytest.approx(0.903412, 0.0001), "Expected different cosine score."
    assert matched_peaks == 6
def test_modified_cosine_with_mass_shift_5_no_matches_expected():
    """Modified cosine for two spectra where no peak pair is expected to match."""
    shared_intensities = [10, 10, 500]
    reference = Spectrum(mz=numpy.array([100, 200, 300], dtype="float"),
                         intensities=numpy.array(shared_intensities, dtype="float"),
                         metadata={"precursor_mz": 1000.0})
    query = Spectrum(mz=numpy.array([120, 220, 320], dtype="float"),
                     intensities=numpy.array(shared_intensities, dtype="float"),
                     metadata={"precursor_mz": 1005})

    reference_normalized = normalize_intensities(reference)
    query_normalized = normalize_intensities(query)

    scorer = ModifiedCosine(tolerance=1.0)
    result = scorer.pair(reference_normalized, query_normalized)

    assert result["score"] == pytest.approx(0.0, 1e-5), "Expected different modified cosine score."
    assert result["matches"] == 0, "Expected 0 matching peaks."
def test_harmonize_undefined_inchi_na_3():
    """An InChI entry of "NA" should be replaced by an empty string."""
    metadata = {"inchi": "NA"}
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata=metadata)

    harmonized_inchi = harmonize_undefined_inchi(spectrum_in).get("inchi")

    assert harmonized_inchi == ""
def test_spectra():
    """Build and return a list holding two spectra.

    According to the original note, the peaks are taken from the first two
    spectra in 100_test_spectra.pickle so that they occur in the s2v model;
    the remaining values are random.
    """
    first_metadata = {
        'pepmass': (907.0, None),
        'spectrumid': 'CCMSLIB00000001760',
        'precursor_mz': 907.0,
        # 'precursor_mz': 905.9927235480093,
        'inchikey': 'SCYRNRIZFGMUSB-STOGWRBBSA-N',
        'charge': 1,
    }
    first_mz = np.array([
        808.27356, 872.289917, 890.246277, 891.272888, 894.326416,
        904.195679, 905.224548, 908.183472, 922.178101, 923.155762,
    ], dtype="float")
    first_intensities = np.array([
        0.11106008, 0.12347332, 0.16352988, 0.17101522, 0.17312992,
        0.19262333, 0.21442898, 0.42173288, 0.51071955, 1.,
    ], dtype="float")
    first = Spectrum(mz=first_mz, intensities=first_intensities, metadata=first_metadata)

    # Note: no 'charge' entry is set for the second spectrum.
    second_metadata = {
        'pepmass': (928.0, None),
        'spectrumid': 'CCMSLIB00000001761',
        'precursor_mz': 928.0,
        # 'precursor_mz': 905.010782,
        'inchikey': 'SCYRNRIZFGMUSB-STOGWRBBSA-N',
        # 'charge': 1
    }
    second_mz = np.array([
        538.003174, 539.217773, 556.030396, 599.352783, 851.380859,
        852.370605, 909.424438, 953.396606, 963.686768, 964.524658,
    ], dtype="float")
    second_intensities = np.array([
        0.28046377, 0.28900242, 0.31933114, 0.32199162, 0.34214536,
        0.35616456, 0.36216307, 0.41616014, 0.71323034, 1.,
    ], dtype="float")
    second = Spectrum(mz=second_mz, intensities=second_intensities, metadata=second_metadata)

    return [first, second]
def test_derive_ionmode_positive_adduct():
    """An adduct of "[M+H]" should lead derive_ionmode to set ionmode to "positive"."""
    spectrum_with_adduct = Spectrum(mz=numpy.array([], dtype="float"),
                                    intensities=numpy.array([], dtype="float"),
                                    metadata={"adduct": "[M+H]"})

    derived_ionmode = derive_ionmode(spectrum_with_adduct).get("ionmode")

    assert derived_ionmode == "positive", "Expected different ionmode."
def test_harmonize_undefined_inchikey_no_data():
    """An inchikey entry of "no data" should be replaced by an empty string."""
    placeholder_metadata = {"inchikey": "no data"}
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata=placeholder_metadata)

    result = harmonize_undefined_inchikey(spectrum_in)

    assert result.get("inchikey") == ""
def test_modified_cosine_without_precursor_mz():
    """ModifiedCosine.pair must raise an AssertionError when precursor-m/z is absent."""
    shared_intensities = [700, 200, 100, 1000, 200, 5, 500]
    reference = Spectrum(
        mz=numpy.array([100, 150, 200, 300, 500, 510, 1100], dtype="float"),
        intensities=numpy.array(shared_intensities, dtype="float"))
    query = Spectrum(
        mz=numpy.array([100, 140, 190, 300, 490, 510, 1090], dtype="float"),
        intensities=numpy.array(shared_intensities, dtype="float"))

    reference_normalized = normalize_intensities(reference)
    query_normalized = normalize_intensities(query)
    scorer = ModifiedCosine()

    with pytest.raises(AssertionError) as raised:
        scorer.pair(reference_normalized, query_normalized)

    # The message is compared in full, not just as a substring.
    assert str(raised.value) == "Precursor_mz missing. Apply 'add_precursor_mz' filter first."
def test_add_losses_without_precursor_mz():
    """add_losses on a spectrum without precursor-m/z should yield an equal but new object."""
    # NOTE(review): a test with this exact name is also defined earlier in this
    # file; in one module the later definition replaces the earlier one, so
    # only one of them would be collected. Consider renaming one of the two.
    unprocessed = Spectrum(mz=numpy.array([100, 150, 200, 300], dtype="float"),
                           intensities=numpy.array([700, 200, 100, 1000], dtype="float"))

    processed = add_losses(unprocessed)

    assert processed == unprocessed
    assert processed is not unprocessed
def test_modified_cosine_with_mass_shift_5_tolerance_2():
    """Modified cosine with precursor m/z values 5 apart and tolerance=2.0."""
    reference = Spectrum(
        mz=numpy.array([100, 200, 299, 300, 301, 500, 510], dtype="float"),
        intensities=numpy.array([10, 10, 500, 100, 200, 20, 100], dtype="float"),
        metadata={"precursor_mz": 1000.0})
    query = Spectrum(
        mz=numpy.array([105, 205, 305, 306, 505, 517], dtype="float"),
        intensities=numpy.array([10, 10, 500, 100, 20, 100], dtype="float"),
        metadata={"precursor_mz": 1005})

    reference_normalized = normalize_intensities(reference)
    query_normalized = normalize_intensities(query)

    scorer = ModifiedCosine(tolerance=2.0)
    result = scorer.pair(reference_normalized, query_normalized)

    assert result["score"] == pytest.approx(0.96788, 0.0001), "Expected different modified cosine score."
    assert result["matches"] == 6, "Expected 6 matching peaks."
def test_fingerprint_similarity_parallel_cosine_empty_fingerprint():
    """Cosine score matrix when one of the fingerprints contains only zeros."""
    all_zero_bits = numpy.array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
    empty_fp_spectrum = Spectrum(mz=numpy.array([], dtype="float"),
                                 intensities=numpy.array([], dtype="float"),
                                 metadata={"fingerprint": all_zero_bits})

    some_bits = numpy.array([0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])
    filled_fp_spectrum = Spectrum(mz=numpy.array([], dtype="float"),
                                  intensities=numpy.array([], dtype="float"),
                                  metadata={"fingerprint": some_bits})

    scorer = FingerprintSimilarityParallel(similarity_measure="cosine")
    score_matrix = scorer([empty_fp_spectrum, filled_fp_spectrum],
                          [empty_fp_spectrum, filled_fp_spectrum])

    # Every entry involving the all-zero fingerprint is expected to be 0.
    expected_matrix = numpy.array([[0, 0], [0, 1.]])
    assert score_matrix == pytest.approx(expected_matrix, 0.001), "Expected different values."
def test_harmonize_undefined_smiles_empty_string():
    """An empty smiles string should still be an empty string after harmonizing."""
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={"smiles": ""})

    harmonized_smiles = harmonize_undefined_smiles(spectrum_in).get("smiles")

    assert harmonized_smiles == ""
def test_cosine_greedy_with_arrays_symmetric():
    """CosineGreedy.matrix with is_symmetric=True should give a symmetric score matrix."""
    first = Spectrum(mz=numpy.array([100, 200, 300], dtype="float"),
                     intensities=numpy.array([0.1, 0.2, 1.0], dtype="float"))
    second = Spectrum(mz=numpy.array([110, 190, 290], dtype="float"),
                      intensities=numpy.array([0.5, 0.2, 1.0], dtype="float"))
    spectrums = [first, second]

    scores = CosineGreedy().matrix(spectrums, spectrums, is_symmetric=True)

    # Index [i][j][0] selects the first field of the score entry for pair (i, j).
    diagonal_first, diagonal_second = scores[0][0][0], scores[1][1][0]
    upper, lower = scores[0][1][0], scores[1][0][0]
    assert diagonal_first == pytest.approx(diagonal_second, 0.000001), "Expected different cosine score."
    assert upper == pytest.approx(lower, 0.000001), "Expected different cosine score."
def test_derive_formula_from_name_examples(string_addition, expected_formula):
    """Append string_addition to a fixed compound name and check the derived formula."""
    compound_name = "peptideXYZ [M+H+K] " + string_addition
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={"compound_name": compound_name})

    derived_formula = derive_formula_from_name(spectrum_in).get("formula")

    assert derived_formula == expected_formula, "Expected different formula."
def test_precursormz_match_missing_precursormz():
    """PrecursorMzMatch.pair must raise when one spectrum has no precursor_mz."""
    with_precursor = Spectrum(mz=numpy.array([], dtype="float"),
                              intensities=numpy.array([], dtype="float"),
                              metadata={"precursor_mz": 100.0})
    without_precursor = Spectrum(mz=numpy.array([], dtype="float"),
                                 intensities=numpy.array([], dtype="float"),
                                 metadata={})
    matcher = PrecursorMzMatch(tolerance=2.0)

    with pytest.raises(AssertionError) as raised:
        _ = matcher.pair(with_precursor, without_precursor)

    # Only a fragment of the error message is required to be present.
    error_text = str(raised.value)
    assert "Missing precursor m/z." in error_text, "Expected particular error message."
def test_harmonize_undefined_smiles_alias_nan_undefined_is_na():
    """With custom aliases and undefined="n/a", a smiles of "nan" should become "n/a"."""
    custom_aliases = ["nodata", "NaN", "Nan", "nan"]
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={"smiles": "nan"})

    result = harmonize_undefined_smiles(spectrum_in, aliases=custom_aliases, undefined="n/a")

    assert result.get("smiles") == "n/a"
def test_derive_formula_from_name_no_name_given():
    """Without a compound name, derive_formula_from_name should set neither formula nor name."""
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={})

    spectrum = derive_formula_from_name(spectrum_in)

    # The failure message now names the field actually being checked
    # (it previously said "adduct" although the formula is asserted).
    assert spectrum.get("formula", None) is None, "Expected None for formula."
    assert spectrum.get("compound_name", None) is None, "Expected None for name."
def test_derive_formula_from_name_default():
    """A formula at the end of the compound name should be moved into the 'formula' field."""
    raw_name = "peptideXYZ [M+H+K] C5H12NO2"
    spectrum_in = Spectrum(mz=numpy.array([], dtype="float"),
                           intensities=numpy.array([], dtype="float"),
                           metadata={"compound_name": raw_name})

    result = derive_formula_from_name(spectrum_in)

    assert result.get("formula") == "C5H12NO2", "Expected different formula."
    # The formula part is expected to be stripped from the name itself.
    assert result.get("compound_name") == "peptideXYZ [M+H+K]", "Expected different cleaned name."
def test_fingerprint_similarity_pair_calculations(test_method, expected_score):
    """Compute the pair score of two fixed fingerprints with the given similarity measure."""
    first_bits = numpy.array([1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0])
    first = Spectrum(mz=numpy.array([], dtype="float"),
                     intensities=numpy.array([], dtype="float"),
                     metadata={"fingerprint": first_bits})

    second_bits = numpy.array([0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1])
    second = Spectrum(mz=numpy.array([], dtype="float"),
                      intensities=numpy.array([], dtype="float"),
                      metadata={"fingerprint": second_bits})

    scorer = FingerprintSimilarity(similarity_measure=test_method)
    pair_score = scorer.pair(first, second)

    assert pair_score == pytest.approx(expected_score, 1e-6), "Expected different score."
def test_reduce_to_number_of_peaks_no_params():
    """With default parameters, a four-peak spectrum should come back unchanged."""
    spectrum_in = Spectrum(mz=numpy.array([10, 20, 30, 40], dtype="float"),
                           intensities=numpy.array([0, 1, 10, 100], dtype="float"))

    reduced = reduce_to_number_of_peaks(spectrum_in)

    assert reduced == spectrum_in, "Expected no changes."
def test_spectrum_getters_return_copies():
    """Editing values obtained from Spectrum getters must not alter the spectrum itself."""
    spectrum = Spectrum(mz=numpy.array([100.0, 101.0], dtype="float"),
                        intensities=numpy.array([0.4, 0.5], dtype="float"),
                        metadata={"testdata": 1})

    # Metadata entry: change the retrieved value, then read it again.
    retrieved_value = spectrum.get("testdata")
    retrieved_value += 1
    assert spectrum.get("testdata") == 1, "Expected different entry"

    # Peak m/z array: the in-place addition is deliberate, it would leak
    # into the spectrum if the getter handed out the internal array.
    retrieved_mz = spectrum.peaks.mz
    retrieved_mz += 100.0
    untouched_mz = numpy.array([100.0, 101.0])
    assert numpy.all(spectrum.peaks.mz == untouched_mz), "Expected different peaks.mz"

    # Whole metadata dict: add a key to the retrieved dict.
    retrieved_metadata = spectrum.metadata
    retrieved_metadata["added_info"] = "this"
    assert spectrum.metadata == {'testdata': 1}, "Expected metadata to remain unchanged"
def test_require_minimum_number_of_peaks_no_params():
    """With default parameters, a four-peak spectrum is expected to be rejected (None)."""
    four_peak_spectrum = Spectrum(mz=numpy.array([10, 20, 30, 40], dtype="float"),
                                  intensities=numpy.array([0, 1, 10, 100], dtype="float"))

    outcome = require_minimum_number_of_peaks(four_peak_spectrum)

    assert outcome is None, "Expected None because the number of peaks (4) is less than the default threshold (10)."