def test_save_as_mgf_single_spectrum():
    """Check that a single spectrum is written to an .mgf file as expected.

    The spectrum is built without metadata harmonization, saved into a
    temporary directory, and selected lines of the resulting file are
    compared against their expected text.
    """
    peak_mz = numpy.array([100, 200, 300], dtype="float")
    peak_intensities = numpy.array([10, 10, 500], dtype="float")
    metadata = {
        "charge": -1,
        "inchi": '"InChI=1S/C6H12"',
        "pepmass": (100, 10.0),
        "test_field": "test",
    }

    builder = SpectrumBuilder()
    builder = builder.with_mz(peak_mz)
    builder = builder.with_intensities(peak_intensities)
    builder = builder.with_metadata(metadata, metadata_harmonization=False)
    spectrum = builder.build()

    # Line index -> exact text expected at that position in the file.
    expected_lines = {
        0: "BEGIN IONS\n",
        2: "CHARGE=1-\n",
        4: "TEST_FIELD=test\n",
    }

    # Everything happens inside the context manager because the directory
    # (and the file in it) is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, "test.mgf")
        save_as_mgf(spectrum, filename)

        # The export must have produced a file.
        assert os.path.isfile(filename)

        with open(filename, "r", encoding="utf-8") as handle:
            written_lines = handle.readlines()

        for index, expected in expected_lines.items():
            assert written_lines[index] == expected

        # The first space-separated token of line 7 is the m/z of the last peak.
        assert written_lines[7].partition(" ")[0] == "300.0"
def test_save_load_mgf_consistency(tmpdir, charge, ionmode, parent_mass):
    """Round-trip two identical spectra through an .mgf file.

    Two spectra sharing the same peaks and metadata are saved to one file,
    loaded back, and the metadata of the first loaded spectrum is compared
    with the values that were written.

    NOTE(review): ``tmpdir`` is presumably pytest's built-in fixture and
    ``charge``/``ionmode``/``parent_mass`` presumably come from a
    parametrization that is not visible here -- confirm.
    """
    builder = (
        SpectrumBuilder()
        .with_mz(numpy.array([100.1, 200.02, 300.003], dtype="float"))
        .with_intensities(numpy.array([0.01, 0.02, 1.0], dtype="float"))
    )
    metadata = {
        "precursor_mz": 200.5,
        "charge": charge,
        "ionmode": ionmode,
        "parent_mass": parent_mass,
    }
    spectra = [
        builder.with_metadata(metadata, metadata_harmonization=True).build()
        for _ in range(2)
    ]

    # Write both spectra to a single file.
    filename = os.path.join(tmpdir, "test.mgf")
    save_as_mgf(spectra, filename)

    # The export must have produced a file.
    assert os.path.isfile(filename)

    # Read everything back and inspect the first spectrum only.
    first_loaded = list(load_from_mgf(filename))[0]
    expected_metadata = (
        ("precursor_mz", 200.5),
        ("charge", charge),
        ("ionmode", ionmode),
        # The parent mass is compared against its string form.
        ("parent_mass", str(parent_mass)),
    )
    for key, expected in expected_metadata:
        assert first_loaded.get(key) == expected
def test_save_as_mgf_spectrum_list():
    """Check that a list of two spectra is written to one .mgf file.

    Both spectra share the same peaks and differ only in ``test_field``.

    NOTE(review): a second function with this exact name appears later in
    this source; if both live in the same module the later one shadows this
    test and it is never collected -- confirm and rename one of them.
    """
    builder = (
        SpectrumBuilder()
        .with_mz(numpy.array([100, 200, 300], dtype="float"))
        .with_intensities(numpy.array([10, 10, 500], dtype="float"))
    )
    spectra = [
        builder.with_metadata(
            {"test_field": field_value}, metadata_harmonization=False
        ).build()
        for field_value in ("test1", "test2")
    ]

    # Everything happens inside the context manager because the directory
    # (and the file in it) is removed as soon as the block exits.
    with tempfile.TemporaryDirectory() as workdir:
        filename = os.path.join(workdir, "test.mgf")
        save_as_mgf(spectra, filename)

        # The export must have produced a file.
        assert os.path.isfile(filename)

        with open(filename, "r", encoding="utf-8") as handle:
            written_lines = handle.readlines()

        # Each spectrum block is closed by its own END IONS line.
        for end_index in (5, 12):
            assert written_lines[end_index] == "END IONS\n"

        # The metadata line of each block carries that spectrum's field value.
        assert written_lines[1].split("=")[1] == "test1\n"
        assert written_lines[8].split("=")[1] == "test2\n"
def test_save_as_mgf_spectrum_list():
    """Test saving a list of ``Spectrum`` objects to one .mgf file.

    Two spectra with identical peaks and different ``test_field`` metadata
    are saved together; the test then checks where the ``END IONS`` lines
    fall and that each block carries its own ``test_field`` value.

    NOTE(review): another function with this exact name appears earlier in
    this source; if both live in the same module this definition shadows the
    earlier one, so only this test is collected -- confirm and rename one.
    """
    spectrum1 = Spectrum(mz=numpy.array([100, 200, 300], dtype="float"),
                         intensities=numpy.array([10, 10, 500], dtype="float"),
                         metadata={"test_field": "test1"})
    spectrum2 = Spectrum(mz=numpy.array([100, 200, 300], dtype="float"),
                         intensities=numpy.array([10, 10, 500], dtype="float"),
                         metadata={"test_field": "test2"})

    # Write to test file. All checks stay inside the context manager because
    # the temporary directory is deleted when the block exits.
    with tempfile.TemporaryDirectory() as d:
        filename = os.path.join(d, "test.mgf")
        save_as_mgf([spectrum1, spectrum2], filename)

        # Test if file exists
        assert os.path.isfile(filename)

        # Test if content of mgf file is correct. The encoding is given
        # explicitly so the read does not depend on the platform's locale,
        # matching the other MGF tests in this source.
        with open(filename, "r", encoding="utf-8") as f:
            mgf_content = f.readlines()

        assert mgf_content[5] == mgf_content[12] == "END IONS\n"
        assert mgf_content[1].split("=")[1] == "test1\n"
        assert mgf_content[8].split("=")[1] == "test2\n"
def main(argv):
    """Filter a mass spectra file with the selected filters and save the result.

    The spectra are loaded from ``--spectra`` (MSP or MGF, chosen by
    ``--spectra_format``), every selected filter is applied to each spectrum
    in a fixed order, and the filtered spectra are written to ``--output``.
    The output is written as MSP when the input format is ``msp`` and as MGF
    otherwise.

    Args:
        argv: Not used by the parser; ``parse_args()`` is called without
            arguments and therefore reads the process command line.
            NOTE(review): confirm whether callers expect ``argv`` to be
            honoured before switching to ``parse_args(argv)``.

    Returns:
        ``0`` on success.

    Raises:
        ValueError: If no filter flag was given, or if ``--spectra_format``
            is neither ``msp`` nor ``mgf``.
    """
    parser = argparse.ArgumentParser(
        description="Compute MSP similarity scores")
    parser.add_argument("--spectra", type=str, required=True,
                        help="Mass spectra file to be filtered.")
    parser.add_argument("--spectra_format", type=str, required=True,
                        help="Format of spectra file.")
    parser.add_argument("--output", type=str, required=True,
                        help="Filtered mass spectra file.")
    parser.add_argument(
        "-normalise_intensities", action='store_true',
        help="Normalize intensities of peaks (and losses) to unit height.")
    parser.add_argument(
        "-default_filters", action='store_true',
        help="Collection of filters that are considered default and that do no require any (factory) arguments.")
    parser.add_argument(
        "-clean_metadata", action='store_true',
        help="Apply all adding and cleaning filters if possible, so that the spectra have canonical metadata.")
    parser.add_argument(
        "-relative_intensity", action='store_true',
        help="Keep only peaks within set relative intensity range (keep if to_intensity >= intensity >= from_intensity).")
    parser.add_argument("--from_intensity", type=float,
                        help="Lower bound for intensity filter")
    parser.add_argument("--to_intensity", type=float,
                        help="Upper bound for intensity filter")
    parser.add_argument(
        "-mz_range", action='store_true',
        help="Keep only peaks between set m/z range (keep if to_mz >= m/z >= from_mz).")
    parser.add_argument("--from_mz", type=float,
                        help="Lower bound for m/z filter")
    parser.add_argument("--to_mz", type=float,
                        help="Upper bound for m/z filter")

    args = parser.parse_args()

    # Running without any filter would just copy the input; treat it as a
    # usage error instead.
    if not (args.normalise_intensities or args.default_filters
            or args.clean_metadata or args.relative_intensity
            or args.mz_range):
        raise ValueError('No filter selected.')

    # Both branches test the same option. The parser defines no
    # ``queries_format`` attribute, so reading it would raise AttributeError
    # for every non-MSP input.
    if args.spectra_format == 'msp':
        spectra = list(load_from_msp(args.spectra))
    elif args.spectra_format == 'mgf':
        spectra = list(load_from_mgf(args.spectra))
    else:
        raise ValueError(
            f'File format {args.spectra_format} not supported for mass spectra file.'
        )

    # The metadata filter chain does not depend on the spectrum, so it is
    # built once; it stays empty when the flag is not set.
    if args.clean_metadata:
        metadata_filters = (
            add_compound_name,
            add_precursor_mz,
            add_fingerprint,
            add_losses,
            add_parent_mass,
            add_retention_index,
            add_retention_time,
            clean_compound_name,
        )
    else:
        metadata_filters = ()

    filtered_spectra = []
    for spectrum in spectra:
        if args.normalise_intensities:
            spectrum = normalize_intensities(spectrum)

        if args.default_filters:
            spectrum = default_filters(spectrum)

        for metadata_filter in metadata_filters:
            spectrum = metadata_filter(spectrum)

        if args.relative_intensity:
            spectrum = select_by_relative_intensity(
                spectrum, args.from_intensity, args.to_intensity)

        if args.mz_range:
            spectrum = select_by_mz(spectrum, args.from_mz, args.to_mz)

        filtered_spectra.append(spectrum)

    # The output format follows the input format.
    if args.spectra_format == 'msp':
        save_as_msp(filtered_spectra, args.output)
    else:
        save_as_mgf(filtered_spectra, args.output)

    return 0