def test_idzip():
    """Round-trip the ``idzip-compression`` CLI over stdin/stdout.

    Checks that gzip input is detected (and that plain input is not), and
    that the idzip output decompresses to the same payload as the input.
    Fixes a resource leak: the original left the input file handles open.
    """
    runner = CliRunner(mix_stderr=False)

    def compress_via_stdin(path):
        # Feed the file's bytes through the CLI on stdin and return the
        # invocation result. The file handle is closed promptly.
        with open(path, 'rb') as fh:
            stdin_data = io.BytesIO(fh.read())
        return runner.invoke(indexing.idzip_compression, ['-'], input=stdin_data)

    # Case 1: gzip-compressed input should be detected and re-compressed;
    # both streams must decompress to identical bytes.
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    result = compress_via_stdin(path)
    assert b"Detected gzip input file" in result.stderr_bytes
    outstream = _compression.GzipFile(
        fileobj=io.BytesIO(result.stdout_bytes), mode='rb')
    with _compression.GzipFile(path, mode='rb') as instream:
        assert instream.read() == outstream.read()

    # Case 2: uncompressed input must not trigger the gzip-detection
    # message, and the output must decompress to the raw file contents.
    path = datafile("small.mzML")
    result = compress_via_stdin(path)
    assert b"Detected gzip input file" not in result.stderr_bytes
    outstream = _compression.GzipFile(
        fileobj=io.BytesIO(result.stdout_bytes), mode='rb')
    with io.open(path, mode='rb') as instream:
        assert instream.read() == outstream.read()
def test_ms_deisotope():
    """End-to-end check of the ``deisotope`` CLI against a reference run.

    Deconvolutes a small mzML file to a temporary output and compares every
    precursor and product peak set against a previously generated reference
    file, reporting peak-set diffs when a comparison fails.
    """
    runner = CliRunner(mix_stderr=False)
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    reference = datafile("20150710_3um_AGP_001_29_30.preprocessed.mzML.gz")
    outpath = tempfile.mktemp()
    runner.invoke(deisotoper.deisotope, [
        "-b", 0, "-t", 20, "-tn", 10, "-m", 3, "-mn", 1, path, outpath
    ])
    result_reader = ProcessedMzMLDeserializer(outpath)
    reference_reader = ProcessedMzMLDeserializer(_compression.get_opener(reference))
    # try/finally so the readers are closed and the temp file removed even
    # when an assertion fails (the original leaked them on failure).
    try:
        assert len(result_reader) == len(reference_reader)
        for a_bunch, b_bunch in zip(result_reader, reference_reader):
            assert len(a_bunch.products) == len(b_bunch.products)
            aprec = a_bunch.precursor
            bprec = b_bunch.precursor
            assert aprec.id == bprec.id
            diffa, diffb = diff_deconvoluted_peak_set(
                aprec.deconvoluted_peak_set, bprec.deconvoluted_peak_set)
            assert len(aprec.deconvoluted_peak_set) == len(
                bprec.deconvoluted_peak_set), "Peak Counts Diff On %r, (%r, %r)" % (
                    aprec.id, diffa, diffb)
            assert aprec.deconvoluted_peak_set == bprec.deconvoluted_peak_set, \
                "Peaks Diff On %r, (%r, %r)" % (aprec.id, diffa, diffb)
            for aprod, bprod in zip(a_bunch.products, b_bunch.products):
                assert aprod.id == bprod.id
                diffa, diffb = diff_deconvoluted_peak_set(
                    aprod.deconvoluted_peak_set, bprod.deconvoluted_peak_set)
                assert len(aprod.deconvoluted_peak_set) == len(
                    bprod.deconvoluted_peak_set), "Peak Counts Diff On %r, (%r, %r)" % (
                        aprod.id, diffa, diffb)
                # BUG FIX: the original message had one %r placeholder but
                # three arguments, raising TypeError when the assert fired.
                assert aprod.deconvoluted_peak_set == bprod.deconvoluted_peak_set, \
                    "Peaks Diff On %r, (%r, %r)" % (aprod.id, diffa, diffb)
    finally:
        result_reader.close()
        reference_reader.close()
        if os.path.exists(outpath):
            os.remove(outpath)
def run_ms_deisotope():
    """Regenerate the preprocessed reference fixture used by test_ms_deisotope.

    Runs the ``deisotope`` CLI over the raw fixture, idzip-compresses the
    resulting mzML, and removes the intermediate uncompressed file. Not
    collected by pytest (no ``test_`` prefix); run manually to refresh the
    reference file.
    """
    runner = CliRunner(mix_stderr=False)
    path = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    reference = datafile("20150710_3um_AGP_001_29_30.preprocessed.mzML")
    result = runner.invoke(
        deisotoper.deisotope,
        ["-b", 0, "-t", 20, "-tn", 10, "-m", 3, "-mn", 1, path, reference])
    # BUG FIX: exit_code lives on the invocation result, not on the runner.
    print(result.exit_code)
    print(result.stdout)
    # BUG FIX: CliRunner.invoke takes the CLI arguments as a single list;
    # the original passed them as separate positional parameters. Also
    # capture the result so the printed exit code refers to this command.
    result = runner.invoke(
        indexing.idzip_compression, [reference, "-o", reference + '.gz'])
    print(result.exit_code)
    os.remove(reference)
class TestFileMetadata(unittest.TestCase):
    """File-level metadata parsing checks over a small mzML fixture."""

    path = datafile("three_test_scans.mzML")

    @property
    def reader(self):
        # A fresh loader per access keeps individual tests independent.
        return infer_type.MSFileLoader(self.path)

    def test_file_information(self):
        loader = self.reader
        description = loader.file_description()
        assert "MS1 spectrum" in description
        assert loader.id_format == "no nativeID format"

    def test_source_file(self):
        id_format, file_format = file_information.SourceFile.guess_format(self.path)
        assert file_format == "mzML format"
        assert id_format == "no nativeID format"
        source = file_information.SourceFile.from_path(self.path)
        assert not source.has_checksum()
        source.add_checksum('sha1')
        # After adding a SHA-1 checksum both the specific and generic
        # queries must report its presence.
        assert source.has_checksum('sha1')
        assert source.has_checksum()
        assert source.parameters["SHA-1"] == source.checksum("sha1")
        assert source.validate_checksum()
        duplicate = source.copy()
        assert source == duplicate
class TestMzXMLLoaderScanBehavior(unittest.TestCase):
    """Scan-level behavior of the mzXML loader on a microscan fixture."""

    path = datafile("microscans.mzXML")

    @property
    def reader(self):
        # Fresh, type-inferred loader for each access.
        return infer_type.MSFileLoader(self.path)

    @property
    def first_scan(self):
        # Precursor scan of the first bunch produced by the reader.
        return self.reader.next().precursor

    def test_id(self):
        # The first precursor's native ID is "210" and random access by
        # that ID yields a scan with the same identifier.
        loader = self.reader
        scan = next(loader).precursor
        self.assertEqual(scan.id, "210")
        scan = loader.get_scan_by_id("210")
        self.assertEqual(scan.id, "210")

    def test_polarity(self):
        self.assertEqual(self.first_scan.polarity, 1)

    def test_index(self):
        # NOTE(review): this asserts polarity rather than index -- it looks
        # like a copy-paste of test_polarity; confirm whether it should
        # check self.first_scan.index instead.
        self.assertEqual(self.first_scan.polarity, 1)

    def test_arrays(self):
        # The scan exposes exactly two data arrays.
        self.assertEqual(len(self.first_scan.arrays), 2)

    def test_precursor_info(self):
        # The first scan carries no precursor information.
        self.assertEqual(self.first_scan.precursor_information, None)
class TestScanTraits(unittest.TestCase):
    """Acquisition, scan-window, and isolation-window trait behavior."""

    path = datafile("three_test_scans.mzML")

    @property
    def reader(self):
        return infer_type.MSFileLoader(self.path)

    def test_traits(self):
        bunch = next(self.reader)
        precursor = bunch.precursor
        acq = precursor.acquisition_information
        assert len(acq) == 1
        event = acq[0]
        assert not event.has_ion_mobility()
        assert len(event) == 1
        window = event[0]
        # The precursor's single scan window spans m/z 350-1500.
        assert window.lower == 350
        assert window.upper == 1500
        assert not window.is_empty()
        assert scan_traits.ScanWindow(0, 0).is_empty()
        assert scan_traits.ScanWindow(350, 1500) == window
        assert window == event.total_scan_window()
        # Equality is reflexive for events and acquisition info.
        assert event == event
        assert acq == acq
        product = bunch.products[0]
        iso = product.isolation_window
        assert not iso.is_empty()
        # Isolation windows with missing or zero bounds count as empty.
        assert scan_traits.IsolationWindow(None, 200, None).is_empty()
        assert scan_traits.IsolationWindow(0, 200, 0).is_empty()
        assert iso.spans(iso.target)
        assert iso == iso
def test_mgf():
    """Convert a small mzML file to MGF on stdout, plain and gzipped.

    Each conversion must emit 34 spectra (counted by BEGIN markers) and
    must not leave a stray "-idx.json" index file behind.
    """
    runner = CliRunner(mix_stderr=False)

    def ensure_no_orphan_index(message):
        # Writing to stdout must never create an index file named "-idx.json".
        if os.path.exists("-idx.json"):
            raise IOError(message)

    ensure_no_orphan_index("Orphan index file exists before running test")
    path = datafile("small.mzML")
    result = runner.invoke(conversion.mgf, [path, '-'], catch_exceptions=False)
    count = sum(1 for line in result.output.splitlines() if "BEGIN" in line)
    assert count == 34
    ensure_no_orphan_index("Orphan index file exists after running uncompressed test")

    result = runner.invoke(conversion.mgf, [path, '-z', '-'])
    assert _compression.starts_with_gz_magic(result.stdout_bytes)
    reader = _compression.GzipFile(
        fileobj=io.BytesIO(result.stdout_bytes), mode='rb')
    count = sum(1 for line in reader if b"BEGIN" in line)
    assert count == 34
    ensure_no_orphan_index("Orphan index file exists after running compressed test")
def test_extraction_quick_charge(self):
    """Run one deconvolution step with the quick-charge heuristic enabled.

    Compares the averagine cache populated during deconvolution against a
    pickled reference, then checks the fit cluster found for the probe
    peak returned by ``build_deconvoluter``.
    """
    scan = self.scan
    peak3, deconvoluter = self.build_deconvoluter(scan, peptide, use_quick_charge=True)
    deconvoluter._deconvolution_step(0, truncate_after=0.8, charge_range=(1, 4))
    with open(datafile("extraction_quick_charge_averagine.pkl"), 'rb') as fh:
        reference_averagine = pickle.load(fh)
    # The populated cache must contain no keys beyond the reference's,
    # match its expected size, and compare equal overall.
    diff = set(deconvoluter.averagine.backend) - set(
        reference_averagine.backend)
    assert len(diff) == 0
    assert len(deconvoluter.averagine.backend) == 5134
    assert reference_averagine == deconvoluter.averagine
    # The probe peak should resolve to exactly one fit, at charge 4.
    cluster3 = deconvoluter.peak_dependency_network.find_cluster_for(peak3)
    spanned3 = cluster3.fits_using_mz(peak3.mz)
    assert len(cluster3) == 1
    assert len(spanned3) == 1
    assert np.isclose(cluster3.best_fit.monoisotopic_peak.mz, 138.53090)
    assert cluster3.best_fit.charge == 4
class TestMzMLbLoaderScanBehavior(unittest.TestCase):
    """Compare the mzMLb loader against a type-inferred reference reader."""

    path = datafile("20150710_3um_AGP_001_29_30.mzMLb")
    # NOTE(review): ref_path points at the same .mzMLb file as ``path``, so
    # the "reference" reader reads the identical file through inference;
    # presumably this was meant to be the corresponding .mzML(.gz) -- confirm.
    ref_path = datafile("20150710_3um_AGP_001_29_30.mzMLb")
    # Shared readers, created once for the whole class in setUpClass.
    reader = None
    reference_reader = None

    @classmethod
    def setUpClass(cls):
        cls.reader = MzMLbLoader(cls.path)
        cls.reference_reader = infer_type.MSFileLoader(cls.ref_path)
        super(TestMzMLbLoaderScanBehavior, cls).setUpClass()

    @classmethod
    def tearDownClass(cls):
        cls.reader.close()
        cls.reference_reader.close()
        super(TestMzMLbLoaderScanBehavior, cls).tearDownClass()

    def test_infer(self):
        # Type inference should select the mzMLb backend for .mzMLb files.
        reader = infer_type.MSFileLoader(self.path)
        assert isinstance(reader, MzMLbLoader)

    def test_get_by_id_equiv(self):
        # Random access by native ID must agree with the reference reader.
        reader = self.reader
        reference_reader = self.reference_reader
        scan = reader.get_scan_by_id("scanId=1740226")
        ref = reference_reader.get_scan_by_id("scanId=1740226")
        assert scan == ref

    def test_start_from_equiv(self):
        # Starting iteration from the mid-run retention time must yield
        # the same scans from both readers (checked for several scans).
        reader = self.reader
        reference_reader = self.reference_reader
        n = len(reader)
        mid = reader[n // 2].scan_time
        reader.start_from_scan(rt=mid)
        reference_reader.start_from_scan(rt=mid)
        i = 0
        for a, b in zip(reader, reference_reader):
            assert a == b
            i += 1
            if i > 5:
                break
def test_describe():
    """The ``describe`` command should summarize a file's format metadata."""
    runner = CliRunner(mix_stderr=False)
    target = datafile("small.mzML")
    result = runner.invoke(indexing.describe, [target])
    output_lines = result.output.splitlines()
    assert "small.mzML" in output_lines[0]
    # Lines 1-3 carry the format summary, in a fixed order.
    expected = [
        "File Format: mzML format",
        "ID Format: Thermo nativeID format",
        "Format Supports Random Access: True",
    ]
    for position, text in enumerate(expected, 1):
        assert output_lines[position] == text
def test_task():
    """Run ``describe`` with a default (stderr-mixing) runner and echo output."""
    runner = CliRunner()
    target = datafile("small.mzML")
    result = runner.invoke(indexing.describe, [target])
    print(result.output)
    output_lines = result.output.splitlines()
    assert "small.mzML" in output_lines[0]
    # Same fixed-order summary lines as the mix_stderr=False variant.
    expected = [
        "File Format: mzML format",
        "ID Format: Thermo nativeID format",
        "Format Supports Random Access: True",
    ]
    for position, text in enumerate(expected, 1):
        assert output_lines[position] == text
class TestFancyIterator(unittest.TestCase):
    """Behavior of the query iterator/filter wrappers over an LC-MS run."""

    complex_compressed_mzml = datafile("20150710_3um_AGP_001_29_30.mzML.gz")

    def _get_reader(self):
        # Fresh reader for each test.
        return ms_deisotope.MSFileLoader(self.complex_compressed_mzml)

    def test_time_interval_iterator(self):
        # Restrict iteration to retention times in [29.5, 31].
        reader = self._get_reader()
        tii = query.TimeIntervalIterator(reader, 29.5, 31)
        assert tii.has_ms1_scans()
        assert tii.has_msn_scans()
        n = 0
        for precursor, products in tii:
            if n == 0:
                # The first precursor lands right at the interval start.
                assert abs(precursor.scan_time - 29.5) < 1e-2
            else:
                assert precursor.scan_time >= 29.5
            n += 1
        assert n == 24

    def test_index_interval_iterator(self):
        # Restrict iteration to precursor indices up to 31.
        reader = self._get_reader()
        iii = query.IndexIntervalIterator(reader, end=31)
        assert iii.has_ms1_scans()
        assert iii.has_msn_scans()
        assert iii.start == 0
        n = 0
        for precursor, products in iii:
            n += 1
            assert precursor.index <= 31
        assert n == 6

    def test_ms_level_filter(self):
        # Filtering to MS level 2 yields batches without a precursor scan.
        reader = self._get_reader()
        flt = query.MSLevelFilter(reader, 2)
        n = 0
        for batch in flt:
            assert batch.precursor is None
            n += 1
        assert n == 52

    def test_ms1_merger(self):
        # Count batches and total products produced by the MS1 merger.
        reader = self._get_reader()
        trf = query.MS1MergingTransformer(reader)
        n = 0
        n2 = 0
        for batch in trf:
            n += 1
            n2 += len(batch.products)
        assert n == 10
        assert n2 == 260
class TestScanProcessor(unittest.TestCase):
    """Smoke test: ScanProcessor yields complete bunches for a small file."""

    mzml_path = datafile("three_test_scans.mzML")

    def test_processor(self):
        ms1_args = {
            "averagine": glycopeptide,
            "scorer": PenalizedMSDeconVFitter(5., 2.),
        }
        proc = processor.ScanProcessor(
            self.mzml_path, ms1_deconvolution_args=ms1_args)
        # Every bunch must carry both a precursor and its products.
        for bunch in iter(proc):
            self.assertIsNotNone(bunch)
            self.assertIsNotNone(bunch.precursor)
            self.assertIsNotNone(bunch.products)
class TestMzXMLLoaderScanBehavior(unittest.TestCase):
    """Scan behavior and file metadata of the mzXML loader (microscan data)."""

    path = datafile("microscans.mzXML")

    @property
    def reader(self):
        # Fresh, type-inferred loader for each access.
        return infer_type.MSFileLoader(self.path)

    @property
    def first_scan(self):
        # Precursor of the first scan bunch.
        return self.reader.next().precursor

    def test_id(self):
        # The first precursor's ID is "210"; random access by ID agrees.
        loader = self.reader
        scan = next(loader).precursor
        self.assertEqual(scan.id, "210")
        scan = loader.get_scan_by_id("210")
        self.assertEqual(scan.id, "210")

    def test_start_from_scan(self):
        # Starting from a retention time lands on the matching scan, and
        # restarting from that scan's index reproduces the same precursor.
        loader = self.reader
        time = 0.4856916666666667
        bunch = next(loader.start_from_scan(rt=time))
        self.assertAlmostEqual(bunch.precursor.scan_time, time, 3)
        ix = bunch.precursor.index
        assert next(
            loader.start_from_scan(index=ix)).precursor == bunch.precursor

    def test_polarity(self):
        self.assertEqual(self.first_scan.polarity, 1)

    def test_index(self):
        # NOTE(review): asserts polarity rather than index -- looks like a
        # copy-paste of test_polarity; confirm intended behavior.
        self.assertEqual(self.first_scan.polarity, 1)

    def test_arrays(self):
        # The scan exposes exactly two data arrays.
        self.assertEqual(len(self.first_scan.arrays), 2)

    def test_precursor_info(self):
        self.assertEqual(self.first_scan.precursor_information, None)

    def test_file_description(self):
        # The declared source file name is preserved; no location parameter.
        file_info = self.reader.file_description()
        source_file = file_info.source_files[0]
        assert source_file.name == "AGP_tryptic_300ng_3microscans_glycoproteomics_nCE_27-35.raw"
        assert "location" not in source_file.parameters

    def test_data_processing(self):
        proc_info = self.reader.data_processing()
        assert len(proc_info) == 2
class TestMemoryScanSource(unittest.TestCase):
    """The in-memory scan loader must reproduce the file's scan order."""

    path = datafile("three_test_scans.mzML")

    @property
    def source_reader(self):
        return infer_type.MSFileLoader(self.path)

    @property
    def prepare_source(self):
        # Materialize the whole file into a memory-backed loader.
        return memory.MemoryScanLoader.build(self.source_reader)

    def test_iteration(self):
        expected_ids = iter(scan_ids)
        bunch = next(self.prepare_source)
        assert bunch.precursor.id == next(expected_ids)
        for product in bunch.products:
            assert product.id == next(expected_ids)
class TestMGFLoaderScanBehavior(unittest.TestCase):
    """Basic index and scan-interface behavior of the MGF loader."""

    path = datafile("small.mgf")

    @property
    def reader(self):
        return infer_type.MSFileLoader(self.path)

    def test_index(self):
        # 34 spectra are indexed and addressable by native ID.
        loader = self.reader
        assert len(loader.index) == 34
        scan = loader.get_scan_by_id('small.10.10')
        assert scan.id == 'small.10.10'

    def test_scan_interface(self):
        scan = next(self.reader)
        assert isinstance(scan, Scan)
        assert not scan.is_profile
        assert scan.precursor_information.precursor_scan_id is None
def test_mzml():
    """Convert a small mzML to stdout, both plain and gzip-compressed.

    Each conversion must yield a readable mzML stream with 48 spectra and
    must not leave a stray "-idx.json" index file behind.
    """
    runner = CliRunner(mix_stderr=False)

    def ensure_no_orphan_index(message):
        # Writing to stdout must never create an index file named "-idx.json".
        if os.path.exists("-idx.json"):
            raise IOError(message)

    ensure_no_orphan_index("Orphan index file exists before running test")
    path = datafile("small.mzML")
    result = runner.invoke(conversion.mzml, ['-p', '-c', path, '-'])
    reader = MzMLLoader(io.BytesIO(result.output.encode("utf-8")))
    assert len(reader) == 48
    ensure_no_orphan_index(
        "Orphan index file exists after running uncompressed test")

    result = runner.invoke(
        conversion.mzml, ['-p', '-z', '-c', path, '-'], catch_exceptions=False)
    reader = MzMLLoader(_compression.get_opener(io.BytesIO(result.stdout_bytes)))
    assert len(reader) == 48
    ensure_no_orphan_index("Orphan index file exists after running compressed test")
def test_extraction(self):
    """Run one plain deconvolution step and validate cache and fit cluster.

    Compares the averagine cache populated during deconvolution against a
    pickled reference, then checks the fit cluster found for the probe
    peak returned by ``build_deconvoluter``.
    """
    scan = self.scan
    peak, deconvoluter = self.build_deconvoluter(scan, peptide)
    deconvoluter._deconvolution_step(0, truncate_after=0.8, charge_range=(1, 4))
    with open(datafile("extraction_base_averagine.pkl"), 'rb') as fh:
        reference_averagine = pickle.load(fh)
    # The populated cache must contain no keys beyond the reference's,
    # match its expected size, and compare equal overall.
    diff = set(deconvoluter.averagine.backend) - set(
        reference_averagine.backend)
    assert len(diff) == 0
    assert len(deconvoluter.averagine.backend) == 8865
    assert reference_averagine == deconvoluter.averagine
    # The probe peak belongs to a four-fit cluster, best fit at charge 3.
    cluster = deconvoluter.peak_dependency_network.find_cluster_for(peak)
    spanned = cluster.fits_using_mz(peak.mz)
    assert len(cluster) == 4
    assert len(spanned) == 2
    assert np.isclose(cluster.best_fit.monoisotopic_peak.mz, 138.19520)
    assert cluster.best_fit.charge == 3
class TestMGFLoaderScanBehavior(unittest.TestCase):
    """Index, time lookup, and scan interface behavior of the MGF loader."""

    path = datafile("small.mgf")

    @property
    def reader(self):
        # Fresh, type-inferred loader for each access.
        return infer_type.MSFileLoader(self.path)

    def test_source_file_name(self):
        reader = self.reader
        assert reader.source_file_name.endswith("small.mgf")

    def test_index(self):
        # 34 spectra, addressable both by native ID and by position.
        reader = self.reader
        assert len(reader.index) == 34
        scan = reader.get_scan_by_id('small.10.10')
        assert scan.id == 'small.10.10'
        scan = reader[10]
        assert scan.index == 10

    def test_get_time(self):
        # Time-based lookup and time-based iteration agree on the scan ID.
        reader = self.reader
        scan = reader.get_scan_by_time(0.3)
        assert scan.id == 'small.31.31'
        scan = next(reader.start_from_scan(rt=0.3, grouped=False))
        assert scan.id == 'small.31.31'

    def test_annotations(self):
        # MGF scans carry no annotations.
        scan = self.reader[10]
        assert scan.annotations == {}

    def test_scan_interface(self):
        # Scans are centroided and have no precursor scan reference.
        reader = self.reader
        scan = next(reader)
        assert isinstance(scan, Scan)
        assert not scan.is_profile
        assert scan.precursor_information.precursor_scan_id is None
def test_extraction_cached_averagine(self):
    """Run a deconvolution step using a pre-populated AveragineCache.

    Populates the cache up front, then verifies the cache state against a
    pickled reference and checks the fit cluster for the probe peak.
    """
    scan = self.scan
    cache = AveragineCache(peptide)
    cache.populate(truncate_after=0.8)
    peak2, deconvoluter = self.build_deconvoluter(scan, cache)
    deconvoluter._deconvolution_step(0, truncate_after=0.8, charge_range=(1, 4))
    with open(datafile("extraction_cached_averagine.pkl"), 'rb') as fh:
        reference_averagine = pickle.load(fh)
    # The cache must contain no keys beyond the reference's, match its
    # expected size, and compare equal overall.
    diff = set(deconvoluter.averagine.backend) - set(
        reference_averagine.backend)
    assert len(diff) == 0
    assert len(deconvoluter.averagine.backend) == 23960
    assert reference_averagine == deconvoluter.averagine
    # The probe peak belongs to a four-fit cluster, best fit at charge 3.
    cluster2 = deconvoluter.peak_dependency_network.find_cluster_for(peak2)
    spanned2 = cluster2.fits_using_mz(peak2.mz)
    assert len(cluster2) == 4
    assert len(spanned2) == 2
    assert np.isclose(cluster2.best_fit.monoisotopic_peak.mz, 138.19520)
    assert cluster2.best_fit.charge == 3
class TestScanClustering(unittest.TestCase):
    """Cluster every MS/MS scan of a preprocessed run and count clusters."""

    path = datafile(
        "AGP_tryptic_300ng_2microscans_glycoproteomics_nCE_27-30.preprocessed.mzML.gz"
    )

    @property
    def reader(self):
        return ProcessedMzMLDeserializer(get_opener(self.path))

    def load_msms_scans(self, reader):
        # Resolve every MSn scan recorded in the extended index.
        return [
            reader.get_scan_by_id(scan_id)
            for scan_id in reader.extended_index.msn_ids.keys()
        ]

    def cluster_scans(self, scans):
        return scan_clustering.cluster_scans(scans)

    def test_cluster_scans(self):
        reader = self.reader
        products = self.load_msms_scans(reader)
        clusters = self.cluster_scans(products)
        assert len(clusters) == 1124
def get_scan(self):
    # Load the pickled raw scan fixture and wrap it in a Scan backed by
    # the mzML data interface.
    raw = gzload(datafile("test_scan.pkl.gz"))
    return common.Scan(raw, mzml.MzMLDataInterface())
def make_scan(self):
    # Read the first scan bunch from the compressed AGP fixture.
    fixture = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    loader = MSFileLoader(fixture)
    return next(loader)
def make_scan(self):
    # Produce the first scan bunch of the compressed AGP fixture file.
    source = MSFileLoader(datafile("20150710_3um_AGP_001_29_30.mzML.gz"))
    first_bunch = next(source)
    return first_bunch
import unittest

import ms_deisotope
from ms_deisotope.feature_map import feature_map
from ms_deisotope.test.common import datafile

# Shared fixture: a compressed LC-MS run used to build the feature forest.
complex_compressed_mzml = datafile("20150710_3um_AGP_001_29_30.mzML.gz")


class LCMSFeatureMapTest(unittest.TestCase):
    """Build an LCMSFeatureForest once and query it across tests."""

    # Populated for the whole class by setUpClass, cleared on teardown.
    features = None

    @classmethod
    def setUpClass(cls):
        reader = ms_deisotope.MSFileLoader(complex_compressed_mzml)
        features = feature_map.LCMSFeatureForest.from_reader(reader)
        cls.features = features

    @classmethod
    def tearDownClass(cls):
        cls.features = None

    def test_forest(self):
        # The forest holds 4151 features and finds one near m/z 1161.50875.
        features = self.features
        assert len(features) == 4151
        f = features.search(1161.50875)
        assert f is not None

    def test_search(self):
        # NOTE(review): this test only fetches the fixture and asserts
        # nothing -- it appears truncated or unfinished; confirm intent.
        features = self.features
class TestMzMLSerializer(unittest.TestCase):
    """Round-trip a processed scan bunch through MzMLSerializer.

    Writes file-level metadata and one picked/deconvoluted bunch to a
    temporary mzML file, re-reads it with ProcessedMzMLDeserializer, and
    verifies metadata and spectra survived the round trip.
    """

    source_data_path = datafile("three_test_scans.mzML")

    def test_writer(self):
        source_reader = MzMLLoader(self.source_data_path)
        fd, name = tempfile.mkstemp()
        with open(name, 'wb') as fh:
            writer = MzMLSerializer(fh, n_spectra=len(source_reader.index), deconvoluted=True)
            # Copy file-level metadata, exercising add/remove of contents:
            # "profile spectrum" is added and then removed again.
            description = source_reader.file_description()
            writer.add_file_information(description)
            writer.add_file_contents("profile spectrum")
            writer.add_file_contents("centroid spectrum")
            writer.remove_file_contents("profile spectrum")
            instrument_configs = source_reader.instrument_configuration()
            for config in instrument_configs:
                writer.add_instrument_configuration(config)
            software_list = source_reader.software_list()
            for software in software_list:
                writer.add_software(software)
            data_processing_list = source_reader.data_processing()
            for dp in data_processing_list:
                writer.add_data_processing(dp)
            processing = writer.build_processing_method()
            writer.add_data_processing(processing)
            # Pick peaks and deconvolute one bunch, then serialize it.
            bunch = next(source_reader)
            bunch.precursor.pick_peaks()
            bunch.precursor.deconvolute()
            for product in bunch.products:
                product.pick_peaks()
                product.deconvolute()
            writer.save(bunch)
            writer.complete()
            fh.flush()
            writer.format()
        source_reader.reset()
        processed_reader = ProcessedMzMLDeserializer(
            _compression.get_opener(writer.handle.name))
        # Instrument configurations must survive the round trip.
        for a, b in zip(source_reader.instrument_configuration(),
                        processed_reader.instrument_configuration()):
            assert a.analyzers == b.analyzers
        # Scan IDs, acquisition info, and precursor neutral masses must
        # match between the source and the re-read output.
        for a, b in zip(source_reader, processed_reader):
            assert a.precursor.id == b.precursor.id
            assert (a.precursor.acquisition_information ==
                    b.precursor.acquisition_information)
            for an, bn in zip(a.products, b.products):
                assert an.id == bn.id
                assert abs(an.precursor_information.neutral_mass -
                           bn.precursor_information.neutral_mass) < 1e-6
        processed_reader.reset()
        description = processed_reader.file_description()
        # "profile spectrum" was removed before writing; only the
        # centroid declaration should remain.
        assert "profile spectrum" not in description.contents
        assert "centroid spectrum" in description.contents
        sf = description.source_files[0]
        assert 'location' not in sf.parameters
        assert sf.parameters[
            'SHA-1'] == 'a2a091b82f27676da87a6c7d17cc90d2d90b8fbf'
        # The extended index supports precursor-mass lookup.
        index = processed_reader.extended_index
        pinfo = index.find_msms_by_precursor_mass(
            ms_deisotope.neutral_mass(562.7397, 2))
        assert len(pinfo) > 0
        processed_reader.close()
        # Best-effort cleanup of the temp file and its sidecar index.
        try:
            os.remove(name)
            os.remove(processed_reader._index_file_name)
        except OSError:
            pass
class TestMzMLLoaderScanBehavior(unittest.TestCase):
    """Scan-level behavior of the mzML loader over three_test_scans.mzML."""
    # NOTE(review): a later class in this module reuses this exact name,
    # shadowing this definition at import time -- confirm which is intended.

    path = datafile("three_test_scans.mzML")

    @property
    def reader(self):
        return infer_type.MSFileLoader(self.path)

    def test_iteration(self):
        # One grouped bunch: 1 precursor + 2 products = 3 scans; ungrouped
        # iteration restarts at index 0 after reset().
        reader = self.reader
        i = 0
        bunch = next(reader)
        if bunch.precursor:
            i += 1
        i += len(bunch.products)
        self.assertEqual(i, 3)
        reader.reset()
        reader.make_iterator(grouped=False)
        scan = next(reader)
        scan._load()
        self.assertEqual(scan.index, 0)
        reader.close()

    def test_index(self):
        # Precursor is index 0; products follow sequentially.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.index, 0)
        for i, scan in enumerate(bunch.products, 1):
            self.assertEqual(scan.index, i)
        reader.close()

    def test_ms_level(self):
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.ms_level, 1)
        for i, scan in enumerate(bunch.products, 1):
            self.assertEqual(scan.ms_level, 2)
        reader.close()

    def test_polarity(self):
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.polarity, 1)
        reader.close()

    def test_activation(self):
        # The MS1 scan has no activation; each product reports beam-type
        # collision-induced dissociation.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.activation, None)
        for product in bunch.products:
            self.assertNotEqual(product.activation, None)
            self.assertEqual(product.activation.method,
                             "beam-type collision-induced dissociation")
        reader.close()

    def test_precursor(self):
        # Products link back to the precursor scan object.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.precursor_information, None)
        for product in bunch.products:
            self.assertNotEqual(product.precursor_information, None)
            self.assertEqual(product.precursor_information.precursor,
                             bunch.precursor)
        reader.close()

    def test_pick_peaks(self):
        # NOTE(review): expects 2107 peaks here while the later duplicate
        # class expects 2108 -- confirm which count is current.
        reader = self.reader
        bunch = next(reader)
        scan = bunch.precursor.pick_peaks()
        self.assertEqual(len(scan.peak_set), 2107)
        reader.close()

    def test_pack(self):
        # pack() preserves the scan title.
        reader = self.reader
        bunch = next(reader)
        bunch.precursor.pick_peaks()
        self.assertEqual(bunch.precursor.pack().title, bunch.precursor.title)
        reader.close()

    def test_get_scan_by_id(self):
        reader = self.reader
        precursor = reader.get_scan_by_id(scan_ids[0])
        self.assertEqual(precursor.id, scan_ids[0])
        self.assertEqual(precursor.index, 0)
        product = reader.get_scan_by_id(scan_ids[2])
        self.assertEqual(product.id, scan_ids[2])
        self.assertEqual(product.index, 2)
        self.assertEqual(product.precursor_information.precursor_scan_id,
                         scan_ids[0])
        # Looking the same ID up twice returns the identical object.
        self.assertIs(precursor, reader.get_scan_by_id(scan_ids[0]))
        reader.close()

    def test_get_scan_by_index(self):
        reader = self.reader
        precursor = reader.get_scan_by_index(0)
        self.assertEqual(precursor.index, 0)
        self.assertEqual(precursor.id, scan_ids[0])
        reader.close()

    def test_get_scan_by_time(self):
        reader = self.reader
        precursor = reader.get_scan_by_time(22.12829)
        self.assertEqual(precursor.id, scan_ids[0])
        product = reader.get_scan_by_time(22.132753)
        self.assertEqual(product.index, 1)
        reader.close()
def get_scan(self):
    # Deserialize the pickled scan fixture and adapt it through the
    # mzML data interface.
    payload = gzload(datafile("test_scan.pkl.gz"))
    wrapped = common.Scan(payload, mzml.MzMLDataInterface())
    return wrapped
def get_reference(self):
    # Deserialize the expected processed-scan fixture.
    return gzload(datafile("test_scan_results.pkl.gz"))
class TestMzMLLoaderScanBehavior(unittest.TestCase):
    """Comprehensive mzML loader behavior: byte-offset indexing, iteration,
    random access, and file-level metadata parsing."""

    path = datafile("three_test_scans.mzML")
    only_ms2_path = datafile("only_ms2_mzml.mzML")

    @property
    def reader(self):
        # Fresh loader with index sanity checks on each access: 3 entries,
        # first ID matches, and offsets are sorted.
        reader = infer_type.MSFileLoader(self.path)
        assert len(reader.index) == 3
        assert reader.index.from_index(0) == scan_ids[0]
        assert list(reader.index.index_sequence) == sorted(
            reader.index.index_sequence, key=lambda x: x[1])
        return reader

    def test_index_building(self):
        # Prebuild the byte-offset index file, verify its spectrum keys,
        # then best-effort remove the generated file.
        MzMLLoader.prebuild_byte_offset_file(self.path)
        parser = MzMLLoader._parser_cls(self.path)
        assert parser._check_has_byte_offset_file()
        index = parser.index
        offsets = index['spectrum']
        key_list = list(offsets.keys())
        assert key_list == scan_ids
        offset_file_name = parser._byte_offset_filename
        try:
            os.remove(offset_file_name)
        except OSError:
            pass

    def test_index_integrity(self):
        # Ungrouped iteration order matches the scans' stored indices.
        reader = self.reader
        reader.make_iterator(grouped=False)
        for i, scan in enumerate(reader):
            assert i == scan.index

    def test_iteration(self):
        # One grouped bunch: 1 precursor + 2 products = 3 scans; ungrouped
        # iteration restarts at index 0 after reset().
        reader = self.reader
        i = 0
        bunch = next(reader)
        if bunch.precursor:
            i += 1
        i += len(bunch.products)
        self.assertEqual(i, 3)
        reader.reset()
        reader.make_iterator(grouped=False)
        scan = next(reader)
        scan._load()
        self.assertEqual(scan.index, 0)
        reader.close()

    def test_index(self):
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.index, 0)
        for i, scan in enumerate(bunch.products, 1):
            self.assertEqual(scan.index, i)
        reader.close()

    def test_ms_level(self):
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.ms_level, 1)
        for i, scan in enumerate(bunch.products, 1):
            self.assertEqual(scan.ms_level, 2)
        reader.close()

    def test_polarity(self):
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.polarity, 1)
        reader.close()

    def test_activation(self):
        # The MS1 scan has no activation; each product reports beam-type
        # collision-induced dissociation.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.activation, None)
        for product in bunch.products:
            self.assertNotEqual(product.activation, None)
            self.assertEqual(product.activation.method,
                             "beam-type collision-induced dissociation")
        reader.close()

    def test_precursor(self):
        # Products link back to the precursor scan object.
        reader = self.reader
        bunch = next(reader)
        self.assertEqual(bunch.precursor.precursor_information, None)
        for product in bunch.products:
            self.assertNotEqual(product.precursor_information, None)
            self.assertEqual(product.precursor_information.precursor,
                             bunch.precursor)
        reader.close()

    def test_pick_peaks(self):
        # NOTE(review): expects 2108 peaks while the earlier duplicate
        # class expects 2107 -- confirm which count is current.
        reader = self.reader
        bunch = next(reader)
        scan = bunch.precursor.pick_peaks()
        self.assertEqual(len(scan.peak_set), 2108)
        reader.close()

    def test_pack(self):
        # pack() preserves the scan title.
        reader = self.reader
        bunch = next(reader)
        bunch.precursor.pick_peaks()
        self.assertEqual(bunch.precursor.pack().title, bunch.precursor.title)
        reader.close()

    def test_get_scan_by_id(self):
        reader = self.reader
        precursor = reader.get_scan_by_id(scan_ids[0])
        self.assertEqual(precursor.id, scan_ids[0])
        self.assertEqual(precursor.index, 0)
        product = reader.get_scan_by_id(scan_ids[2])
        self.assertEqual(product.id, scan_ids[2])
        self.assertEqual(product.index, 2)
        self.assertEqual(product.precursor_information.precursor_scan_id,
                         scan_ids[0])
        # Looking the same ID up twice returns the identical object.
        self.assertIs(precursor, reader.get_scan_by_id(scan_ids[0]))
        reader.close()

    def test_get_scan_by_index(self):
        reader = self.reader
        precursor = reader.get_scan_by_index(0)
        self.assertEqual(precursor.index, 0)
        self.assertEqual(precursor.id, scan_ids[0])
        reader.close()

    def test_get_scan_by_time(self):
        reader = self.reader
        precursor = reader.get_scan_by_time(22.12829)
        self.assertEqual(precursor.id, scan_ids[0])
        product = reader.get_scan_by_time(22.132753)
        self.assertEqual(product.index, 1)
        # An infinite time clamps to the final scan.
        scan = reader.get_scan_by_time(float('inf'))
        assert scan == reader[-1]
        reader.close()

    def test_instrument_configuration(self):
        reader = self.reader
        bunch = next(reader)
        precursor = bunch.precursor
        config = precursor.instrument_configuration
        self.assertEqual(config.id, "IC1")
        assert "orbitrap" in config.analyzers

    def test_file_description(self):
        # Declared contents and source file survive parsing; no location
        # parameter is retained.
        file_info = self.reader.file_description()
        assert "MS1 spectrum" in file_info.contents
        assert "MSn spectrum" in file_info.contents
        source_file = file_info.source_files[0]
        assert source_file.name == "three_test_scans.mzML"
        assert "location" not in source_file.parameters

    def test_acquisition_information(self):
        reader = self.reader
        bunch = next(reader)
        precursor = bunch.precursor
        acquisition = precursor.acquisition_information
        # The single acquisition event starts at the scan's own time.
        self.assertTrue(
            abs(acquisition[0].start_time - precursor.scan_time) < 1e-3)
        self.assertEqual(len(acquisition), 1)
        window = acquisition[0].total_scan_window()
        if window:
            # The middle m/z value must fall inside the total scan window.
            self.assertIn(precursor.arrays.mz[len(precursor.arrays.mz) // 2],
                          window)

    def test_annotations(self):
        reader = self.reader
        bunch = next(reader)
        precursor = bunch.precursor
        assert len(precursor.annotations) > 0

    def test_iteration_mode_detection(self):
        # A file with only MS2 scans iterates ungrouped ("single" mode).
        reader = infer_type.MSFileLoader(self.only_ms2_path)
        assert reader.iteration_mode == 'single'

    def test_source_file_parsing(self):
        # Source file names and paths parse for both fixture files.
        reader = self.reader
        finfo = reader.file_description()
        sf = finfo.source_files[0]
        assert sf.name == 'three_test_scans.mzML'
        assert isinstance(sf.path, str)
        reader = infer_type.MSFileLoader(self.only_ms2_path)
        finfo = reader.file_description()
        sf = finfo.source_files[0]
        assert sf.name == 'analysis.baf'
        assert isinstance(sf.path, str)

    def test_data_processing_parsing(self):
        reader = infer_type.MSFileLoader(self.only_ms2_path)
        assert len(reader.data_processing()[0]) == 3

    def test_software_list(self):
        reader = infer_type.MSFileLoader(self.path)
        assert len(reader.software_list()) == 2
import ms_deisotope
from ms_deisotope.feature_map import quick_index
from ms_deisotope.test.common import datafile

# Fixture path for the small example run.
mzml_path = datafile("small.mzML")


def test_quick_index():
    """quick_index.index should find 14 MS1 and 34 MSn scans in small.mzML."""
    reader = ms_deisotope.MSFileLoader(mzml_path)
    index, _interval_tree = quick_index.index(reader)
    n_1 = len(index.ms1_ids)
    n_n = len(index.msn_ids)
    assert n_1 == 14
    assert n_n == 34
def make_scan():
    # Fetch one specific scan from the compressed AGP fixture by native ID.
    source = datafile("20150710_3um_AGP_001_29_30.mzML.gz")
    return MSFileLoader(source).get_scan_by_id("scanId=1740086")
def get_reference(self):
    # Load and return the expected processed-scan fixture.
    reference = gzload(datafile("test_scan_results.pkl.gz"))
    return reference
class TestThermoRawLoaderScanBehavior(unittest.TestCase):
    """Thermo RAW loader behavior, cross-checked against mzML and MGF
    conversions of the same acquisition."""

    path = datafile("small.RAW")
    reference_mzml = datafile("small.mzML")
    reference_mgf = datafile("small.mgf")

    @property
    def reader(self):
        # Fresh, type-inferred loader for each access.
        return infer_type.MSFileLoader(self.path)

    def test_iteration(self):
        # Starting mid-run yields scan=9 as the first precursor, then
        # scan=15; a time-based start matches the requested scan_time.
        reader = self.reader
        reader.start_from_scan('controllerType=0 controllerNumber=1 scan=10')
        bunch = next(reader)
        assert bunch.precursor.id == 'controllerType=0 controllerNumber=1 scan=9'
        bunch = next(reader)
        assert bunch.precursor.id == 'controllerType=0 controllerNumber=1 scan=15'
        reader.start_from_scan(rt=0.077788333333)
        bunch = next(reader)
        assert np.isclose(bunch.precursor.scan_time, 0.077788333333)

    def test_file_level_metadata(self):
        reader = self.reader
        desc = reader.file_description()
        assert desc.has_content("MS1 spectrum")
        assert desc.has_content("MSn spectrum")
        inst_config = reader.instrument_configuration()
        assert inst_config[0].analyzers[0] == 'orbitrap'

    def test_scan_level_data(self):
        reader = self.reader
        reader.start_from_scan('controllerType=0 controllerNumber=1 scan=10')
        bunch = next(reader)
        assert np.isclose(bunch.precursor.scan_time, 0.077788333333)
        # Peak picking at SNR 1.5 yields 3110 peaks on this precursor.
        assert len(
            bunch.precursor.pick_peaks(
                signal_to_noise_threshold=1.5).peak_set) == 3110
        scan_window = bunch.precursor.acquisition_information.scan_list[0][0]
        assert scan_window.lower == 200.0 and scan_window.upper == 2000.0
        product = bunch.products[0]
        assert product.ms_level == 2
        assert product.index == 9
        assert product.activation.energy == 35.0
        assert np.isclose(product.precursor_information.mz, 810.7528)
        # Expected "[Thermo Trailer Extra]" annotation values.
        # NOTE(review): the filter_string value contains "[email protected]",
        # which looks like an email-redaction artifact rather than a real
        # Thermo filter string -- confirm against the actual fixture.
        annotations = {
            '[Thermo Trailer Extra]Micro Scan Count': 3.0,
            '[Thermo Trailer Extra]Scan Event': 3.0,
            '[Thermo Trailer Extra]Scan Segment': 1.0,
            'filter_string': 'ITMS + c ESI d Full ms2 [email protected] [210.00-1635.00]'
        }
        assert product.annotations == annotations
        assert np.isclose(product.isolation_window.target, 810.752807)
        assert product.isolation_window.lower == 1.0
        assert not product.is_profile

    def test_size(self):
        # len(), negative indexing, and infinite-time lookup agree on the
        # final scan.
        reader = self.reader
        n = len(reader)
        assert n == 48
        x = reader[-1]
        y = reader.get_scan_by_time(float('inf'))
        assert x == y

    def test_compat(self):
        # Scans read from RAW must equal their mzML counterparts, and peak
        # picking must match the MGF-derived peak list.
        raw_reader = self.reader
        mzml_reader = infer_type.MSFileLoader(self.reference_mzml)
        mgf_reader = infer_type.MSFileLoader(self.reference_mgf)
        mgf_scan = next(mgf_reader)
        mzml_scan = mzml_reader[2]
        raw_scan = raw_reader[2]
        self.assertEqual(mzml_scan, raw_scan)
        mgf_scan.pick_peaks()
        raw_scan.pick_peaks()
        self.assertEqual(raw_scan.peak_set, mgf_scan.peak_set)
        self.assertEqual(raw_scan.precursor_information.precursor,
                         mzml_scan.precursor_information.precursor)