def _index_file_name(self):
    """Infer the extended-index file path from ``self.source_file``.

    Returns
    -------
    str or None
        The side-car index file path, or :const:`None` when the source
        file object exposes no usable ``name``.
    """
    # ``basestring`` existed only on Python 2; referencing it on Python 3
    # raises NameError before the isinstance check can run. Accept both
    # text and byte path names instead.
    if isinstance(self.source_file, (str, bytes)):
        return ExtendedScanIndex.index_file_name(self.source_file)
    else:
        try:
            return ExtendedScanIndex.index_file_name(self.source_file.name)
        except AttributeError:
            return None
def __init__(self, handle, n_spectra=2e4, compression=writer.COMPRESSION_ZLIB,
             deconvoluted=True, sample_name=None, build_extra_index=True):
    """Initialize the serializer over an open output stream.

    Parameters
    ----------
    handle : file-like
        The stream the mzML document is written to.
    n_spectra : int
        Declared size of the ``<spectrumList>`` (a count hint, not a cap).
    compression : str
        Compression mode for binary data arrays.
    deconvoluted : bool
        Whether extra deconvolution arrays are written per spectrum.
    sample_name : str, optional
        Name recorded for the sample run.
    build_extra_index : bool
        When true, an :class:`ExtendedScanIndex` is accumulated alongside
        the document.
    """
    self.handle = handle
    self.writer = writer.MzMLWriter(handle)
    self.n_spectra = n_spectra
    self.compression = compression
    self._has_started_writing_spectra = False
    # Open the <mzML> document immediately; complete() performs the
    # matching __exit__.
    self.writer.__enter__()
    self._run_tag = None
    self._spectrum_list_tag = None
    self._chromatogram_list_tag = None
    # Controlled vocabularies must be written before any other metadata.
    self.writer.controlled_vocabularies()
    self.deconvoluted = deconvoluted
    self.sample_name = sample_name
    # Metadata accumulators flushed when the first spectrum is written.
    self.file_contents_list = []
    self.software_list = []
    self.source_file_list = []
    self.data_processing_list = []
    self.instrument_configuration_list = []
    self.sample_list = []
    self.processing_parameters = []
    # scan time -> intensity maps used to emit TIC/BPC chromatograms.
    self.total_ion_chromatogram_tracker = OrderedDict()
    self.base_peak_chromatogram_tracker = OrderedDict()
    self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
    self.add_sample({
        "name": sample_name,
        "id": "sample_1",
        "params": [
            {
                "name": "SampleRun-UUID",
                "value": self.sample_run.uuid
            },
        ]
    })
    self.chromatogram_queue = []
    self.indexer = None
    if build_extra_index:
        self.indexer = ExtendedScanIndex()
def build_extended_index(self, header_only=True):
    """Build an :class:`ExtendedScanIndex` over the whole data file.

    Iterates every scan bunch, stores the result on
    ``self.extended_index``, and makes a best-effort attempt to persist
    it to the side-car index file.

    Parameters
    ----------
    header_only : bool
        When true, iterate scan headers only (no peak data), which is
        much cheaper for indexing purposes.
    """
    self.reset()
    indexer = ExtendedScanIndex()
    iterator = self
    if header_only:
        iterator = self.iter_scan_headers()
    for bunch in iterator:
        indexer.add_scan_bunch(bunch)
    self.reset()
    self.extended_index = indexer
    try:
        with open(self._index_file_name, 'w') as handle:
            indexer.serialize(handle)
    # TypeError added: when ``_index_file_name`` is None (detached
    # stream with no file name) ``open(None, 'w')`` raises TypeError,
    # which previously escaped this best-effort block.
    except (IOError, OSError, AttributeError, TypeError) as err:
        print(err)
def complete(self):
    """Finish writing to the output document.

    Closes any open list tags, flushes the queued default
    chromatograms, closes the ``<mzML>`` element, writes the extended
    index when one was built, and finally attempts to flush the
    underlying file object.
    """
    if self._spectrum_list_tag is not None:
        self._spectrum_list_tag.__exit__(None, None, None)
    if self._run_tag is not None:
        self._make_default_chromatograms()
        self.write_chromatograms()
    if self._run_tag is not None:
        self._run_tag.__exit__(None, None, None)
    self.writer.__exit__(None, None, None)
    if self.indexer is not None:
        # A detached stream has no ``name``; fall back to a placeholder.
        index_target = getattr(self.handle, "name", "_detatched_mzml_index")
        try:
            with open(ExtendedScanIndex.index_file_name(index_target), 'w') as index_stream:
                self.indexer.serialize(index_stream)
        except IOError as io_err:
            warnings.warn(
                "Could not write extended index file due to error %r" % (io_err,))
    try:
        self.writer.outfile.flush()
    except (IOError, AttributeError, ValueError):
        pass
def complete(self):
    """Finish writing to the output document.

    This closes the open list tags, empties the chromatogram accumulator,
    and closes the :obj:`<mzML>` tag, and attempts to flush the output
    file.
    """
    if self._spectrum_list_tag is not None:
        self._spectrum_list_tag.__exit__(None, None, None)
    if self._run_tag is not None:
        # Default TIC/BPC chromatograms are queued and written before
        # the <run> element is closed.
        self._make_default_chromatograms()
        self.write_chromatograms()
    if self._run_tag is not None:
        self._run_tag.__exit__(None, None, None)
    self.writer.__exit__(None, None, None)
    if self.indexer is not None:
        try:
            name = self.handle.name
        except AttributeError:
            # No file name available (e.g. in-memory stream); use a
            # placeholder so serialization still has a target.
            name = "_detatched_mzml_index"
        try:
            with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                self.indexer.serialize(ixfile)
        except IOError as e:
            # Best effort: index write failure should not abort completion.
            warnings.warn(
                "Could not write extended index file due to error %r" % (e, ))
    try:
        self.writer.outfile.flush()
    except (IOError, AttributeError, ValueError):
        pass
def load_index_file(path):
    """Return the :class:`ExtendedScanIndex` for *path*, loading and
    caching it in ``metadata_index`` on a cache miss."""
    if path in metadata_index:
        return metadata_index[path]
    with open(path, 'rt') as stream:
        loaded = ExtendedScanIndex.load(stream)
    metadata_index[path] = loaded
    return loaded
def __init__(self, handle, n_spectra=2e4, compression=None,
             deconvoluted=True, sample_name=None, build_extra_index=True,
             data_encoding=None):
    """Initialize the serializer over an open output stream.

    Parameters
    ----------
    handle : file-like
        The stream the mzML document is written to.
    n_spectra : int
        Declared size of the ``<spectrumList>``.
    compression : str, optional
        Compression mode for binary arrays; defaults to zlib.
    deconvoluted : bool
        Whether extra deconvolution arrays are written per spectrum.
    sample_name : str, optional
        Name recorded for the sample run.
    build_extra_index : bool
        When true, an :class:`ExtendedScanIndex` is accumulated.
    data_encoding : dict, optional
        Mapping of array type to numpy dtype used when encoding.

    Raises
    ------
    ImportError
        When :mod:`psims` is not available.
    """
    # Check psims availability *before* dereferencing any ``writer``
    # attribute; previously a missing psims raised an opaque
    # AttributeError from ``writer.MZ_ARRAY`` instead of this message.
    if writer is None:
        raise ImportError(
            "Cannot write mzML without psims. Please install psims to use this feature."
        )
    if data_encoding is None:
        data_encoding = {
            writer.MZ_ARRAY: np.float64,
            writer.INTENSITY_ARRAY: np.float32,
            writer.CHARGE_ARRAY: np.int32,
        }
    if compression is None:
        compression = writer.COMPRESSION_ZLIB
    self.handle = handle
    self.writer = writer.MzMLWriter(handle)
    self.n_spectra = n_spectra
    self.compression = compression
    self.data_encoding = data_encoding
    self._has_started_writing_spectra = False
    # Open the <mzML> document; complete() performs the matching __exit__.
    self.writer.__enter__()
    self._run_tag = None
    self._spectrum_list_tag = None
    self._chromatogram_list_tag = None
    self.writer.controlled_vocabularies()
    self.deconvoluted = deconvoluted
    self._initialize_description_lists()
    self._init_sample(sample_name)
    self.total_ion_chromatogram_tracker = OrderedDict()
    self.base_peak_chromatogram_tracker = OrderedDict()
    self.chromatogram_queue = []
    self.indexer = None
    if build_extra_index:
        self.indexer = ExtendedScanIndex()
def complete(self):
    """Finish writing to the output document.

    Closes the spectrum list and run tags (when they were opened),
    writes the default chromatograms, closes the ``<mzML>`` tag, and
    makes a best-effort attempt to write the extended index file.
    """
    # Guard against completion before any spectrum was written: in that
    # case both tags are still None and ``None.__exit__`` would raise
    # AttributeError.
    if self._spectrum_list_tag is not None:
        self._spectrum_list_tag.__exit__(None, None, None)
    if self._run_tag is not None:
        self._make_default_chromatograms()
        self.write_chromatograms()
        self._run_tag.__exit__(None, None, None)
    self.writer.__exit__(None, None, None)
    if self.indexer is not None:
        try:
            name = self.handle.name
        except AttributeError:
            # Detached stream with no file name; use a placeholder.
            name = "_detatched_mzml_index"
        try:
            with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                self.indexer.serialize(ixfile)
        except IOError:
            # Best effort: failing to write the side-car index must not
            # abort completion of the mzML document itself.
            pass
def build_extended_index(self, header_only=True):
    """Build an :class:`ExtendedScanIndex` over the whole file, store it
    on ``self.extended_index``, and make a best-effort attempt to
    persist it to the side-car index file."""
    self.reset()
    index = ExtendedScanIndex()
    source = self.iter_scan_headers() if header_only else self
    if self._has_ms1_scans():
        # Grouped reading: precursor/product bunches recorded together.
        for bunch in source:
            index.add_scan_bunch(bunch)
    else:
        # No MS1 spectra present: record each scan individually.
        for scan in source:
            index.add_scan(scan)
    self.reset()
    self.extended_index = index
    try:
        with open(self._index_file_name, 'w') as stream:
            index.serialize(stream)
    except (IOError, OSError, AttributeError, TypeError) as err:
        print(err)
def __init__(self, handle, n_spectra=2e4, compression=writer.COMPRESSION_ZLIB,
             deconvoluted=True, sample_name=None, build_extra_index=True):
    """Initialize the serializer over an open output stream.

    Parameters
    ----------
    handle : file-like
        The stream the mzML document is written to.
    n_spectra : int
        Declared size of the ``<spectrumList>`` (a count hint, not a cap).
    compression : str
        Compression mode for binary data arrays.
    deconvoluted : bool
        Whether extra deconvolution arrays are written per spectrum.
    sample_name : str, optional
        Name recorded for the sample run.
    build_extra_index : bool
        When true, an :class:`ExtendedScanIndex` is accumulated alongside
        the document.
    """
    self.handle = handle
    self.writer = writer.MzMLWriter(handle)
    self.n_spectra = n_spectra
    self.compression = compression
    self._has_started_writing_spectra = False
    # Open the <mzML> document immediately; complete() performs the
    # matching __exit__.
    self.writer.__enter__()
    self._run_tag = None
    self._spectrum_list_tag = None
    self._chromatogram_list_tag = None
    # Controlled vocabularies must be written before any other metadata.
    self.writer.controlled_vocabularies()
    self.deconvoluted = deconvoluted
    self.sample_name = sample_name
    # Metadata accumulators flushed when the first spectrum is written.
    self.file_contents_list = []
    self.software_list = []
    self.source_file_list = []
    self.data_processing_list = []
    self.instrument_configuration_list = []
    self.sample_list = []
    self.processing_parameters = []
    # scan time -> intensity maps used to emit TIC/BPC chromatograms.
    self.total_ion_chromatogram_tracker = OrderedDict()
    self.base_peak_chromatogram_tracker = OrderedDict()
    self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
    self.add_sample({
        "name": sample_name,
        "id": "sample_1",
        "params": [
            {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
        ]})
    self.chromatogram_queue = []
    self.indexer = None
    if build_extra_index:
        self.indexer = ExtendedScanIndex()
def __init__(self, handle, n_spectra=int(2e5), compression=None,
             deconvoluted=True, sample_name=None, build_extra_index=True,
             data_encoding=None):
    """Initialize the serializer over an open output stream.

    Parameters
    ----------
    handle : file-like
        The stream the mzML document is written to.
    n_spectra : int
        Declared size of the ``<spectrumList>``.
    compression : str, optional
        Compression mode for binary arrays; defaults to zlib.
    deconvoluted : bool
        Whether extra deconvolution arrays are written per spectrum.
    sample_name : str, optional
        Name recorded for the sample run.
    build_extra_index : bool
        When true, an :class:`ExtendedScanIndex` is accumulated.
    data_encoding : dict, optional
        Mapping of array type to numpy dtype used when encoding.

    Raises
    ------
    ImportError
        When :mod:`psims` is not available.
    """
    # Check psims availability *before* dereferencing any ``writer``
    # attribute; previously a missing psims raised an opaque
    # AttributeError from ``writer.MZ_ARRAY`` instead of this message.
    if writer is None:
        raise ImportError(
            "Cannot write mzML without psims. Please install psims to use this feature.")
    if data_encoding is None:
        data_encoding = {
            writer.MZ_ARRAY: np.float64,
            writer.INTENSITY_ARRAY: np.float32,
            writer.CHARGE_ARRAY: np.int32,
        }
    if compression is None:
        compression = writer.COMPRESSION_ZLIB
    super(MzMLSerializer, self).__init__()
    self.handle = handle
    self.writer = writer.MzMLWriter(handle)
    self.n_spectra = n_spectra
    self.compression = compression
    self.data_encoding = data_encoding
    self._has_started_writing_spectra = False
    # Open the <mzML> document; complete() performs the matching __exit__.
    self.writer.__enter__()
    self._run_tag = None
    self._spectrum_list_tag = None
    self._chromatogram_list_tag = None
    self.writer.controlled_vocabularies()
    self.deconvoluted = deconvoluted
    self._initialize_description_lists()
    self._init_sample(sample_name)
    self.total_ion_chromatogram_tracker = OrderedDict()
    self.base_peak_chromatogram_tracker = OrderedDict()
    self.chromatogram_queue = []
    self.indexer = None
    if build_extra_index:
        self.indexer = ExtendedScanIndex()
def _index_file_name(self):
    """Return the side-car index file path derived from ``self.source_file``.

    Assumes ``self.source_file`` is a path string — TODO confirm callers
    never pass a file object here (the companion implementation also
    handles objects with a ``name`` attribute).
    """
    return ExtendedScanIndex.index_file_name(self.source_file)
def read_index_file(self):
    """Load the serialized extended index from disk and store the result
    on ``self.extended_index``."""
    with open(self._index_file_name) as index_stream:
        loaded = ExtendedScanIndex.deserialize(index_stream)
    self.extended_index = loaded
class MzMLSerializer(ScanSerializerBase):
    """Write :mod:`ms_deisotope` data structures to a file in mzML format.

    Attributes
    ----------
    base_peak_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to base peak intensity. This
        is used to write the *base peak chromatogram*.
    chromatogram_queue : :class:`list`
        Accumulate chromatogram data structures which will be written
        out after all spectra have been written to file.
    compression : :class:`str`
        The compression type to use for binary data arrays. Should be
        one of :obj:`"zlib"`, :obj:`"none"`, or :obj:`None`
    data_encoding : :class:`dict` or :class:`int` or :obj:`numpy.dtype` or :class:`str`
        The encoding specification to specify the binary encoding of
        numeric data arrays that is passed to
        :meth:`~.MzMLWriter.write_spectrum` and related methods.
    data_processing_list : :class:`list`
        List of packaged :class:`~.DataProcessingInformation` to write out
    deconvoluted : bool
        Indicates whether the translation should include extra
        deconvolution information
    file_contents_list : :class:`list`
        List of terms to include in the :obj:`<fileContents>` tag
    handle : file-like
        The file-like object being written to
    indexer : :class:`~.ExtendedScanIndex`
        The external index builder
    instrument_configuration_list : :class:`list`
        List of packaged :class:`~.InstrumentInformation` to write out
    n_spectra : int
        The number of spectra to provide a size for in the
        :obj:`<spectrumList>`
    processing_parameters : :class:`list`
        List of additional terms to include in a newly created
        :class:`~.DataProcessingInformation`
    sample_list : :class:`list`
        List of :class:`~.SampleRun` objects to write out
    sample_name : :class:`str`
        Default sample name
    sample_run : :class:`~.SampleRun`
        Description
    software_list : :class:`list`
        List of packaged :class:`~.Software` objects to write out
    source_file_list : :class:`list`
        List of packaged :class:`~.SourceFile` objects to write out
    total_ion_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to total intensity. This is
        used to write the *total ion chromatogram*.
    writer : :class:`~psims.mzml.writer.MzMLWriter`
        The lower level writer implementation
    """

    def __init__(self, handle, n_spectra=2e4, compression=None,
                 deconvoluted=True, sample_name=None, build_extra_index=True,
                 data_encoding=None):
        # Check psims availability *before* dereferencing any ``writer``
        # attribute; previously a missing psims raised an opaque
        # AttributeError from ``writer.MZ_ARRAY`` before the intended
        # ImportError could be raised.
        if writer is None:
            raise ImportError(
                "Cannot write mzML without psims. Please install psims to use this feature."
            )
        if data_encoding is None:
            data_encoding = {
                writer.MZ_ARRAY: np.float64,
                writer.INTENSITY_ARRAY: np.float32,
                writer.CHARGE_ARRAY: np.int32,
            }
        if compression is None:
            compression = writer.COMPRESSION_ZLIB
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self.data_encoding = data_encoding
        self._has_started_writing_spectra = False
        # Open the <mzML> document; complete() performs the matching
        # __exit__.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None
        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted
        self._initialize_description_lists()
        self._init_sample(sample_name)
        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.chromatogram_queue = []
        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def _init_sample(self, sample_name, **kwargs):
        # Create the SampleRun record and register it as the default
        # sample entry, tagging it with a UUID for later cross-reference.
        self.sample_name = sample_name
        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        self.add_sample({
            "name": self.sample_run.name,
            "id": "sample_1",
            "params": [
                {
                    "name": "SampleRun-UUID",
                    "value": self.sample_run.uuid
                },
            ]
        })

    def _initialize_description_lists(self):
        # Accumulators for document-level metadata, flushed when the
        # first spectrum is written.
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []
        self.processing_parameters = []

    def add_instrument_configuration(self, configuration):
        """Add an :class:`~.InstrumentInformation` object to the output
        document.

        Parameters
        ----------
        configuration: :class:`~.InstrumentInformation`
            The instrument configuration to add
        """
        component_list = []
        for group in configuration.groups:
            tag = None
            if group.type == 'source':
                tag = self.writer.Source
            elif group.type == 'analyzer':
                tag = self.writer.Analyzer
            elif group.type == 'detector':
                tag = self.writer.Detector
            else:
                # Unrecognized component types are silently skipped.
                continue
            component_list.append(
                tag(order=group.order, params=[g.name for g in group]))
        config_element = self.writer.InstrumentConfiguration(
            configuration.id, component_list)
        self.instrument_configuration_list.append(config_element)

    def add_software(self, software_description):
        """Add a :class:`~.Software` object to the output document.

        Parameters
        ----------
        software_description : :class:`~.Software`
            The software description to add
        """
        self.software_list.append(software_description)

    def add_file_information(self, file_information):
        """Transfer file contents terms and source files from a
        :class:`~.FileInformation`-like object into the accumulators."""
        for key, value in file_information.contents.items():
            if value is None:
                value = ''
            self.add_file_contents({str(key): value})
        for source_file in file_information.source_files:
            self.add_source_file(source_file)

    def add_file_contents(self, file_contents):
        """Add a key to the resulting :obj:`<fileDescription>` of the
        output document.

        Parameters
        ----------
        file_contents: :class:`str` or :class:`Mapping`
            The parameter to add
        """
        self.file_contents_list.append(file_contents)

    def remove_file_contents(self, name):
        """Remove the first file-contents entry matching *name*.

        Raises
        ------
        KeyError
            When no entry matches *name*.
        """
        for i, content in enumerate(self.file_contents_list):
            if isinstance(content, Mapping):
                # Unwrap mapping entries to the comparable name string.
                if 'name' in content:
                    content = content['name']
                elif len(content) == 1:
                    content = list(content.keys())[0]
                else:
                    continue
            if content == name:
                break
        else:
            raise KeyError(name)
        self.file_contents_list.pop(i)

    def add_source_file(self, source_file):
        """Add the :class:`~.SourceFile` to the output document

        Parameters
        ----------
        source_file : :class:`~.SourceFile`
            The source file to add
        """
        unwrapped = {
            "name": source_file.name,
            "location": source_file.location,
            "id": source_file.id,
            "params": []
        }
        unwrapped['params'].extend([
            (getattr(key, 'accession', str(key)), value)
            for key, value in source_file.parameters.items()
        ])
        if source_file.id_format:
            unwrapped['params'].append(str(source_file.id_format))
        if source_file.file_format:
            unwrapped['params'].append(str(source_file.file_format))
        self.source_file_list.append(unwrapped)

    def add_data_processing(self, data_processing_description):
        """Add a new :class:`~.DataProcessingInformation` or
        :class:`~ProcessingMethod` to the output document as a new
        :obj:`<dataProcessing>` entry describing one or more
        :obj:`<processingMethod>`s for a single referenced
        :class:`~.Software` instance.

        Parameters
        ----------
        data_processing_description : :class:`~.DataProcessingInformation` or :class:`~.ProcessingMethod`
            Data manipulation sequence to add to the document
        """
        if isinstance(data_processing_description,
                      data_transformation.DataProcessingInformation):
            methods = []
            for method in data_processing_description:
                content = []
                for op, val in method:
                    content.append({'name': op.name, 'value': val})
                method_descr = {
                    'software_reference': method.software_id,
                    'order': method.order,
                    'params': content
                }
                methods.append(method_descr)
            payload = {
                'id': data_processing_description.id,
                'processing_methods': methods
            }
            self.data_processing_list.append(payload)
        elif isinstance(data_processing_description,
                        data_transformation.ProcessingMethod):
            content = []
            for op, val in data_processing_description:
                content.append({"name": op.name, 'value': val})
            payload = {
                'id': "data_processing_%d" % len(self.data_processing_list),
                'processing_methods': [{
                    'software_reference': data_processing_description.software_id,
                    'order': data_processing_description.order,
                    'params': content
                }]
            }
            self.data_processing_list.append(payload)
        else:
            # Assume an already-packaged description; pass it through.
            self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value=None):
        """Add a new processing method to the writer's own
        :obj:`<dataProcessing>` element.

        Parameters
        ----------
        name : str
            The processing technique's name
        value : obj
            The processing technique's value, if any
        """
        self.processing_parameters.append({"name": name, "value": value})

    def add_sample(self, sample):
        # Register a packaged sample entry for the <sampleList>.
        self.sample_list.append(sample)

    def copy_metadata_from(self, reader):
        """Copies the file-level metadata from an instance of
        :class:`~.ScanFileMetadataBase` into the metadata of the file
        to be written

        Parameters
        ----------
        reader : :class:`~.ScanFileMetadataBase`
            The source to copy metadata from
        """
        # Each section is optional on the reader; missing sections are
        # skipped rather than treated as errors.
        try:
            description = reader.file_description()
            self.add_file_information(description)
        except AttributeError:
            pass
        try:
            instrument_configs = reader.instrument_configuration()
        except AttributeError:
            instrument_configs = []
        for config in instrument_configs:
            self.add_instrument_configuration(config)
        try:
            software_list = reader.software_list()
        except AttributeError:
            software_list = []
        for software in software_list:
            self.add_software(software)
        try:
            data_processing_list = reader.data_processing()
        except AttributeError:
            data_processing_list = []
        for data_processing_ in data_processing_list:
            self.add_data_processing(data_processing_)

    def _create_file_description(self):
        self.writer.file_description(self.file_contents_list,
                                     self.source_file_list)

    def _create_software_list(self):
        # Package accumulated software entries, then append an entry for
        # ms_deisotope itself under an id that does not collide with any
        # already-present ms_deisotope entry.
        software_list = []
        ms_deisotope_entries = []
        for sw in self.software_list:
            d = {'id': sw.id, 'version': sw.version}
            if sw.is_name(sw.name):
                d[sw.name] = ''
            else:
                d['MS:1000799'] = sw.name
            d['params'] = list(sw.options.items())
            if 'ms_deisotope' in str(sw.id):
                ms_deisotope_entries.append(str(sw.id))
            software_list.append(d)
        for i in range(1, 100):
            query = 'ms_deisotope_%d' % i
            if query in ms_deisotope_entries:
                continue
            else:
                new_entry_id = query
                break
        else:
            new_entry_id = 'ms_deisotope_%s' % str(uuid4())
        software_list.append({
            # BUG FIX: previously hard-coded "ms_deisotope_1", which made
            # the ``new_entry_id`` computation dead code and could emit a
            # duplicate software id.
            "id": new_entry_id,
            'version': lib_version,
            'ms_deisotope': "",
        })
        self.writer.software_list(software_list)

    def _create_sample_list(self):
        self.writer.sample_list(self.sample_list)

    def build_processing_method(self, order=1, picked_peaks=True, smoothing=True,
                                baseline_reduction=True,
                                additional_parameters=tuple(),
                                software_id=None, data_processing_id=None):
        """Assemble a :class:`~.DataProcessingInformation` describing the
        transformations this writer applies, without registering it."""
        if software_id is None:
            software_id = "ms_deisotope_1"
        if data_processing_id is None:
            data_processing_id = 'ms_deisotope_processing_%d' % len(
                self.data_processing_list)
        method = data_transformation.ProcessingMethod(software_id=software_id)
        if self.deconvoluted:
            method.add("deisotoping")
            method.add("charge deconvolution")
            method.add("precursor recalculation")
        if picked_peaks:
            method.add("peak picking")
        if smoothing:
            method.add("smoothing")
        if baseline_reduction:
            method.add("baseline reduction")
        method.add("Conversion to mzML")
        method.update(additional_parameters)
        method.update(self.processing_parameters)
        method.order = order
        data_processing_info = data_transformation.DataProcessingInformation(
            [method], data_processing_id)
        return data_processing_info

    def _create_data_processing_list(self):
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        # Flush all accumulated document-level metadata, then open the
        # <run> and <spectrumList> tags; called once, lazily, before the
        # first spectrum is written.
        self._create_file_description()
        self._create_sample_list()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()
        self._run_tag = self.writer.run(id=self.sample_name or 1,
                                        sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def has_started_writing_spectra(self):
        return self._has_started_writing_spectra

    def _pack_activation(self, activation_information):
        """Pack :class:`~.ActivationInformation` into a :class:`dict`
        structure which that :class:`~psims.mzml.writer.MzMLWriter`
        expects.

        Parameters
        ----------
        activation_information: :class:`~.ActivationInformation`

        Returns
        -------
        :class:`dict`
        """
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        if activation_information.is_multiple_dissociation():
            for method in activation_information.methods[1:]:
                params.append({"name": str(method)})
        # NOTE: Only correct for CID/HCD spectra with absolute collision
        # energies, but that is all I have to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volt"
        })
        if activation_information.is_multiple_dissociation():
            energies = activation_information.energies[1:]
            supplemental_energy = None
            if activation_information.has_supplemental_dissociation():
                # The final energy belongs to the supplemental activation.
                supplemental_energy = energies[-1]
                energies = energies[:-1]
            for energy in energies:
                params.append({
                    "name": "collision energy",
                    "value": energy,
                    "unitName": "electron volt"
                })
            if supplemental_energy is not None:
                params.append({
                    "name": 'supplemental collision energy',
                    "value": supplemental_energy,
                    "unitName": "electron volt"
                })
        for key, val in activation_information.data.items():
            arg = {"name": key, "value": val}
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self, precursor_information,
                                    activation_information=None,
                                    isolation_window=None):
        """Repackage the :class:`~.PrecursorInformation`,
        :class:`~.ActivationInformation`, and :class:~.IsolationWindow`
        into the nested :class:`dict` structure that
        :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        precursor_information : :class:`~.PrecursorInformation`
        activation_information : :class:`~.ActivationInformation`, optional
        isolation_window : :class:`~.IsolationWindow`, optional

        Returns
        -------
        :class:`dict`
        """
        # If the scan bunch has been fully deconvoluted and it's
        # PrecursorInformation filled in, its extracted fields will be
        # populated and should be used, otherwise use the default read
        # values.
        extracted_neutral_mass = precursor_information.extracted_neutral_mass
        if (extracted_neutral_mass != 0):
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id,
                "params": [{
                    "ms_deisotope:defaulted": precursor_information.defaulted
                }, {
                    "ms_deisotope:orphan": precursor_information.orphan
                }]
            }
            if precursor_information.coisolation:
                for p in precursor_information.coisolation:
                    package['params'].append({
                        "name": "ms_deisotope:coisolation",
                        "value": "%f %f %d" % (p.neutral_mass, p.intensity, p.charge)
                    })
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        if package['charge'] == ChargeNotProvided:
            package["charge"] = None
        if activation_information is not None:
            package['activation'] = self._pack_activation(
                activation_information)
        if isolation_window is not None:
            package['isolation_window_args'] = {
                "lower": isolation_window.lower,
                "target": isolation_window.target,
                "upper": isolation_window.upper
            }
        return package

    def _prepare_extra_arrays(self, scan):
        # Build the non-standard (name, values) array pairs written
        # alongside m/z and intensity when deconvolution data exists.
        extra_arrays = []
        if self.deconvoluted:
            score_array = [peak.score for peak in scan.deconvoluted_peak_set]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array(
                [peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def _get_annotations(self, scan):
        # Annotations already represented by the spectrum descriptors are
        # skipped to avoid duplicate parameters.
        skip = {
            'filter string', 'base peak intensity', 'base peak m/z',
            'lowest observed m/z', 'highest observed m/z',
            'total ion current',
        }
        annotations = []
        for key, value in scan.annotations.items():
            if key in skip:
                continue
            annotations.append({key: value})
        return annotations

    def save_scan(self, scan, **kwargs):
        """Write a :class:`~.Scan` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::
            If no spectra have been written to the output document yet,
            this method will call :meth:`_add_spectrum_list` and writes
            all of the metadata lists out. After this point, no new
            document-level metadata can be added.

        Parameters
        ----------
        scan: :class:`~.Scan`
            The scan to write.
        deconvoluted: :class:`bool`
            Whether the scan to write out should include deconvolution
            information
        """
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True

        deconvoluted = kwargs.get("deconvoluted", self.deconvoluted)
        # Prefer deconvoluted peaks, then picked peaks, then raw arrays.
        if deconvoluted:
            centroided = True
            precursor_peaks = scan.deconvoluted_peak_set
        elif scan.peak_set:
            centroided = True
            precursor_peaks = scan.peak_set
        else:
            centroided = False
            precursor_peaks = scan.arrays

        polarity = scan.polarity

        if deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None

        if centroided:
            descriptors = SpectrumDescription.from_peak_set(precursor_peaks)
            mz_array = [p.mz for p in precursor_peaks]
            intensity_array = [p.intensity for p in precursor_peaks]
        else:
            descriptors = SpectrumDescription.from_arrays(precursor_peaks)
            mz_array = precursor_peaks.mz
            intensity_array = precursor_peaks.intensity

        instrument_config = scan.instrument_configuration
        if instrument_config is None:
            instrument_config_id = None
        else:
            instrument_config_id = instrument_config.id
        scan_parameters, scan_window_list = self.extract_scan_event_parameters(
            scan)

        if scan.precursor_information:
            precursor_information = self._pack_precursor_information(
                scan.precursor_information, scan.activation,
                scan.isolation_window)
        else:
            precursor_information = None

        spectrum_params = [
            {
                "name": "ms level",
                "value": scan.ms_level
            },
            {
                "name": "MS1 spectrum"
            } if scan.ms_level == 1 else {
                "name": "MSn spectrum"
            },
        ] + list(descriptors)
        spectrum_params.extend(self._get_annotations(scan))

        self.writer.write_spectrum(
            mz_array, intensity_array, charge_array, id=scan.id,
            params=spectrum_params, centroided=centroided,
            polarity=polarity, scan_start_time=scan.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(scan),
            instrument_configuration_id=instrument_config_id,
            precursor_information=precursor_information,
            scan_params=scan_parameters,
            scan_window_list=scan_window_list,
            encoding=self.data_encoding)
        # Track per-scan totals so TIC/BPC chromatograms can be emitted
        # at completion time.
        self.total_ion_chromatogram_tracker[scan.scan_time] = (
            descriptors["total ion current"])
        self.base_peak_chromatogram_tracker[scan.scan_time] = (
            descriptors["base peak intensity"])

    def save_scan_bunch(self, bunch, **kwargs):
        """Write a :class:`~.ScanBunch` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::
            If no spectra have been written to the output document yet,
            this method will call :meth:`_add_spectrum_list` and writes
            all of the metadata lists out. After this point, no new
            document-level metadata can be added.

        Parameters
        ----------
        bunch : :class:`~.ScanBunch`
            The scan set to write.
        """
        if bunch.precursor is not None:
            self.save_scan(bunch.precursor)
        for prod in bunch.products:
            self.save_scan(prod)
        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def extract_scan_event_parameters(self, scan):
        """Package :class:`~.ScanAcquisitionInformation` into a pair of
        :class:`list`s that :class:`~psims.mzml.writer.MzMLWriter`
        expects.

        Parameters
        ----------
        scan : :class:`~.Scan`

        Returns
        -------
        scan_parameters: :class:`list`
            Parameters qualifying the scan event (:class:`dict`)
        scan_window_list: :class:`list`
            Packed pairs of scan windows (:class:`list`)
        """
        scan_parameters = []
        scan_window_list = []
        acquisition_info = scan.acquisition_information
        filter_string = scan.annotations.get("filter_string")
        if filter_string is not None:
            scan_parameters.append({
                "name": "filter string",
                "value": filter_string
            })
        if acquisition_info is not None and len(acquisition_info) > 0:
            # Only the first scan event is translated here.
            scan_event = acquisition_info[0]
            if scan_event.has_ion_mobility():
                scan_parameters.append({
                    "name": "ion mobility drift time",
                    "value": scan_event.drift_time,
                    "unit_name": "millisecond",
                    'unit_cv_ref': "UO",
                    "unit_accession": 'UO:0000028'
                })
            if scan_event.injection_time is not None:
                # MS:1000927 is "ion injection time".
                scan_parameters.append({
                    "accession": 'MS:1000927',
                    "value": scan_event.injection_time,
                    "unit_name": getattr(
                        scan_event.injection_time, 'unit_info', None),
                })
            traits = scan_event.traits.items()
            for name, value in traits:
                param = {
                    "name": name,
                    "value": value,
                    'unit_name': getattr(value, 'unit_info', None)
                }
                scan_parameters.append(param)
            scan_window_list = list(scan_event)
        return scan_parameters, scan_window_list

    def save_chromatogram(self, chromatogram_dict, chromatogram_type,
                          params=None, **kwargs):
        time_array, intensity_array = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            time_array, intensity_array, id=kwargs.get('id'),
            chromatogram_type=chromatogram_type,
            compression=self.compression, params=params)

    def _make_default_chromatograms(self):
        # Queue the TIC and BPC built up while writing spectra; empty
        # trackers produce no chromatogram entries.
        d = dict(
            chromatogram=self.total_ion_chromatogram_tracker,
            chromatogram_type='total ion current chromatogram',
            id='TIC')
        if len(self.total_ion_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

        d = dict(
            chromatogram=self.base_peak_chromatogram_tracker,
            chromatogram_type="basepeak chromatogram",
            id='BPC')
        if len(self.base_peak_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

    def write_chromatograms(self):
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(self.chromatogram_queue))
        with self._chromatogram_list_tag:
            for chromatogram in self.chromatogram_queue:
                self.save_chromatogram(
                    chromatogram.pop("chromatogram"),
                    **chromatogram)

    def complete(self):
        """Finish writing to the output document.

        This closes the open list tags, empties the chromatogram
        accumulator, and closes the :obj:`<mzML>` tag, and attempts to
        flush the output file.
        """
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            self._make_default_chromatograms()
            self.write_chromatograms()
        if self._run_tag is not None:
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                # Detached stream with no file name; use a placeholder.
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                warnings.warn(
                    "Could not write extended index file due to error %r" % (e,))
        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            pass

    def format(self):
        """This method is no longer needed.
        """
        pass

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        self.complete()
        if hasattr(self.handle, "closed"):
            if not self.handle.closed:
                try:
                    self.handle.close()
                except AttributeError:
                    pass
class MzMLSerializer(ScanSerializerBase):
    """Write :mod:`ms_deisotope` data structures to a file in mzML format.

    Attributes
    ----------
    base_peak_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to base peak intensity. This
        is used to write the *base peak chromatogram*.
    chromatogram_queue : :class:`list`
        Accumulate chromatogram data structures which will be written out
        after all spectra have been written to file.
    compression : :class:`str`
        The compression type to use for binary data arrays. Should be one of
        :obj:`"zlib"`, :obj:`"none"`, or :obj:`None`
    data_encoding : :class:`dict` or :class:`int` or :obj:`numpy.dtype` or :class:`str`
        The encoding specification to specify the binary encoding of numeric data arrays
        that is passed to :meth:`~.MzMLWriter.write_spectrum` and related methods.
    data_processing_list : :class:`list`
        List of packaged :class:`~.DataProcessingInformation` to write out
    deconvoluted : bool
        Indicates whether the translation should include extra deconvolution information
    file_contents_list : :class:`list`
        List of terms to include in the :obj:`<fileContents>` tag
    handle : file-like
        The file-like object being written to
    indexer : :class:`~.ExtendedScanIndex`
        The external index builder
    instrument_configuration_list : :class:`list`
        List of packaged :class:`~.InstrumentInformation` to write out
    n_spectra : int
        The number of spectra to provide a size for in the :obj:`<spectrumList>`
    processing_parameters : :class:`list`
        List of additional terms to include in a newly created
        :class:`~.DataProcessingInformation`
    sample_list : :class:`list`
        List of :class:`~.SampleRun` objects to write out
    sample_name : :class:`str`
        Default sample name
    sample_run : :class:`~.SampleRun`
        The sample run description for this output document
    software_list : :class:`list`
        List of packaged :class:`~.Software` objects to write out
    source_file_list : :class:`list`
        List of packaged :class:`~.SourceFile` objects to write out
    total_ion_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to total intensity. This
        is used to write the *total ion chromatogram*.
    writer : :class:`~psims.mzml.writer.MzMLWriter`
        The lower level writer implementation
    """

    def __init__(self, handle, n_spectra=int(2e5), compression=None,
                 deconvoluted=True, sample_name=None, build_extra_index=True,
                 data_encoding=None):
        # NOTE(review): the default data_encoding dict dereferences ``writer``
        # *before* the ``writer is None`` ImportError guard below, so the
        # guard is unreachable when data_encoding is None — verify ordering.
        if data_encoding is None:
            data_encoding = {
                writer.MZ_ARRAY: np.float64,
                writer.INTENSITY_ARRAY: np.float32,
                writer.CHARGE_ARRAY: np.int32,
            }
        if writer is None:
            raise ImportError(
                "Cannot write mzML without psims. Please install psims to use this feature.")
        if compression is None:
            compression = writer.COMPRESSION_ZLIB
        super(MzMLSerializer, self).__init__()
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self.data_encoding = data_encoding
        self._has_started_writing_spectra = False
        # Open the <mzML> document immediately; it is closed in complete().
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None
        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted
        self._initialize_description_lists()
        self._init_sample(sample_name)
        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.chromatogram_queue = []
        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def _init_sample(self, sample_name, **kwargs):
        """Create the :class:`~.SampleRun` for this document and register it
        as a ``<sample>`` entry carrying the run's UUID."""
        self.sample_name = sample_name
        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        self.add_sample({
            "name": self.sample_run.name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ]})

    def _initialize_description_lists(self):
        """Reset all of the document-level metadata accumulators to empty."""
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []
        self.processing_parameters = []

    def add_instrument_configuration(self, configuration):
        """Add an :class:`~.InstrumentInformation` object to the output document.

        Parameters
        ----------
        configuration: :class:`~.InstrumentInformation`
            The instrument configuration to add
        """
        component_list = []
        for group in configuration.groups:
            tag = None
            if group.type == 'source':
                tag = self.writer.Source
            elif group.type == 'analyzer':
                tag = self.writer.Analyzer
            elif group.type == 'detector':
                tag = self.writer.Detector
            else:
                # Unrecognized component types are silently dropped.
                continue
            component_list.append(
                tag(order=group.order, params=[g.name for g in group]))
        config_element = self.writer.InstrumentConfiguration(
            configuration.id, component_list)
        self.instrument_configuration_list.append(config_element)

    def add_software(self, software_description):
        """Add a :class:`~.Software` object to the output document.

        Parameters
        ----------
        software_description : :class:`~.Software`
            The software description to add
        """
        self.software_list.append(software_description)

    def add_file_information(self, file_information):
        '''Add the information of a :class:`~.FileInformation` to the
        output document.

        Parameters
        ----------
        file_information: :class:`~.FileInformation`
            The information to add.
        '''
        for key, value in file_information.contents.items():
            if value is None:
                value = ''
            self.add_file_contents({str(key): value})
        for source_file in file_information.source_files:
            self.add_source_file(source_file)

    def add_file_contents(self, file_contents):
        """Add a key to the resulting :obj:`<fileDescription>`
        of the output document.

        Parameters
        ----------
        file_contents: :class:`str` or :class:`Mapping`
            The parameter to add
        """
        self.file_contents_list.append(file_contents)

    def remove_file_contents(self, name):
        """Remove a key to the resulting :obj:`<fileDescription>`
        of the output document.

        Parameters
        ----------
        file_contents: :class:`str` or :class:`Mapping`
            The parameter to remove

        Raises
        ------
        KeyError:
            When the content is not found.
        """
        i = None
        for i, content in enumerate(self.file_contents_list):
            # Mapping entries may carry the name under 'name' or as their
            # single key; multi-key mappings without 'name' are not matchable.
            if isinstance(content, Mapping):
                if 'name' in content:
                    content = content['name']
                elif len(content) == 1:
                    content = list(content.keys())[0]
                else:
                    continue
            if content == name:
                break
        else:
            raise KeyError(name)
        if i is None:
            # The list was empty, so the loop body never ran.
            raise KeyError(name)
        self.file_contents_list.pop(i)

    def add_source_file(self, source_file):
        """Add the :class:`~.SourceFile` to the output document

        Parameters
        ----------
        source_file : :class:`~.SourceFile`
            The source file to add
        """
        unwrapped = {
            "name": source_file.name,
            "location": source_file.location,
            "id": source_file.id,
            "params": []
        }
        unwrapped['params'].extend(
            [(getattr(key, 'accession', str(key)), value)
             for key, value in source_file.parameters.items()])
        if source_file.id_format:
            unwrapped['params'].append(str(source_file.id_format))
        if source_file.file_format:
            unwrapped['params'].append(str(source_file.file_format))
        self.source_file_list.append(unwrapped)

    def add_data_processing(self, data_processing_description):
        """Add a new :class:`~.DataProcessingInformation` or :class:`~ProcessingMethod`
        to the output document as a new :obj:`<dataProcessing>` entry describing one or
        more :obj:`<processingMethod>`s for a single referenced :class:`~.Software`
        instance.

        Parameters
        ----------
        data_processing_description : :class:`~.DataProcessingInformation` or :class:`~.ProcessingMethod`
            Data manipulation sequence to add to the document
        """
        if isinstance(data_processing_description, data_transformation.DataProcessingInformation):
            methods = []
            for method in data_processing_description:
                content = []
                for op, val in method:
                    content.append({'name': op.name, 'value': val})
                method_descr = {
                    'software_reference': method.software_id,
                    'order': method.order,
                    'params': content
                }
                methods.append(method_descr)
            payload = {
                'id': data_processing_description.id,
                'processing_methods': methods
            }
            self.data_processing_list.append(payload)
        elif isinstance(data_processing_description, data_transformation.ProcessingMethod):
            content = []
            for op, val in data_processing_description:
                content.append({"name": op.name, 'value': val})
            payload = {
                'id': "data_processing_%d" % len(self.data_processing_list),
                'processing_methods': [{
                    'software_reference': data_processing_description.software_id,
                    'order': data_processing_description.order,
                    'params': content
                }]
            }
            self.data_processing_list.append(payload)
        else:
            # Assume the caller already packaged the entry in writer format.
            self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value=None):
        """Add a new processing method to the writer's own :obj:`<dataProcessing>` element.

        Parameters
        ----------
        name : str
            The processing technique's name
        value : obj
            The processing technique's value, if any
        """
        self.processing_parameters.append({"name": name, "value": value})

    def add_sample(self, sample):
        """Register a packaged sample entry for the :obj:`<sampleList>`."""
        self.sample_list.append(sample)

    def copy_metadata_from(self, reader):
        """Copies the file-level metadata from an instance of
        :class:`~.ScanFileMetadataBase` into the metadata of the file
        to be written

        Parameters
        ----------
        reader : :class:`~.ScanFileMetadataBase`
            The source to copy metadata from
        """
        # Each section is best-effort: readers missing a metadata accessor
        # simply contribute nothing for that section.
        try:
            description = reader.file_description()
            self.add_file_information(description)
        except AttributeError:
            pass
        try:
            instrument_configs = reader.instrument_configuration()
        except AttributeError:
            instrument_configs = []
        for config in instrument_configs:
            self.add_instrument_configuration(config)
        try:
            software_list = reader.software_list()
        except AttributeError:
            software_list = []
        for software in software_list:
            self.add_software(software)
        try:
            data_processing_list = reader.data_processing()
        except AttributeError:
            data_processing_list = []
        for data_processing_ in data_processing_list:
            self.add_data_processing(data_processing_)

    def _create_file_description(self):
        """Write the accumulated :obj:`<fileDescription>` section."""
        self.writer.file_description(
            self.file_contents_list, self.source_file_list)

    def _create_software_list(self):
        """Write the :obj:`<softwareList>`, appending an entry for this
        library under a unique ``ms_deisotope_<n>`` id."""
        software_list = []
        ms_deisotope_entries = []
        for sw in self.software_list:
            d = {
                'id': sw.id,
                'version': sw.version
            }
            if sw.is_name(sw.name):
                d[sw.name] = ''
            else:
                d['MS:1000799'] = sw.name
            d['params'] = list(sw.options.items())
            if 'ms_deisotope' in str(sw.id):
                ms_deisotope_entries.append(str(sw.id))
            software_list.append(d)
        # Find the first unused ms_deisotope_<n> id; fall back to a UUID
        # suffix if the first 99 are somehow all taken.
        for i in range(1, 100):
            query = 'ms_deisotope_%d' % i
            if query in ms_deisotope_entries:
                continue
            else:
                new_entry_id = query
                break
        else:
            new_entry_id = 'ms_deisotope_%s' % str(uuid4())
        software_list.append({
            "id": new_entry_id,
            'version': lib_version,
            'ms_deisotope': "",
        })
        self.writer.software_list(software_list)

    def _create_sample_list(self):
        """Write the accumulated :obj:`<sampleList>` section."""
        self.writer.sample_list(self.sample_list)

    def build_processing_method(self, order=1, picked_peaks=True, smoothing=True,
                                baseline_reduction=True, additional_parameters=tuple(),
                                software_id=None, data_processing_id=None):
        """Construct a :class:`~.DataProcessingInformation` describing the
        transformations this writer applies, without registering it.

        Parameters
        ----------
        order : int
            The order of the processing method within its parent.
        picked_peaks, smoothing, baseline_reduction : bool
            Whether to include the corresponding processing terms.
        additional_parameters : iterable
            Extra terms folded into the method.
        software_id : str, optional
            Software reference id; defaults to ``"ms_deisotope_1"``.
        data_processing_id : str, optional
            Identifier for the resulting entry.

        Returns
        -------
        :class:`~.DataProcessingInformation`
        """
        if software_id is None:
            software_id = "ms_deisotope_1"
        if data_processing_id is None:
            data_processing_id = 'ms_deisotope_processing_%d' % len(
                self.data_processing_list)
        method = data_transformation.ProcessingMethod(software_id=software_id)
        if self.deconvoluted:
            method.add("deisotoping")
            method.add("charge deconvolution")
            method.add("precursor recalculation")
        if picked_peaks:
            method.add("peak picking")
        if smoothing:
            method.add("smoothing")
        if baseline_reduction:
            method.add("baseline reduction")
        method.add("Conversion to mzML")
        method.update(additional_parameters)
        method.update(self.processing_parameters)
        method.order = order
        data_processing_info = data_transformation.DataProcessingInformation(
            [method], data_processing_id)
        # self.add_data_processing(data_processing_info)
        return data_processing_info

    def _create_data_processing_list(self):
        """Write the accumulated :obj:`<dataProcessingList>` section."""
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        """Write the accumulated :obj:`<instrumentConfigurationList>` section."""
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        """Flush all document-level metadata sections and open the
        :obj:`<run>` and :obj:`<spectrumList>` tags. After this point no
        new document-level metadata may be added."""
        self._create_file_description()
        self._create_sample_list()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()
        self._run_tag = self.writer.run(
            id=self.sample_name or 1, sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def has_started_writing_spectra(self):
        """Return whether any spectra have been written yet."""
        return self._has_started_writing_spectra

    def _pack_activation(self, activation_information):
        """Pack :class:`~.ActivationInformation` into a :class:`dict` structure
        which that :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        activation_information: :class:`~.ActivationInformation`

        Returns
        -------
        :class:`dict`
        """
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        if activation_information.is_multiple_dissociation():
            for method in activation_information.methods[1:]:
                params.append({"name": str(method)})
        # NOTE: Only correct for CID/HCD spectra with absolute collision energies,
        # but that is all I have to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volt"
        })
        if activation_information.is_multiple_dissociation():
            energies = activation_information.energies[1:]
            supplemental_energy = None
            # A supplemental dissociation contributes the last listed energy.
            if activation_information.has_supplemental_dissociation():
                supplemental_energy = energies[-1]
                energies = energies[:-1]
            for energy in energies:
                params.append({
                    "name": "collision energy",
                    "value": energy,
                    "unitName": "electron volt"
                })
            if supplemental_energy is not None:
                params.append({
                    "name": 'supplemental collision energy',
                    "value": supplemental_energy,
                    "unitName": "electron volt"
                })
        for key, val in activation_information.data.items():
            arg = {
                "name": key,
                "value": val
            }
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self, precursor_information, activation_information=None,
                                    isolation_window=None):
        """Repackage the :class:`~.PrecursorInformation`, :class:`~.ActivationInformation`,
        and :class:~.IsolationWindow` into the nested :class:`dict` structure that
        :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        precursor_information : :class:`~.PrecursorInformation`
        activation_information : :class:`~.ActivationInformation`, optional
        isolation_window : :class:`~.IsolationWindow`, optional

        Returns
        -------
        :class:`dict`
        """
        # If the scan bunch has been fully deconvoluted and it's PrecursorInformation
        # filled in, its extracted fields will be populated and should be used, otherwise
        # use the default read values.
        extracted_neutral_mass = precursor_information.extracted_neutral_mass
        if (extracted_neutral_mass != 0):
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id,
                "params": [
                    {"ms_deisotope:defaulted": precursor_information.defaulted},
                    {"ms_deisotope:orphan": precursor_information.orphan}
                ]
            }
            if precursor_information.coisolation:
                for p in precursor_information.coisolation:
                    package['params'].append({
                        "name": "ms_deisotope:coisolation",
                        "value": "%f %f %d" % (p.neutral_mass, p.intensity, p.charge)
                    })
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        if package['charge'] == ChargeNotProvided:
            package["charge"] = None
        if activation_information is not None:
            package['activation'] = self._pack_activation(
                activation_information)
        if isolation_window is not None:
            package['isolation_window_args'] = {
                "lower": isolation_window.lower,
                "target": isolation_window.target,
                "upper": isolation_window.upper
            }
        return package

    def _prepare_extra_arrays(self, scan):
        """Build the non-standard binary arrays (deconvolution scores and
        packed isotopic envelopes) to attach to a deconvoluted spectrum."""
        extra_arrays = []
        if self.deconvoluted:
            score_array = [
                peak.score for peak in scan.deconvoluted_peak_set
            ]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array(
                [peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def _get_annotations(self, scan):
        """Collect scan annotations not already covered by the standard
        spectrum descriptors, packaged as single-key parameter dicts."""
        # These keys are emitted elsewhere (descriptors / scan params), so
        # they are excluded here to avoid duplicates.
        skip = {'filter string', 'base peak intensity', 'base peak m/z',
                'lowest observed m/z', 'highest observed m/z',
                'total ion current', }
        annotations = []
        for key, value in scan.annotations.items():
            if key in skip:
                continue
            annotations.append({
                key: value
            })
        return annotations

    def save_scan(self, scan, **kwargs):
        """Write a :class:`~.Scan` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::
            If no spectra have been written to the output document yet, this method will
            call :meth:`_add_spectrum_list` and writes all of the metadata lists out.
            After this point, no new document-level metadata can be added.

        Parameters
        ----------
        scan: :class:`~.Scan`
            The scan to write.
        deconvoluted: :class:`bool`
            Whether the scan to write out should include deconvolution information
        """
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True
        deconvoluted = kwargs.get("deconvoluted", self.deconvoluted)
        # Choose the peak source: deconvoluted peaks, centroided peaks, or
        # raw profile arrays, in that order of preference.
        if deconvoluted:
            centroided = True
            precursor_peaks = scan.deconvoluted_peak_set
        elif scan.peak_set:
            centroided = True
            precursor_peaks = scan.peak_set
        else:
            centroided = False
            precursor_peaks = scan.arrays
        polarity = scan.polarity
        if deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None
        if centroided:
            descriptors = SpectrumDescription.from_peak_set(precursor_peaks)
            mz_array = [p.mz for p in precursor_peaks]
            intensity_array = [p.intensity for p in precursor_peaks]
        else:
            descriptors = SpectrumDescription.from_arrays(precursor_peaks)
            mz_array = precursor_peaks.mz
            intensity_array = precursor_peaks.intensity
        instrument_config = scan.instrument_configuration
        if instrument_config is None:
            instrument_config_id = None
        else:
            instrument_config_id = instrument_config.id
        scan_parameters, scan_window_list = self.extract_scan_event_parameters(
            scan)
        if scan.precursor_information:
            precursor_information = self._pack_precursor_information(
                scan.precursor_information,
                scan.activation,
                scan.isolation_window)
        else:
            precursor_information = None
        spectrum_params = [
            {"name": "ms level", "value": scan.ms_level},
            {"name": "MS1 spectrum"} if scan.ms_level == 1 else {"name": "MSn spectrum"},
        ] + list(descriptors)
        spectrum_params.extend(self._get_annotations(scan))
        self.writer.write_spectrum(
            mz_array, intensity_array, charge_array, id=scan.id,
            params=spectrum_params,
            centroided=centroided,
            polarity=polarity,
            scan_start_time=scan.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(scan),
            instrument_configuration_id=instrument_config_id,
            precursor_information=precursor_information,
            scan_params=scan_parameters,
            scan_window_list=scan_window_list,
            encoding=self.data_encoding)
        # Accumulate per-scan values for the TIC and BPC written at the end.
        self.total_ion_chromatogram_tracker[
            scan.scan_time] = (descriptors["total ion current"])
        self.base_peak_chromatogram_tracker[
            scan.scan_time] = (descriptors["base peak intensity"])

    def save_scan_bunch(self, bunch, **kwargs):
        """Write a :class:`~.ScanBunch` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::
            If no spectra have been written to the output document yet, this method will
            call :meth:`_add_spectrum_list` and writes all of the metadata lists out.
            After this point, no new document-level metadata can be added.

        Parameters
        ----------
        bunch : :class:`~.ScanBunch`
            The scan set to write.
        """
        if bunch.precursor is not None:
            self.save_scan(bunch.precursor)
        for prod in bunch.products:
            self.save_scan(prod)
        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def extract_scan_event_parameters(self, scan):
        """Package :class:`~.ScanAcquisitionInformation` into a pair of
        :class:`list`s that :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        scan : :class:`~.Scan`

        Returns
        -------
        scan_parameters: :class:`list`
            Parameters qualifying the scan event (:class:`dict`)
        scan_window_list: :class:`list`
            Packed pairs of scan windows (:class:`list`)
        """
        scan_parameters = []
        scan_window_list = []
        acquisition_info = scan.acquisition_information
        filter_string = scan.annotations.get("filter_string")
        if filter_string is not None:
            scan_parameters.append({"name": "filter string", "value": filter_string})
        # Only the first scan event is packaged here.
        if acquisition_info is not None and len(acquisition_info) > 0:
            scan_event = acquisition_info[0]
            if scan_event.has_ion_mobility():
                scan_parameters.append({
                    "name": "ion mobility drift time",
                    "value": scan_event.drift_time,
                    "unit_name": "millisecond",
                    'unit_cv_ref': "UO",
                    "unit_accession": 'UO:0000028'
                })
            if scan_event.injection_time is not None:
                scan_parameters.append({
                    "accession": 'MS:1000927',
                    "value": scan_event.injection_time,
                    "unit_name": getattr(scan_event.injection_time, 'unit_info', None),
                })
            traits = scan_event.traits.items()
            for name, value in traits:
                param = {"name": name, "value": value,
                         'unit_name': getattr(value, 'unit_info', None)}
                scan_parameters.append(param)
            scan_window_list = list(scan_event)
        return scan_parameters, scan_window_list

    def save_chromatogram(self, chromatogram_dict, chromatogram_type, params=None, **kwargs):
        """Write a time→intensity mapping out as a :obj:`<chromatogram>` entry."""
        time_array, intensity_array = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            time_array, intensity_array, id=kwargs.get('id'),
            chromatogram_type=chromatogram_type, compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        """Queue the accumulated TIC and BPC for writing, when non-empty."""
        d = dict(
            chromatogram=self.total_ion_chromatogram_tracker,
            chromatogram_type='total ion current chromatogram',
            id='TIC')
        if len(self.total_ion_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)
        d = dict(
            chromatogram=self.base_peak_chromatogram_tracker,
            chromatogram_type="basepeak chromatogram",
            id='BPC')
        if len(self.base_peak_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

    def write_chromatograms(self):
        """Open the :obj:`<chromatogramList>` and drain the queue into it."""
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(self.chromatogram_queue))
        with self._chromatogram_list_tag:
            for chromatogram in self.chromatogram_queue:
                self.save_chromatogram(
                    chromatogram.pop("chromatogram"),
                    **chromatogram)

    def complete(self):
        """Finish writing to the output document.

        This closes the open list tags, empties the chromatogram accumulator,
        and closes the :obj:`<mzML>` tag, and attempts to flush the output file.
        """
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            self._make_default_chromatograms()
            self.write_chromatograms()
        if self._run_tag is not None:
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            # Derive the index file path from the output handle's name;
            # in-memory handles get a placeholder name.
            try:
                name = self.handle.name
            except AttributeError:
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                warnings.warn(
                    "Could not write extended index file due to error %r" % (e,))
        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            pass

    def format(self):
        """This method is no longer needed.
        """
        pass

    def close(self):
        """Finish the document with :meth:`complete` and close the handle,
        tolerating handles that cannot report or perform closing."""
        self.complete()
        if hasattr(self.handle, "closed"):
            if not self.handle.closed:
                try:
                    self.handle.close()
                except AttributeError:
                    pass
        else:
            try:
                self.handle.close()
            except (AttributeError, ValueError, TypeError, OSError):
                pass
def read_index_file(self, index_path=None): if index_path is None: index_path = self._index_file_name with open(index_path) as handle: self.extended_index = ExtendedScanIndex.deserialize(handle)
class MzMLScanSerializer(ScanSerializerBase):
    """Legacy serializer writing (possibly deconvoluted) scan bunches to mzML
    via :class:`~psims.mzml.writer.MzMLWriter`.

    Document-level metadata (software, source files, instrument configurations,
    samples, data processing) is accumulated until the first bunch is saved,
    at which point all metadata lists are flushed and the ``<run>`` /
    ``<spectrumList>`` tags are opened.
    """

    def __init__(self, handle, n_spectra=2e4, compression=writer.COMPRESSION_ZLIB,
                 deconvoluted=True, sample_name=None, build_extra_index=True):
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self._has_started_writing_spectra = False
        # Open the <mzML> document now; complete() closes it.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None
        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted
        self.sample_name = sample_name
        # Document-level metadata accumulators.
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []
        self.processing_parameters = []
        # Per-scan accumulators for the TIC/BPC chromatograms.
        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        self.add_sample({
            "name": sample_name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ]})
        self.chromatogram_queue = []
        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def add_software(self, software_description):
        """Register a software description for the :obj:`<softwareList>`."""
        self.software_list.append(software_description)

    def add_file_contents(self, file_contents):
        """Register a term for the :obj:`<fileContents>` tag."""
        self.file_contents_list.append(file_contents)

    def add_source_file(self, source_file_description):
        """Register a source file description for the :obj:`<fileDescription>`."""
        self.source_file_list.append(source_file_description)

    def add_data_processing(self, data_processing_description):
        """Register a packaged :obj:`<dataProcessing>` entry."""
        self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value):
        """Register an extra term for this writer's own processing method."""
        self.processing_parameters.append({"name": name, "value": value})

    def add_instrument_configuration(self, instrument_description):
        """Register an instrument configuration entry."""
        self.instrument_configuration_list.append(instrument_description)

    def add_sample(self, sample):
        """Register a packaged sample entry for the :obj:`<sampleList>`."""
        self.sample_list.append(sample)

    def _create_file_description(self):
        self.writer.file_description(
            self.file_contents_list, self.source_file_list)

    def _create_software_list(self):
        self.writer.software_list([{
            "id": "ms_deisotope_1",
            "name": "ms_deisotope"
        }])

    def _create_sample_list(self):
        self.writer.sample_list(self.sample_list)

    def _build_processing_method(self, order=1, picked_peaks=True, smoothing=True,
                                 baseline_reduction=True, additional_parameters=tuple()):
        """Build the packaged ``<processingMethod>`` mapping describing the
        transformations this writer applies."""
        if self.deconvoluted:
            params = [
                "deisotoping",
                "charge deconvolution",
                "precursor recalculation",
            ]
        else:
            params = []
        if picked_peaks:
            params.append("peak picking")
        if smoothing:
            params.append("smoothing")
        if baseline_reduction:
            params.append("baseline reduction")
        params.append("Conversion to mzML")
        params.extend(additional_parameters)
        mapping = {
            "software_reference": "ms_deisotope_1",
            "order": order,
            "params": params
        }
        return mapping

    def _create_data_processing_list(self):
        # NOTE(review): order is len(list) - 1, i.e. -1 when no other entries
        # have been registered — confirm whether a 1-based order was intended.
        n = len(self.data_processing_list) - 1
        entry = {
            "id": "ms_deisotope_processing_1",
            "processing_methods": [self._build_processing_method(
                n, additional_parameters=self.processing_parameters)]
        }
        self.add_data_processing(entry)
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        """Flush all metadata sections and open the :obj:`<run>` and
        :obj:`<spectrumList>` tags. No further document-level metadata may
        be added afterwards."""
        self._create_file_description()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()
        self._create_sample_list()
        self._run_tag = self.writer.run(id=self.sample_name, sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def _pack_activation(self, activation_information):
        """Pack :class:`~.ActivationInformation` into the parameter list
        structure the writer expects."""
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        # NOTE: Only correct for CID/HCD spectra with absolute collision
        # energies, but that is all I have to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volts"
        })
        for key, val in activation_information.data.items():
            arg = {
                "name": key,
                "value": val
            }
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self, precursor_information, activation_information=None):
        """Repackage precursor and activation data into the writer's nested
        :class:`dict` structure."""
        # If the scan bunch has been fully deconvoluted and its PrecursorInformation
        # filled in, its extracted fields will be populated and should be used,
        # otherwise use the default read values.
        if precursor_information.extracted_neutral_mass != 0:
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        if activation_information is not None:
            package['activation'] = self._pack_activation(activation_information)
        return package

    def _prepare_extra_arrays(self, scan):
        """Build the deconvolution score and isotopic envelope arrays for a
        deconvoluted spectrum."""
        extra_arrays = []
        if self.deconvoluted:
            score_array = [
                peak.score for peak in scan.deconvoluted_peak_set
            ]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array(
                [peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def save_scan_bunch(self, bunch, **kwargs):
        """Write a precursor and its product scans as related ``<spectrum>``
        entries, updating the chromatogram accumulators and the extra index.

        Empty peak sets are skipped entirely.
        """
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True
        if self.deconvoluted:
            precursor_peaks = bunch.precursor.deconvoluted_peak_set
        else:
            precursor_peaks = bunch.precursor.peak_set
        if len(precursor_peaks) == 0:
            return
        polarity = bunch.precursor.polarity
        if self.deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None
        descriptors = describe_spectrum(precursor_peaks)
        self.writer.write_spectrum(
            [p.mz for p in precursor_peaks],
            [p.intensity for p in precursor_peaks],
            charge_array,
            id=bunch.precursor.id, params=[
                {"name": "ms level", "value": bunch.precursor.ms_level},
                {"name": "MS1 spectrum"}] + descriptors,
            polarity=polarity,
            scan_start_time=bunch.precursor.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(bunch.precursor))
        self.total_ion_chromatogram_tracker[
            bunch.precursor.scan_time] = _total_intensity_from_descriptors(descriptors)
        self.base_peak_chromatogram_tracker[
            bunch.precursor.scan_time] = _base_peak_from_descriptors(descriptors)
        for prod in bunch.products:
            if self.deconvoluted:
                product_peaks = prod.deconvoluted_peak_set
            else:
                product_peaks = prod.peak_set
            if len(product_peaks) == 0:
                continue
            descriptors = describe_spectrum(product_peaks)
            self.total_ion_chromatogram_tracker[
                prod.scan_time] = _total_intensity_from_descriptors(descriptors)
            self.base_peak_chromatogram_tracker[
                prod.scan_time] = _base_peak_from_descriptors(descriptors)
            if self.deconvoluted:
                charge_array = [p.charge for p in product_peaks]
            else:
                charge_array = None
            self.writer.write_spectrum(
                [p.mz for p in product_peaks],
                [p.intensity for p in product_peaks],
                charge_array,
                id=prod.id, params=[
                    {"name": "ms level", "value": prod.ms_level},
                    {"name": "MSn spectrum"}] + descriptors,
                polarity=prod.polarity,
                scan_start_time=prod.scan_time,
                precursor_information=self._pack_precursor_information(
                    prod.precursor_information, prod.activation),
                compression=self.compression,
                other_arrays=self._prepare_extra_arrays(prod))
        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def save_chromatogram(self, chromatogram_dict, chromatogram_type, params=None, **kwargs):
        """Write a time→intensity mapping out as a ``<chromatogram>`` entry."""
        time_array, intensity_array = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            time_array, intensity_array, id=kwargs.get('id'),
            chromatogram_type=chromatogram_type,
            compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        """Queue the accumulated TIC and BPC for writing, when non-empty."""
        d = dict(
            chromatogram=self.total_ion_chromatogram_tracker,
            chromatogram_type='total ion current chromatogram',
            id='TIC')
        if len(self.total_ion_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)
        d = dict(
            chromatogram=self.base_peak_chromatogram_tracker,
            chromatogram_type="basepeak chromatogram",
            id='BPC')
        if len(self.base_peak_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

    def write_chromatograms(self):
        """Open the ``<chromatogramList>`` and drain the queue into it."""
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(self.chromatogram_queue))
        with self._chromatogram_list_tag:
            for chromatogram in self.chromatogram_queue:
                self.save_chromatogram(
                    chromatogram.pop("chromatogram"), **chromatogram)

    def complete(self):
        """Finish writing to the output document.

        Closes the open list and run tags (if any were ever opened), writes
        the queued chromatograms, closes the ``<mzML>`` tag, and serializes
        the extended index when one was built.
        """
        # FIX: the tags are None until the first scan bunch is written;
        # completing an empty document previously raised AttributeError.
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            self._make_default_chromatograms()
            self.write_chromatograms()
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError:
                pass

    def format(self):
        """Run the writer's post-formatting pass.

        Raises
        ------
        OSError
            Re-raised unless it is the benign Windows errno 32 sharing
            violation emitted while the file is still held open.
        """
        try:
            self.writer.format()
        except OSError as e:
            # FIX: previously *every* OSError was silently swallowed; only
            # the known-benign Windows sharing violation should be ignored.
            if not (on_windows and e.errno == 32):
                raise
class MzMLScanSerializer(ScanSerializerBase):
    """Serialize scan bunches (and optionally their deconvoluted peaks)
    to an mzML document written through :class:`writer.MzMLWriter`.

    Accumulates file-level metadata (software, source files, instrument
    configurations, samples, processing parameters) until the first
    spectrum is written, tracks TIC/BPC chromatogram points per scan,
    and optionally maintains an :class:`ExtendedScanIndex` side-car.
    """

    def __init__(self, handle, n_spectra=2e4, compression=writer.COMPRESSION_ZLIB,
                 deconvoluted=True, sample_name=None, build_extra_index=True):
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        # NOTE(review): n_spectra defaults to the float 2e4 and is passed
        # straight through as the spectrumList count — confirm downstream
        # tolerates a float here.
        self.n_spectra = n_spectra
        self.compression = compression
        self._has_started_writing_spectra = False
        self.writer.__enter__()
        # List tags are created lazily when the first spectrum arrives.
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None
        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted
        self.sample_name = sample_name

        # Metadata accumulators, flushed by _add_spectrum_list().
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []
        self.processing_parameters = []

        # scan_time -> intensity maps used to build TIC/BPC chromatograms.
        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()

        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        self.add_sample({
            "name": sample_name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ]
        })
        self.chromatogram_queue = []
        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def add_software(self, software_description):
        self.software_list.append(software_description)

    def add_file_contents(self, file_contents):
        self.file_contents_list.append(file_contents)

    def add_source_file(self, source_file_description):
        self.source_file_list.append(source_file_description)

    def add_data_processing(self, data_processing_description):
        self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value):
        self.processing_parameters.append({"name": name, "value": value})

    def add_instrument_configuration(self, instrument_description):
        self.instrument_configuration_list.append(instrument_description)

    def add_sample(self, sample):
        self.sample_list.append(sample)

    def _create_file_description(self):
        self.writer.file_description(
            self.file_contents_list, self.source_file_list)

    def _create_software_list(self):
        self.writer.software_list([{
            "id": "ms_deisotope_1",
            "name": "ms_deisotope"
        }])

    def _create_sample_list(self):
        self.writer.sample_list(self.sample_list)

    def _build_processing_method(self, order=1, picked_peaks=True, smoothing=True,
                                 baseline_reduction=True,
                                 additional_parameters=tuple()):
        # Describe what ms_deisotope did to the data as mzML processing
        # method params.
        if self.deconvoluted:
            params = [
                "deisotoping",
                "charge deconvolution",
                "precursor recalculation",
            ]
        else:
            params = []
        if picked_peaks:
            params.append("peak picking")
        if smoothing:
            params.append("smoothing")
        if baseline_reduction:
            params.append("baseline reduction")
        params.append("Conversion to mzML")
        params.extend(additional_parameters)
        mapping = {
            "software_reference": "ms_deisotope_1",
            "order": order,
            "params": params
        }
        return mapping

    def _create_data_processing_list(self):
        # NOTE(review): order is len - 1, which is -1 when no other
        # processing entries were registered — confirm this is intended.
        n = len(self.data_processing_list) - 1
        entry = {
            "id": "ms_deisotope_processing_1",
            "processing_methods": [
                self._build_processing_method(
                    n, additional_parameters=self.processing_parameters)
            ]
        }
        self.add_data_processing(entry)
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        # Flush all accumulated header metadata, then open the <run> and
        # <spectrumList> tags that the spectra will be written into.
        self._create_file_description()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()
        self._create_sample_list()
        self._run_tag = self.writer.run(id=self.sample_name, sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def _pack_activation(self, activation_information):
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        # NOTE: Only correct for CID/HCD spectra with absolute collision
        # energies, but that is all I have to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volts"
        })
        for key, val in activation_information.data.items():
            arg = {"name": key, "value": val}
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self, precursor_information,
                                    activation_information=None):
        # If the scan bunch has been fully deconvoluted and its
        # PrecursorInformation filled in, its extracted fields will be
        # populated and should be used, otherwise use the default read
        # values.
        if precursor_information.extracted_neutral_mass != 0:
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        if activation_information is not None:
            package['activation'] = self._pack_activation(
                activation_information)
        return package

    def _prepare_extra_arrays(self, scan):
        # Deconvoluted spectra carry two non-standard binary arrays:
        # per-peak scores and the packed isotopic envelopes.
        extra_arrays = []
        if self.deconvoluted:
            score_array = [peak.score for peak in scan.deconvoluted_peak_set]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array(
                [peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def save_scan_bunch(self, bunch, **kwargs):
        """Write one precursor scan and its product scans, updating the
        TIC/BPC trackers and the extended index along the way.

        Bunches whose precursor has no peaks are skipped entirely.
        """
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True

        if self.deconvoluted:
            precursor_peaks = bunch.precursor.deconvoluted_peak_set
        else:
            precursor_peaks = bunch.precursor.peak_set
        if len(precursor_peaks) == 0:
            return
        polarity = bunch.precursor.polarity
        if self.deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None

        descriptors = describe_spectrum(precursor_peaks)
        self.writer.write_spectrum(
            [p.mz for p in precursor_peaks],
            [p.intensity for p in precursor_peaks],
            charge_array, id=bunch.precursor.id,
            params=[
                {"name": "ms level", "value": bunch.precursor.ms_level},
                {"name": "MS1 spectrum"}] + descriptors,
            polarity=polarity,
            scan_start_time=bunch.precursor.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(bunch.precursor))
        self.total_ion_chromatogram_tracker[
            bunch.precursor.scan_time] = _total_intensity_from_descriptors(
                descriptors)
        self.base_peak_chromatogram_tracker[
            bunch.precursor.scan_time] = _base_peak_from_descriptors(
                descriptors)

        for prod in bunch.products:
            if self.deconvoluted:
                product_peaks = prod.deconvoluted_peak_set
            else:
                product_peaks = prod.peak_set
            # Empty product spectra are skipped but do not abort the bunch.
            if len(product_peaks) == 0:
                continue
            descriptors = describe_spectrum(product_peaks)
            self.total_ion_chromatogram_tracker[
                prod.scan_time] = _total_intensity_from_descriptors(
                    descriptors)
            self.base_peak_chromatogram_tracker[
                prod.scan_time] = _base_peak_from_descriptors(descriptors)
            if self.deconvoluted:
                charge_array = [p.charge for p in product_peaks]
            else:
                charge_array = None
            self.writer.write_spectrum(
                [p.mz for p in product_peaks],
                [p.intensity for p in product_peaks],
                charge_array, id=prod.id,
                params=[
                    {"name": "ms level", "value": prod.ms_level},
                    {"name": "MSn spectrum"}] + descriptors,
                polarity=prod.polarity,
                scan_start_time=prod.scan_time,
                precursor_information=self._pack_precursor_information(
                    prod.precursor_information, prod.activation),
                compression=self.compression,
                other_arrays=self._prepare_extra_arrays(prod))
        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def save_chromatogram(self, chromatogram_dict, chromatogram_type,
                          params=None, **kwargs):
        time_array, intensity_array = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            time_array, intensity_array, id=kwargs.get('id'),
            chromatogram_type=chromatogram_type,
            compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        # Queue the TIC and BPC chromatograms accumulated during
        # save_scan_bunch, skipping any tracker with no data points.
        d = dict(
            chromatogram=self.total_ion_chromatogram_tracker,
            chromatogram_type='total ion current chromatogram',
            id='TIC')
        if len(self.total_ion_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

        d = dict(
            chromatogram=self.base_peak_chromatogram_tracker,
            chromatogram_type="basepeak chromatogram",
            id='BPC')
        if len(self.base_peak_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

    def write_chromatograms(self):
        # Emit every queued chromatogram inside one <chromatogramList>.
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(self.chromatogram_queue))
        with self._chromatogram_list_tag:
            for chromatogram in self.chromatogram_queue:
                self.save_chromatogram(
                    chromatogram.pop("chromatogram"),
                    **chromatogram)

    def complete(self):
        """Finish writing to the output document.

        Closes the open list tags, writes the accumulated chromatograms,
        closes the ``<mzML>`` tag, serializes the extended index (when one
        was built), and attempts to flush the output file.
        """
        import warnings
        # These tags are initialized to None and only created once the
        # first spectrum is written; guard so completing an empty run
        # does not raise AttributeError.
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            self._make_default_chromatograms()
            self.write_chromatograms()
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                # Surface the failure instead of silently dropping the index.
                warnings.warn(
                    "Could not write extended index file due to error %r" % (e,))
        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            pass

    def format(self):
        try:
            self.writer.format()
        except OSError as e:
            # errno 32 on Windows (file still held by another handle) is
            # expected and benign; anything else is a real failure.
            if on_windows and e.errno == 32:
                pass
            else:
                raise
path = key_index[key] reader, lock = reader_index[path] values = request.values print(values) with lock: scan = reader.get_scan_by_id(scan_id) response = format_scan(scan, values) return response if __name__ == "__main__": import sys for i, path in enumerate(sys.argv[1:]): print("Loading {0} with Key {1}".format(path, i)) reader = MSFileLoader(path) index_path = ExtendedScanIndex.index_file_name(path) if os.path.exists(index_path): file_index = ExtendedScanIndex.load(open(index_path, 'rt')) else: print("Indexing {0}".format(path)) reader.reset() file_index, scan_tree = quick_index.index(reader) reader.reset() with open(index_path, 'wt') as fh: file_index.dump(fh) print(file_index) metadata_index[path] = file_index reader_index[path] = reader, RLock() key_index[str(i)] = path app.run(threaded=True)