Example #1
0
 def _index_file_name(self):
     """Return the path of the companion extended-index file for this source.

     Returns :obj:`None` when the underlying source object exposes no
     ``name`` attribute (e.g. an in-memory stream).
     """
     # `basestring` implies Python 2 compatibility is expected here.
     if isinstance(self.source_file, basestring):
         return ExtendedScanIndex.index_file_name(self.source_file)
     else:
         try:
             return ExtendedScanIndex.index_file_name(self.source_file.name)
         except AttributeError:
             return None
Example #2
0
 def _index_file_name(self):
     """Derive the on-disk name of the extended index for this source.

     Falls back to :obj:`None` when the source object has no usable name.
     """
     source = self.source_file
     if isinstance(source, basestring):
         return ExtendedScanIndex.index_file_name(source)
     try:
         return ExtendedScanIndex.index_file_name(source.name)
     except AttributeError:
         return None
Example #3
0
    def __init__(self, handle, n_spectra=2e4,
                 compression=writer.COMPRESSION_ZLIB, deconvoluted=True,
                 sample_name=None, build_extra_index=True):
        """Prepare an mzML writer over ``handle`` and open the document.

        The :obj:`<mzML>` element is opened immediately; ``complete()``
        is expected to close it.
        """
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self._has_started_writing_spectra = False

        # Enter the <mzML> document and emit the controlled vocabulary
        # list up front.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None
        self.writer.controlled_vocabularies()

        self.deconvoluted = deconvoluted
        self.sample_name = sample_name

        # Metadata accumulators, written out when the header is emitted.
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []
        self.processing_parameters = []

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()

        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        self.add_sample({
            "name": sample_name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ],
        })

        self.chromatogram_queue = []

        self.indexer = ExtendedScanIndex() if build_extra_index else None
 def build_extended_index(self, header_only=True):
     """Construct an :class:`ExtendedScanIndex` over the whole run.

     Parameters
     ----------
     header_only : bool
         If :const:`True`, iterate scan headers only (no peak data),
         which is substantially faster.

     The index is stored on :attr:`extended_index` and, when possible,
     serialized next to the source file; persistence failures are
     reported but not fatal.
     """
     self.reset()
     indexer = ExtendedScanIndex()
     iterator = self
     if header_only:
         iterator = self.iter_scan_headers()
     for bunch in iterator:
         indexer.add_scan_bunch(bunch)
     self.reset()
     self.extended_index = indexer
     try:
         # TypeError added: _index_file_name may be None for nameless
         # sources, and open(None) raises TypeError (the sibling version
         # of this method already catches it). Redundant `pass` removed.
         with open(self._index_file_name, 'w') as handle:
             indexer.serialize(handle)
     except (IOError, OSError, AttributeError, TypeError) as err:
         print(err)
Example #5
0
    def complete(self):
        """Finish writing to the output document.

        This closes the open list tags, empties the chromatogram accumulator,
        and closes the :obj:`<mzML>` tag, and attempts to flush the output file.
        """
        # The tag objects stay None until spectrum writing begins, so each
        # close operation is guarded.
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            self._make_default_chromatograms()
            self.write_chromatograms()
        if self._run_tag is not None:
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            # Derive the side-car index path from the output file's name,
            # falling back to a placeholder for nameless/in-memory handles.
            try:
                name = self.handle.name
            except AttributeError:
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                warnings.warn(
                    "Could not write extended index file due to error %r" % (e,))

        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            # flush() is best-effort; a closed or detached handle is fine.
            pass
Example #6
0
    def complete(self):
        """Finish writing to the output document.

        Closes any open list tags, flushes queued chromatograms, ends the
        :obj:`<mzML>` element, writes the extended index side-car if one
        was built, and finally attempts to flush the underlying file.
        """
        spectrum_list = self._spectrum_list_tag
        if spectrum_list is not None:
            spectrum_list.__exit__(None, None, None)
        if self._run_tag is not None:
            self._make_default_chromatograms()
            self.write_chromatograms()
        if self._run_tag is not None:
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)

        if self.indexer is not None:
            # The handle may be an in-memory stream without a name.
            name = getattr(self.handle, 'name', "_detatched_mzml_index")
            try:
                index_path = ExtendedScanIndex.index_file_name(name)
                with open(index_path, 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                warnings.warn(
                    "Could not write extended index file due to error %r" % (e,))

        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            pass
Example #7
0
def load_index_file(path):
    """Load (and memoize) the :class:`ExtendedScanIndex` stored at ``path``.

    Results are cached in the module-level ``metadata_index`` mapping so
    a given index file is parsed at most once.

    Parameters
    ----------
    path : str
        Path of the serialized index file.

    Returns
    -------
    :class:`ExtendedScanIndex`
    """
    try:
        index = metadata_index[path]
    except KeyError:
        with open(path, 'rt') as fh:
            index = ExtendedScanIndex.load(fh)
        metadata_index[path] = index
    # Bug fix: the return was previously inside the except branch only,
    # so a cache hit fell through and returned None.
    return index
Example #8
0
    def __init__(self,
                 handle,
                 n_spectra=2e4,
                 compression=None,
                 deconvoluted=True,
                 sample_name=None,
                 build_extra_index=True,
                 data_encoding=None):
        """Create a serializer writing an mzML document into ``handle``.

        Raises
        ------
        ImportError
            If the optional psims dependency is not installed.
        """
        # Check the psims dependency before touching any of its attributes:
        # previously the default ``data_encoding`` dict was built from
        # ``writer`` first, raising AttributeError instead of the intended
        # ImportError when psims was absent.
        if writer is None:
            raise ImportError(
                "Cannot write mzML without psims. Please install psims to use this feature."
            )
        if data_encoding is None:
            data_encoding = {
                writer.MZ_ARRAY: np.float64,
                writer.INTENSITY_ARRAY: np.float32,
                writer.CHARGE_ARRAY: np.int32,
            }
        if compression is None:
            compression = writer.COMPRESSION_ZLIB
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self.data_encoding = data_encoding
        self._has_started_writing_spectra = False

        # Open the <mzML> element immediately; complete() closes it.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None

        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted

        self._initialize_description_lists()
        self._init_sample(sample_name)

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.chromatogram_queue = []

        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()
 def complete(self):
     """Finalize the document: close the open list and run tags, write
     default chromatograms, end :obj:`<mzML>`, and serialize the
     extended index side-car when one was built.
     """
     # Guard against completing a document where writing never started:
     # the tag objects are None until the spectrum list is opened, and
     # the later revisions of this method carry the same guards.
     if self._spectrum_list_tag is not None:
         self._spectrum_list_tag.__exit__(None, None, None)
     if self._run_tag is not None:
         self._make_default_chromatograms()
         self.write_chromatograms()
         self._run_tag.__exit__(None, None, None)
     self.writer.__exit__(None, None, None)
     if self.indexer is not None:
         try:
             name = self.handle.name
         except AttributeError:
             # In-memory handles have no name; use a placeholder.
             name = "_detatched_mzml_index"
         try:
             with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                 self.indexer.serialize(ixfile)
         except IOError:
             # Best effort; a missing side-car index is not fatal.
             pass
Example #10
0
 def complete(self):
     """Close the spectrum and run elements, emit default chromatograms,
     and serialize the extended index alongside the output file.
     """
     self._spectrum_list_tag.__exit__(None, None, None)
     self._make_default_chromatograms()
     self.write_chromatograms()
     self._run_tag.__exit__(None, None, None)
     self.writer.__exit__(None, None, None)
     if self.indexer is None:
         return
     try:
         name = self.handle.name
     except AttributeError:
         name = "_detatched_mzml_index"
     try:
         index_path = ExtendedScanIndex.index_file_name(name)
         with open(index_path, 'w') as ixfile:
             self.indexer.serialize(ixfile)
     except IOError:
         pass
Example #11
0
    def build_extended_index(self, header_only=True):
        """Index every scan in the run and persist the result when possible.

        When the run contains MS1 scans, scans are consumed as precursor
        bunches; otherwise they are added individually. The index is kept
        on :attr:`extended_index` regardless of whether serialization to
        disk succeeds.
        """
        self.reset()
        indexer = ExtendedScanIndex()
        iterator = self.iter_scan_headers() if header_only else self
        if self._has_ms1_scans():
            for bunch in iterator:
                indexer.add_scan_bunch(bunch)
        else:
            for scan in iterator:
                indexer.add_scan(scan)

        self.reset()
        self.extended_index = indexer
        try:
            with open(self._index_file_name, 'w') as handle:
                indexer.serialize(handle)
        except (IOError, OSError, AttributeError, TypeError) as err:
            print(err)
Example #12
0
    def __init__(self,
                 handle,
                 n_spectra=2e4,
                 compression=writer.COMPRESSION_ZLIB,
                 deconvoluted=True,
                 sample_name=None,
                 build_extra_index=True):
        """Open an mzML document over ``handle`` and set up the metadata
        accumulators used while writing.
        """
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self._has_started_writing_spectra = False

        # Open <mzML> now; complete() is responsible for closing it.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None
        self.writer.controlled_vocabularies()

        self.deconvoluted = deconvoluted
        self.sample_name = sample_name

        # Header metadata accumulators.
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []
        self.processing_parameters = []

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()

        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        sample_entry = {
            "name": sample_name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ],
        }
        self.add_sample(sample_entry)

        self.chromatogram_queue = []

        self.indexer = ExtendedScanIndex() if build_extra_index else None
Example #13
0
    def __init__(self, handle, n_spectra=int(2e5), compression=None,
                 deconvoluted=True, sample_name=None, build_extra_index=True,
                 data_encoding=None):
        """Create a serializer writing an mzML document into ``handle``.

        Raises
        ------
        ImportError
            If the optional psims dependency is not installed.
        """
        # Validate the psims dependency before touching any of its
        # attributes: previously the default ``data_encoding`` dict was
        # built from ``writer`` first, so a missing psims raised
        # AttributeError instead of this ImportError.
        if writer is None:
            raise ImportError(
                "Cannot write mzML without psims. Please install psims to use this feature.")
        if data_encoding is None:
            data_encoding = {
                writer.MZ_ARRAY: np.float64,
                writer.INTENSITY_ARRAY: np.float32,
                writer.CHARGE_ARRAY: np.int32,
            }
        if compression is None:
            compression = writer.COMPRESSION_ZLIB
        super(MzMLSerializer, self).__init__()
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self.data_encoding = data_encoding
        self._has_started_writing_spectra = False

        # Open the <mzML> element immediately; complete() closes it.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None

        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted

        self._initialize_description_lists()
        self._init_sample(sample_name)

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.chromatogram_queue = []

        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()
Example #14
0
 def _index_file_name(self):
     """Path of the extended-index side-car file for ``source_file``."""
     source = self.source_file
     return ExtendedScanIndex.index_file_name(source)
Example #15
0
 def read_index_file(self):
     """Load the serialized extended index from its side-car file and
     store it on :attr:`extended_index`.
     """
     with open(self._index_file_name) as handle:
         index = ExtendedScanIndex.deserialize(handle)
     self.extended_index = index
Example #16
0
class MzMLSerializer(ScanSerializerBase):
    """Write :mod:`ms_deisotope` data structures to a file in mzML format.

    Attributes
    ----------
    base_peak_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to base peak intensity. This is
        used to write the *base peak chromatogram*.
    chromatogram_queue : :class:`list`
        Accumulate chromatogram data structures which will be written out
        after all spectra have been written to file.
    compression : :class:`str`
        The compression type to use for binary data arrays. Should be one of
        :obj:`"zlib"`, :obj:`"none"`, or :obj:`None`
    data_encoding : :class:`dict` or :class:`int` or :obj:`numpy.dtype` or :class:`str`
        The encoding specification to specify the binary encoding of numeric data arrays
        that is passed to :meth:`~.MzMLWriter.write_spectrum` and related methods.
    data_processing_list : :class:`list`
        List of packaged :class:`~.DataProcessingInformation` to write out
    deconvoluted : bool
        Indicates whether the translation should include extra deconvolution information
    file_contents_list : :class:`list`
        List of terms to include in the :obj:`<fileContents>` tag
    handle : file-like
        The file-like object being written to
    indexer : :class:`~.ExtendedScanIndex`
        The external index builder
    instrument_configuration_list : :class:`list`
        List of packaged :class:`~.InstrumentInformation` to write out
    n_spectra : int
        The number of spectra to provide a size for in the :obj:`<spectrumList>`
    processing_parameters : :class:`list`
        List of additional terms to include in a newly created :class:`~.DataProcessingInformation`
    sample_list : :class:`list`
        List of :class:`~.SampleRun` objects to write out
    sample_name : :class:`str`
        Default sample name
    sample_run : :class:`~.SampleRun`
        The sample run record created for this output, carrying the
        sample name and a freshly generated UUID.
    software_list : :class:`list`
        List of packaged :class:`~.Software` objects to write out
    source_file_list : :class:`list`
        List of packaged :class:`~.SourceFile` objects to write out
    total_ion_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to total intensity. This is
        used to write the *total ion chromatogram*.
    writer : :class:`~psims.mzml.writer.MzMLWriter`
        The lower level writer implementation
    """
    def __init__(self,
                 handle,
                 n_spectra=2e4,
                 compression=None,
                 deconvoluted=True,
                 sample_name=None,
                 build_extra_index=True,
                 data_encoding=None):
        """Create a serializer writing an mzML document into ``handle``.

        Parameters
        ----------
        handle : file-like
            The stream the mzML document is written to.
        n_spectra : int
            Expected spectrum count used to size the :obj:`<spectrumList>`.
        compression : str, optional
            Binary array compression; defaults to zlib when omitted.
        deconvoluted : bool
            Whether deconvolution-specific arrays and terms are written.
        sample_name : str, optional
            Name used for the sample and run.
        build_extra_index : bool
            Whether to accumulate an :class:`ExtendedScanIndex` side-car.
        data_encoding : dict, optional
            Mapping from array type to numpy dtype; a default covering
            m/z, intensity and charge arrays is used when omitted.

        Raises
        ------
        ImportError
            If the optional psims dependency is not installed.
        """
        # Check the psims dependency before touching any of its
        # attributes: previously the default ``data_encoding`` dict was
        # built from ``writer`` first, raising AttributeError rather than
        # this ImportError when psims was missing.
        if writer is None:
            raise ImportError(
                "Cannot write mzML without psims. Please install psims to use this feature."
            )
        if data_encoding is None:
            data_encoding = {
                writer.MZ_ARRAY: np.float64,
                writer.INTENSITY_ARRAY: np.float32,
                writer.CHARGE_ARRAY: np.int32,
            }
        if compression is None:
            compression = writer.COMPRESSION_ZLIB
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self.data_encoding = data_encoding
        self._has_started_writing_spectra = False

        # Open the <mzML> element immediately; complete() closes it.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None

        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted

        self._initialize_description_lists()
        self._init_sample(sample_name)

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.chromatogram_queue = []

        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def _init_sample(self, sample_name, **kwargs):
        """Create the :class:`SampleRun` record for this output and
        register it as a ``<sample>`` entry.
        """
        self.sample_name = sample_name
        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))
        sample = {
            "name": self.sample_run.name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ],
        }
        self.add_sample(sample)

    def _initialize_description_lists(self):
        """Reset every header-metadata accumulator to an empty list."""
        for attr in ('file_contents_list',
                     'software_list',
                     'source_file_list',
                     'data_processing_list',
                     'instrument_configuration_list',
                     'sample_list',
                     'processing_parameters'):
            setattr(self, attr, [])

    def add_instrument_configuration(self, configuration):
        """Add an :class:`~.InstrumentInformation` object to
        the output document.

        Parameters
        ----------
        configuration: :class:`~.InstrumentInformation`
            The instrument configuration to add
        """
        tag_for_type = {
            'source': self.writer.Source,
            'analyzer': self.writer.Analyzer,
            'detector': self.writer.Detector,
        }
        component_list = []
        for group in configuration.groups:
            tag = tag_for_type.get(group.type)
            if tag is None:
                # Unrecognized component types are skipped.
                continue
            component_list.append(
                tag(order=group.order, params=[g.name for g in group]))
        config_element = self.writer.InstrumentConfiguration(
            configuration.id, component_list)
        self.instrument_configuration_list.append(config_element)

    def add_software(self, software_description):
        """Add a :class:`~.Software` object to the output document.

        The entry is queued and written into the :obj:`<softwareList>`
        when the document header is emitted.

        Parameters
        ----------
        software_description : :class:`~.Software`
            The software description to add
        """
        self.software_list.append(software_description)

    def add_file_information(self, file_information):
        """Transfer file-level metadata (content keys and source files)
        from ``file_information`` into this document.
        """
        for key, value in file_information.contents.items():
            # None values are normalized to the empty string.
            payload = value if value is not None else ''
            self.add_file_contents({str(key): payload})
        for source_file in file_information.source_files:
            self.add_source_file(source_file)

    def add_file_contents(self, file_contents):
        """Add a key to the resulting :obj:`<fileDescription>`
        of the output document.

        Parameters
        ----------
        file_contents: :class:`str` or :class:`Mapping`
            The parameter to add
        """
        self.file_contents_list.append(file_contents)

    def remove_file_contents(self, name):
        """Remove the first file-contents entry whose name equals ``name``.

        Mapping entries are matched by their ``'name'`` key, or by their
        sole key when they contain exactly one; mappings with several
        keys and no ``'name'`` are never matched.

        Raises
        ------
        KeyError
            If no entry matches ``name``.
        """
        match_index = None
        for i, content in enumerate(self.file_contents_list):
            if isinstance(content, Mapping):
                if 'name' in content:
                    label = content['name']
                elif len(content) == 1:
                    label = next(iter(content.keys()))
                else:
                    continue
            else:
                label = content
            if label == name:
                match_index = i
                break
        if match_index is None:
            raise KeyError(name)
        self.file_contents_list.pop(match_index)

    def add_source_file(self, source_file):
        """Add the :class:`~.SourceFile` to the output document

        Parameters
        ----------
        source_file : :class:`~.SourceFile`
            The source file to add
        """
        params = [
            (getattr(key, 'accession', str(key)), value)
            for key, value in source_file.parameters.items()
        ]
        if source_file.id_format:
            params.append(str(source_file.id_format))
        if source_file.file_format:
            params.append(str(source_file.file_format))
        unwrapped = {
            "name": source_file.name,
            "location": source_file.location,
            "id": source_file.id,
            "params": params,
        }
        self.source_file_list.append(unwrapped)

    def add_data_processing(self, data_processing_description):
        """Add a new :class:`~.DataProcessingInformation` or :class:`~.ProcessingMethod`
        to the output document as a new :obj:`<dataProcessing>` entry describing one or
        more :obj:`<processingMethod>`s for a single referenced :class:`~.Software`
        instance.

        Parameters
        ----------
        data_processing_description : :class:`~.DataProcessingInformation` or :class:`~.ProcessingMethod`
            Data manipulation sequence to add to the document
        """
        description = data_processing_description
        if isinstance(description,
                      data_transformation.DataProcessingInformation):
            methods = []
            for method in description:
                params = [{'name': op.name, 'value': val}
                          for op, val in method]
                methods.append({
                    'software_reference': method.software_id,
                    'order': method.order,
                    'params': params,
                })
            self.data_processing_list.append({
                'id': description.id,
                'processing_methods': methods,
            })
        elif isinstance(description, data_transformation.ProcessingMethod):
            params = [{"name": op.name, 'value': val}
                      for op, val in description]
            # A standalone method gets a generated id based on how many
            # entries precede it.
            entry_id = "data_processing_%d" % len(self.data_processing_list)
            self.data_processing_list.append({
                'id': entry_id,
                'processing_methods': [{
                    'software_reference': description.software_id,
                    'order': description.order,
                    'params': params,
                }],
            })
        else:
            # Assume the caller supplied an already-packaged payload.
            self.data_processing_list.append(description)

    def add_processing_parameter(self, name, value=None):
        """Queue an extra term for the writer's own
        :obj:`<dataProcessing>` element.

        Parameters
        ----------
        name : str
            The processing technique's name
        value : obj
            The processing technique's value, if any
        """
        entry = dict(name=name, value=value)
        self.processing_parameters.append(entry)

    def add_sample(self, sample):
        """Queue a ``<sample>`` entry (a dict payload) to be written into
        the :obj:`<sampleList>` when the document header is emitted.
        """
        self.sample_list.append(sample)

    def copy_metadata_from(self, reader):
        """Copies the file-level metadata from an instance of :class:`~.ScanFileMetadataBase`
        into the metadata of the file to be written

        Parameters
        ----------
        reader : :class:`~.ScanFileMetadataBase`
            The source to copy metadata from
        """
        try:
            self.add_file_information(reader.file_description())
        except AttributeError:
            pass

        def _fetch(section_name):
            # Readers that do not expose a section contribute nothing.
            try:
                return getattr(reader, section_name)()
            except AttributeError:
                return []

        for config in _fetch('instrument_configuration'):
            self.add_instrument_configuration(config)
        for software in _fetch('software_list'):
            self.add_software(software)
        for data_processing_ in _fetch('data_processing'):
            self.add_data_processing(data_processing_)

    def _create_file_description(self):
        # Emit the <fileDescription> section from the accumulated file
        # contents and source file entries.
        self.writer.file_description(self.file_contents_list,
                                     self.source_file_list)

    def _create_software_list(self):
        """Write the accumulated :obj:`<softwareList>`, appending an entry
        for this version of ms_deisotope itself.

        The id of the self-entry is chosen so it does not collide with any
        ``ms_deisotope_<n>`` ids copied from the source file's metadata.
        """
        software_list = []
        ms_deisotope_entries = []
        for sw in self.software_list:
            d = {'id': sw.id, 'version': sw.version}
            if sw.is_name(sw.name):
                d[sw.name] = ''
            else:
                # NOTE(review): not a recognized CV term name, so record it
                # under MS:1000799 — presumably the "custom unreleased
                # software tool" accession; confirm against the PSI-MS CV.
                d['MS:1000799'] = sw.name
            d['params'] = list(sw.options.items())
            if 'ms_deisotope' in str(sw.id):
                ms_deisotope_entries.append(str(sw.id))
            software_list.append(d)

        # Find the first unused ms_deisotope_<n> id, falling back to a
        # UUID-based id if the first 99 are somehow all taken.
        for i in range(1, 100):
            query = 'ms_deisotope_%d' % i
            if query not in ms_deisotope_entries:
                new_entry_id = query
                break
        else:
            new_entry_id = 'ms_deisotope_%s' % str(uuid4())

        software_list.append({
            # Bug fix: the id was previously hard-coded to "ms_deisotope_1",
            # defeating the collision-avoidance search above.
            "id": new_entry_id,
            'version': lib_version,
            'ms_deisotope': "",
        })
        self.writer.software_list(software_list)

    def _create_sample_list(self):
        # Emit the <sampleList> section from the accumulated samples.
        self.writer.sample_list(self.sample_list)

    def build_processing_method(self,
                                order=1,
                                picked_peaks=True,
                                smoothing=True,
                                baseline_reduction=True,
                                additional_parameters=tuple(),
                                software_id=None,
                                data_processing_id=None):
        """Assemble a :class:`~.DataProcessingInformation` describing the
        transformations this writer applies.

        The result is returned without being registered on the document.
        """
        if software_id is None:
            software_id = "ms_deisotope_1"
        if data_processing_id is None:
            data_processing_id = 'ms_deisotope_processing_%d' % len(
                self.data_processing_list)

        method = data_transformation.ProcessingMethod(software_id=software_id)
        if self.deconvoluted:
            for step in ("deisotoping", "charge deconvolution",
                         "precursor recalculation"):
                method.add(step)

        optional_steps = [
            (picked_peaks, "peak picking"),
            (smoothing, "smoothing"),
            (baseline_reduction, "baseline reduction"),
        ]
        for enabled, step in optional_steps:
            if enabled:
                method.add(step)

        method.add("Conversion to mzML")
        method.update(additional_parameters)
        method.update(self.processing_parameters)
        method.order = order
        return data_transformation.DataProcessingInformation(
            [method], data_processing_id)

    def _create_data_processing_list(self):
        # Emit the <dataProcessingList> section from accumulated entries.
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        # Emit the <instrumentConfigurationList> section from the
        # accumulated configurations.
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        # Emit the document header sections, then open the <run> and
        # <spectrumList> elements; both stay open until complete() runs.
        self._create_file_description()
        self._create_sample_list()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()

        # Fall back to 1 as the run id when no sample name was given.
        self._run_tag = self.writer.run(id=self.sample_name or 1,
                                        sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def has_started_writing_spectra(self):
        """Whether any spectrum has been written to the document yet."""
        return self._has_started_writing_spectra

    def _pack_activation(self, activation_information):
        """Pack :class:`~.ActivationInformation` into the :class:`dict`
        structure that :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        activation_information: :class:`~.ActivationInformation`

        Returns
        -------
        :class:`dict`
        """
        params = [{"name": str(activation_information.method)}]
        is_multiple = activation_information.is_multiple_dissociation()
        if is_multiple:
            params.extend(
                {"name": str(method)}
                for method in activation_information.methods[1:])
        # NOTE: Only correct for CID/HCD spectra with absolute collision energies, but that is all I have
        # to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volt",
        })
        if is_multiple:
            energies = activation_information.energies[1:]
            supplemental_energy = None
            if activation_information.has_supplemental_dissociation():
                # The supplemental energy is conventionally last.
                supplemental_energy = energies[-1]
                energies = energies[:-1]
            params.extend({
                "name": "collision energy",
                "value": energy,
                "unitName": "electron volt",
            } for energy in energies)
            if supplemental_energy is not None:
                params.append({
                    "name": 'supplemental collision energy',
                    "value": supplemental_energy,
                    "unitName": "electron volt",
                })

        for key, val in activation_information.data.items():
            arg = {"name": key, "value": val}
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self,
                                    precursor_information,
                                    activation_information=None,
                                    isolation_window=None):
        """Repackage the :class:`~.PrecursorInformation`, :class:`~.ActivationInformation`,
        and :class:`~.IsolationWindow` into the nested :class:`dict` structure that
        :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        precursor_information : :class:`~.PrecursorInformation`
        activation_information : :class:`~.ActivationInformation`, optional
        isolation_window : :class:`~.IsolationWindow`, optional

        Returns
        -------
        :class:`dict`
        """
        # When deconvolution populated the extracted fields, prefer them
        # over the raw read values.
        extracted_neutral_mass = precursor_information.extracted_neutral_mass
        if extracted_neutral_mass != 0:
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id,
                "params": [
                    {"ms_deisotope:defaulted": precursor_information.defaulted},
                    {"ms_deisotope:orphan": precursor_information.orphan},
                ],
            }
            for p in (precursor_information.coisolation or ()):
                package['params'].append({
                    "name": "ms_deisotope:coisolation",
                    "value": "%f %f %d" % (p.neutral_mass, p.intensity, p.charge),
                })
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id,
            }
        if package['charge'] == ChargeNotProvided:
            package["charge"] = None
        if activation_information is not None:
            package['activation'] = self._pack_activation(
                activation_information)
        if isolation_window is not None:
            package['isolation_window_args'] = {
                "lower": isolation_window.lower,
                "target": isolation_window.target,
                "upper": isolation_window.upper,
            }
        return package

    def _prepare_extra_arrays(self, scan):
        extra_arrays = []
        if self.deconvoluted:
            score_array = [peak.score for peak in scan.deconvoluted_peak_set]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array(
                [peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def _get_annotations(self, scan):
        skip = {
            'filter string',
            'base peak intensity',
            'base peak m/z',
            'lowest observed m/z',
            'highest observed m/z',
            'total ion current',
        }
        annotations = []
        for key, value in scan.annotations.items():
            if key in skip:
                continue
            annotations.append({key: value})
        return annotations

    def save_scan(self, scan, **kwargs):
        """Write a :class:`~.Scan` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::

            If no spectra have been written to the output document
            yet, this method will call :meth:`_add_spectrum_list` and
            writes all of the metadata lists out. After this point,
            no new document-level metadata can be added.

        Parameters
        ----------
        scan: :class:`~.Scan`
            The scan to write.
        deconvoluted: :class:`bool`
            Whether the scan to write out should include deconvolution information
        """
        if not self._has_started_writing_spectra:
            # The first spectrum flushes all accumulated document-level
            # metadata and opens the <run>/<spectrumList> tags.
            self._add_spectrum_list()
            self._has_started_writing_spectra = True

        # Per-call override of the serializer-wide deconvolution flag.
        deconvoluted = kwargs.get("deconvoluted", self.deconvoluted)
        # Choose the peak source: deconvoluted peaks, then centroided
        # peaks, then raw profile arrays, in that order of preference.
        if deconvoluted:
            centroided = True
            precursor_peaks = scan.deconvoluted_peak_set
        elif scan.peak_set:
            centroided = True
            precursor_peaks = scan.peak_set
        else:
            centroided = False
            precursor_peaks = scan.arrays
        polarity = scan.polarity
        # Only deconvoluted peaks carry charge state assignments.
        if deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None

        if centroided:
            descriptors = SpectrumDescription.from_peak_set(precursor_peaks)
            mz_array = [p.mz for p in precursor_peaks]
            intensity_array = [p.intensity for p in precursor_peaks]
        else:
            descriptors = SpectrumDescription.from_arrays(precursor_peaks)
            mz_array = precursor_peaks.mz
            intensity_array = precursor_peaks.intensity

        instrument_config = scan.instrument_configuration
        if instrument_config is None:
            instrument_config_id = None
        else:
            instrument_config_id = instrument_config.id

        scan_parameters, scan_window_list = self.extract_scan_event_parameters(
            scan)

        if scan.precursor_information:
            precursor_information = self._pack_precursor_information(
                scan.precursor_information, scan.activation,
                scan.isolation_window)
        else:
            precursor_information = None

        spectrum_params = [
            {
                "name": "ms level",
                "value": scan.ms_level
            },
            {
                "name": "MS1 spectrum"
            } if scan.ms_level == 1 else {
                "name": "MSn spectrum"
            },
        ] + list(descriptors)

        # Non-standard annotations ride along as additional params.
        spectrum_params.extend(self._get_annotations(scan))

        self.writer.write_spectrum(
            mz_array,
            intensity_array,
            charge_array,
            id=scan.id,
            params=spectrum_params,
            centroided=centroided,
            polarity=polarity,
            scan_start_time=scan.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(scan),
            instrument_configuration_id=instrument_config_id,
            precursor_information=precursor_information,
            scan_params=scan_parameters,
            scan_window_list=scan_window_list,
            encoding=self.data_encoding)

        # Accumulate per-scan totals so the TIC and BPC chromatograms can
        # be written after all spectra are done.
        self.total_ion_chromatogram_tracker[scan.scan_time] = (
            descriptors["total ion current"])
        self.base_peak_chromatogram_tracker[scan.scan_time] = (
            descriptors["base peak intensity"])

    def save_scan_bunch(self, bunch, **kwargs):
        """Write a :class:`~.ScanBunch` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::

            If no spectra have been written to the output document
            yet, this method will call :meth:`_add_spectrum_list` and
            writes all of the metadata lists out. After this point,
            no new document-level metadata can be added.

        Parameters
        ----------
        bunch : :class:`~.ScanBunch`
            The scan set to write.
        **kwargs
            Forwarded to :meth:`save_scan` (e.g. ``deconvoluted``).
        """
        # Fix: kwargs were accepted but silently dropped, so per-call
        # options such as ``deconvoluted`` never reached save_scan.
        if bunch.precursor is not None:
            self.save_scan(bunch.precursor, **kwargs)

        for prod in bunch.products:
            self.save_scan(prod, **kwargs)

        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def extract_scan_event_parameters(self, scan):
        """Package :class:`~.ScanAcquisitionInformation` into a pair of
        :class:`list`s that :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        scan : :class:`~.Scan`

        Returns
        -------
        scan_parameters: :class:`list`
            Parameters qualifying the scan event (:class:`dict`)
        scan_window_list: :class:`list`
            Packed pairs of scan windows (:class:`list`)
        """
        scan_parameters = []
        scan_window_list = []
        acquisition_info = scan.acquisition_information
        filter_string = scan.annotations.get("filter_string")
        if filter_string is not None:
            scan_parameters.append(
                {"name": "filter string", "value": filter_string})
        if acquisition_info is not None and len(acquisition_info) > 0:
            # Only the first scan event is described here.
            event = acquisition_info[0]
            if event.has_ion_mobility():
                scan_parameters.append({
                    "name": "ion mobility drift time",
                    "value": event.drift_time,
                    "unit_name": "millisecond",
                    "unit_cv_ref": "UO",
                    "unit_accession": "UO:0000028",
                })
            injection_time = event.injection_time
            if injection_time is not None:
                # MS:1000927 is referenced by accession rather than name.
                scan_parameters.append({
                    "accession": "MS:1000927",
                    "value": injection_time,
                    "unit_name": getattr(injection_time, "unit_info", None),
                })
            for trait_name, trait_value in event.traits.items():
                scan_parameters.append({
                    "name": trait_name,
                    "value": trait_value,
                    "unit_name": getattr(trait_value, "unit_info", None),
                })
            scan_window_list = list(event)
        return scan_parameters, scan_window_list

    def save_chromatogram(self,
                          chromatogram_dict,
                          chromatogram_type,
                          params=None,
                          **kwargs):
        """Write a time -> intensity mapping out as a single
        :obj:`<chromatogram>` element."""
        times, intensities = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            times,
            intensities,
            id=kwargs.get('id'),
            chromatogram_type=chromatogram_type,
            compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        d = dict(chromatogram=self.total_ion_chromatogram_tracker,
                 chromatogram_type='total ion current chromatogram',
                 id='TIC')
        if len(self.total_ion_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

        d = dict(chromatogram=self.base_peak_chromatogram_tracker,
                 chromatogram_type="basepeak chromatogram",
                 id='BPC')
        if len(self.base_peak_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

    def write_chromatograms(self):
        """Drain the chromatogram queue into a :obj:`<chromatogramList>`."""
        queue = self.chromatogram_queue
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(queue))
        with self._chromatogram_list_tag:
            for descriptor in queue:
                data = descriptor.pop("chromatogram")
                self.save_chromatogram(data, **descriptor)

    def complete(self):
        """Finish writing to the output document.

        This closes the open list tags, empties the chromatogram accumulator,
        and closes the :obj:`<mzML>` tag, and attempts to flush the output file.
        """
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            # Chromatograms must be written while the <run> tag is still open.
            self._make_default_chromatograms()
            self.write_chromatograms()
        if self._run_tag is not None:
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                # In-memory or wrapped streams expose no file name to derive
                # the side-car index path from; fall back to a fixed name.
                # NOTE(review): "_detatched" looks like a typo for
                # "_detached", but the spelling is part of the on-disk file
                # name, so it is left unchanged here.
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name),
                          'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                # Best-effort: the mzML itself is complete even if the
                # extended index cannot be written.
                warnings.warn(
                    "Could not write extended index file due to error %r" %
                    (e, ))

        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            pass

    def format(self):
        """Deprecated no-op retained so older callers keep working."""

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def close(self):
        """Finalize the document, then close the underlying stream if it
        looks like an open file object."""
        self.complete()
        handle = self.handle
        # A missing ``closed`` attribute (or an already-closed handle)
        # means there is nothing for us to close.
        if getattr(handle, "closed", True):
            return
        try:
            handle.close()
        except AttributeError:
            pass
Example #17
0
 def read_index_file(self):
     """Load the serialized extended scan index from its side-car file.

     The deserialized :class:`ExtendedScanIndex` is stored on
     :attr:`extended_index`.
     """
     # NOTE(review): _index_file_name is accessed here as an attribute,
     # not called — presumably it is a property; confirm in the class body.
     with open(self._index_file_name) as handle:
         self.extended_index = ExtendedScanIndex.deserialize(handle)
Example #18
0
class MzMLSerializer(ScanSerializerBase):
    """Write :mod:`ms_deisotope` data structures to a file in mzML format.

    Attributes
    ----------
    base_peak_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to base peak intensity. This is
        used to write the *base peak chromatogram*.
    chromatogram_queue : :class:`list`
        Accumulate chromatogram data structures which will be written out
        after all spectra have been written to file.
    compression : :class:`str`
        The compression type to use for binary data arrays. Should be one of
        :obj:`"zlib"`, :obj:`"none"`, or :obj:`None`
    data_encoding : :class:`dict` or :class:`int` or :obj:`numpy.dtype` or :class:`str`
        The encoding specification to specify the binary encoding of numeric data arrays
        that is passed to :meth:`~.MzMLWriter.write_spectrum` and related methods.
    data_processing_list : :class:`list`
        List of packaged :class:`~.DataProcessingInformation` to write out
    deconvoluted : bool
        Indicates whether the translation should include extra deconvolution information
    file_contents_list : :class:`list`
        List of terms to include in the :obj:`<fileContents>` tag
    handle : file-like
        The file-like object being written to
    indexer : :class:`~.ExtendedScanIndex`
        The external index builder
    instrument_configuration_list : :class:`list`
        List of packaged :class:`~.InstrumentInformation` to write out
    n_spectra : int
        The number of spectra to provide a size for in the :obj:`<spectrumList>`
    processing_parameters : :class:`list`
        List of additional terms to include in a newly created :class:`~.DataProcessingInformation`
    sample_list : :class:`list`
        List of :class:`~.SampleRun` objects to write out
    sample_name : :class:`str`
        Default sample name
    sample_run : :class:`~.SampleRun`
        Description
    software_list : :class:`list`
        List of packaged :class:`~.Software` objects to write out
    source_file_list : :class:`list`
        List of packaged :class:`~.SourceFile` objects to write out
    total_ion_chromatogram_tracker : :class:`OrderedDict`
        Accumulated mapping of scan time to total intensity. This is
        used to write the *total ion chromatogram*.
    writer : :class:`~psims.mzml.writer.MzMLWriter`
        The lower level writer implementation
    """

    def __init__(self, handle, n_spectra=int(2e5), compression=None,
                 deconvoluted=True, sample_name=None, build_extra_index=True,
                 data_encoding=None):
        # Fix: guard against a missing psims dependency *before* touching
        # any attribute of ``writer``. Previously the default
        # ``data_encoding`` was built from ``writer.MZ_ARRAY`` &c. first,
        # so a missing psims surfaced as an AttributeError instead of the
        # intended ImportError.
        if writer is None:
            raise ImportError(
                "Cannot write mzML without psims. Please install psims to use this feature.")
        if data_encoding is None:
            data_encoding = {
                writer.MZ_ARRAY: np.float64,
                writer.INTENSITY_ARRAY: np.float32,
                writer.CHARGE_ARRAY: np.int32,
            }
        if compression is None:
            compression = writer.COMPRESSION_ZLIB
        super(MzMLSerializer, self).__init__()
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self.data_encoding = data_encoding
        self._has_started_writing_spectra = False

        # Open the <mzML> document immediately; complete() closes it.
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None

        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted

        self._initialize_description_lists()
        self._init_sample(sample_name)

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()
        self.chromatogram_queue = []

        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def _init_sample(self, sample_name, **kwargs):
        """Create this document's :class:`~.SampleRun` with a fresh UUID
        and register it in the sample list."""
        self.sample_name = sample_name
        run = SampleRun(name=sample_name, uuid=str(uuid4()))
        self.sample_run = run
        self.add_sample({
            "name": run.name,
            "id": "sample_1",
            "params": [{"name": "SampleRun-UUID", "value": run.uuid}],
        })

    def _initialize_description_lists(self):
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []

        self.processing_parameters = []

    def add_instrument_configuration(self, configuration):
        """Add an :class:`~.InstrumentInformation` object to
        the output document.

        Parameters
        ----------
        configuration: :class:`~.InstrumentInformation`
            The instrument configuration to add
        """
        # Map each component group category onto the psims element type;
        # group kinds the writer has no element for are skipped.
        tag_for_type = {
            'source': self.writer.Source,
            'analyzer': self.writer.Analyzer,
            'detector': self.writer.Detector,
        }
        component_list = []
        for group in configuration.groups:
            tag = tag_for_type.get(group.type)
            if tag is None:
                continue
            component_list.append(
                tag(order=group.order, params=[part.name for part in group]))
        self.instrument_configuration_list.append(
            self.writer.InstrumentConfiguration(
                configuration.id, component_list))

    def add_software(self, software_description):
        """Register a :class:`~.Software` description for inclusion in the
        :obj:`<softwareList>` section of the output document.

        Parameters
        ----------
        software_description : :class:`~.Software`
            The software description to add
        """
        self.software_list.append(software_description)

    def add_file_information(self, file_information):
        '''Copy the contents and source files of a :class:`~.FileInformation`
        into this document's file description section.

        Parameters
        ----------
        file_information: :class:`~.FileInformation`
            The information to add.
        '''
        for key, value in file_information.contents.items():
            # psims cannot serialize a None value; coerce to empty string.
            self.add_file_contents({str(key): '' if value is None else value})
        for source_file in file_information.source_files:
            self.add_source_file(source_file)

    def add_file_contents(self, file_contents):
        """Record a content descriptor for the resulting
        :obj:`<fileDescription>` element of the output document.

        Parameters
        ----------
        file_contents: :class:`str` or :class:`Mapping`
            The parameter to add
        """
        self.file_contents_list.append(file_contents)

    def remove_file_contents(self, name):
        """Remove a parameter from the resulting :obj:`<fileDescription>`
        of the output document.

        Parameters
        ----------
        name: :class:`str`
            The name of the parameter to remove

        Raises
        ------
        KeyError:
            When the content is not found.
        """
        # Fixes: the docstring documented a nonexistent ``file_contents``
        # parameter, and an ``if i is None`` guard was unreachable dead
        # code (the for/else already raises for an empty list).
        for i, content in enumerate(self.file_contents_list):
            if isinstance(content, Mapping):
                # A mapping matches either via its explicit "name" key or,
                # for a single-entry mapping, via its lone key.
                if 'name' in content:
                    content = content['name']
                elif len(content) == 1:
                    content = list(content.keys())[0]
                else:
                    continue
            if content == name:
                self.file_contents_list.pop(i)
                return
        raise KeyError(name)

    def add_source_file(self, source_file):
        """Add the :class:`~.SourceFile` to the output document.

        Parameters
        ----------
        source_file : :class:`~.SourceFile`
            The source file to add
        """
        # Parameters are keyed by CV accession when available, otherwise
        # by the stringified key.
        params = [(getattr(key, 'accession', str(key)), value)
                  for key, value in source_file.parameters.items()]
        if source_file.id_format:
            params.append(str(source_file.id_format))
        if source_file.file_format:
            params.append(str(source_file.file_format))
        self.source_file_list.append({
            "name": source_file.name,
            "location": source_file.location,
            "id": source_file.id,
            "params": params,
        })

    def add_data_processing(self, data_processing_description):
        """Add a new :class:`~.DataProcessingInformation` or :class:`~.ProcessingMethod`
        to the output document as a new :obj:`<dataProcessing>` entry describing one or
        more :obj:`<processingMethod>`s for a single referenced :class:`~.Software`
        instance.

        Parameters
        ----------
        data_processing_description : :class:`~.DataProcessingInformation` or :class:`~.ProcessingMethod`
            Data manipulation sequence to add to the document
        """
        def _pack_method(method):
            # Flatten a ProcessingMethod's (operation, value) pairs into
            # psims-style param dicts.
            return {
                'software_reference': method.software_id,
                'order': method.order,
                'params': [{'name': op.name, 'value': val}
                           for op, val in method],
            }

        if isinstance(data_processing_description,
                      data_transformation.DataProcessingInformation):
            self.data_processing_list.append({
                'id': data_processing_description.id,
                'processing_methods': [
                    _pack_method(m) for m in data_processing_description],
            })
        elif isinstance(data_processing_description,
                        data_transformation.ProcessingMethod):
            # A bare method gets a generated identifier.
            self.data_processing_list.append({
                'id': "data_processing_%d" % len(self.data_processing_list),
                'processing_methods': [
                    _pack_method(data_processing_description)],
            })
        else:
            # Assume the caller already packaged the description.
            self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value=None):
        """Record an extra processing term for the writer's own
        :obj:`<dataProcessing>` element.

        Parameters
        ----------
        name : str
            The processing technique's name
        value : obj
            The processing technique's value, if any
        """
        self.processing_parameters.append({"name": name, "value": value})

    def add_sample(self, sample):
        """Append a packaged sample description to the :obj:`<sampleList>`."""
        self.sample_list.append(sample)

    def copy_metadata_from(self, reader):
        """Copies the file-level metadata from an instance of :class:`~.ScanFileMetadataBase`
        into the metadata of the file to be written

        Parameters
        ----------
        reader : :class:`~.ScanFileMetadataBase`
            The source to copy metadata from
        """
        # Each metadata accessor is optional on the reader; a missing one
        # simply contributes nothing.
        try:
            self.add_file_information(reader.file_description())
        except AttributeError:
            pass

        try:
            configs = reader.instrument_configuration()
        except AttributeError:
            configs = []
        for config in configs:
            self.add_instrument_configuration(config)

        try:
            software_entries = reader.software_list()
        except AttributeError:
            software_entries = []
        for software in software_entries:
            self.add_software(software)

        try:
            processing_entries = reader.data_processing()
        except AttributeError:
            processing_entries = []
        for data_processing_ in processing_entries:
            self.add_data_processing(data_processing_)

    def _create_file_description(self):
        self.writer.file_description(
            self.file_contents_list, self.source_file_list)

    def _create_software_list(self):
        """Emit the :obj:`<softwareList>`, appending an entry for
        ms_deisotope itself under an identifier not already in use."""
        packed = []
        used_ms_deisotope_ids = []
        for sw in self.software_list:
            entry = {
                'id': sw.id,
                'version': sw.version
            }
            if sw.is_name(sw.name):
                # The name is a recognized CV term and can be used directly.
                entry[sw.name] = ''
            else:
                # Presumably the generic "custom software" CV accession.
                entry['MS:1000799'] = sw.name
            entry['params'] = list(sw.options.items())
            if 'ms_deisotope' in str(sw.id):
                used_ms_deisotope_ids.append(str(sw.id))
            packed.append(entry)

        # Find the first free numbered identifier, falling back to a UUID
        # suffix if the first 99 are somehow taken.
        for i in range(1, 100):
            candidate = 'ms_deisotope_%d' % i
            if candidate not in used_ms_deisotope_ids:
                new_entry_id = candidate
                break
        else:
            new_entry_id = 'ms_deisotope_%s' % str(uuid4())

        packed.append({
            "id": new_entry_id,
            'version': lib_version,
            'ms_deisotope': "",
        })
        self.writer.software_list(packed)

    def _create_sample_list(self):
        self.writer.sample_list(self.sample_list)

    def build_processing_method(self, order=1, picked_peaks=True, smoothing=True,
                                baseline_reduction=True, additional_parameters=tuple(),
                                software_id=None, data_processing_id=None):
        """Construct a :class:`~.DataProcessingInformation` describing the
        operations this writer performed.

        Parameters
        ----------
        order : int
            The order value assigned to the generated processing method
        picked_peaks : bool
            Whether peak picking was performed
        smoothing : bool
            Whether signal smoothing was performed
        baseline_reduction : bool
            Whether baseline reduction was performed
        additional_parameters : tuple
            Extra parameters to attach to the method
        software_id : str, optional
            The software entry to reference; defaults to "ms_deisotope_1"
        data_processing_id : str, optional
            The identifier for the generated element

        Returns
        -------
        :class:`~.DataProcessingInformation`
        """
        if software_id is None:
            software_id = "ms_deisotope_1"
        if data_processing_id is None:
            data_processing_id = 'ms_deisotope_processing_%d' % len(
                self.data_processing_list)

        method = data_transformation.ProcessingMethod(software_id=software_id)
        if self.deconvoluted:
            for operation in ("deisotoping", "charge deconvolution",
                              "precursor recalculation"):
                method.add(operation)
        if picked_peaks:
            method.add("peak picking")
        if smoothing:
            method.add("smoothing")
        if baseline_reduction:
            method.add("baseline reduction")
        method.add("Conversion to mzML")

        method.update(additional_parameters)
        method.update(self.processing_parameters)
        method.order = order
        return data_transformation.DataProcessingInformation(
            [method], data_processing_id)

    def _create_data_processing_list(self):
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        """Flush all document-level metadata sections and open the
        :obj:`<run>`/:obj:`<spectrumList>` elements for writing spectra.

        After this runs no further document-level metadata may be added;
        the opened tags are closed later by :meth:`complete`.
        """
        # The metadata sections must be written in schema order before any
        # spectra are emitted.
        self._create_file_description()
        self._create_sample_list()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()

        # Enter the <run> and <spectrumList> context managers by hand;
        # they stay open across many save_scan calls.
        self._run_tag = self.writer.run(
            id=self.sample_name or 1,
            sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def has_started_writing_spectra(self):
        """Whether any spectra (and thus all metadata) have been written."""
        return self._has_started_writing_spectra

    def _pack_activation(self, activation_information):
        """Pack :class:`~.ActivationInformation` into a :class:`dict` structure
        which that :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        activation_information: :class:`~.ActivationInformation`

        Returns
        -------
        :class:`dict`
        """
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        if activation_information.is_multiple_dissociation():
            for method in activation_information.methods[1:]:
                params.append({"name": str(method)})
        # NOTE: Only correct for CID/HCD spectra with absolute collision energies, but that is all I have
        # to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volt"
        })
        if activation_information.is_multiple_dissociation():
            energies = activation_information.energies[1:]
            supplemental_energy = None
            if activation_information.has_supplemental_dissociation():
                supplemental_energy = energies[-1]
                energies = energies[:-1]
            for energy in energies:
                params.append({
                    "name": "collision energy",
                    "value": energy,
                    "unitName": "electron volt"
                })
            if supplemental_energy is not None:
                params.append({
                    "name": 'supplemental collision energy',
                    "value": supplemental_energy,
                    "unitName": "electron volt"
                })

        for key, val in activation_information.data.items():
            arg = {
                "name": key,
                "value": val
            }
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self, precursor_information, activation_information=None,
                                    isolation_window=None):
        """Repackage the :class:`~.PrecursorInformation`, :class:`~.ActivationInformation`,
        and :class:`~.IsolationWindow` into the nested :class:`dict` structure that
        :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        precursor_information : :class:`~.PrecursorInformation`
        activation_information : :class:`~.ActivationInformation`, optional
        isolation_window : :class:`~.IsolationWindow`, optional

        Returns
        -------
        :class:`dict`
        """
        pinfo = precursor_information
        # If deconvolution filled this PrecursorInformation in, the
        # extracted fields are populated and take precedence over the
        # values read from the source file.
        if pinfo.extracted_neutral_mass != 0:
            package = {
                "mz": pinfo.extracted_mz,
                "intensity": pinfo.extracted_intensity,
                "charge": pinfo.extracted_charge,
                "scan_id": pinfo.precursor_scan_id,
                "params": [
                    {"ms_deisotope:defaulted": pinfo.defaulted},
                    {"ms_deisotope:orphan": pinfo.orphan},
                ]
            }
            for peak in (pinfo.coisolation or ()):
                package['params'].append({
                    "name": "ms_deisotope:coisolation",
                    "value": "%f %f %d" % (
                        peak.neutral_mass, peak.intensity, peak.charge)
                })
        else:
            package = {
                "mz": pinfo.mz,
                "intensity": pinfo.intensity,
                "charge": pinfo.charge,
                "scan_id": pinfo.precursor_scan_id
            }
        # psims expects a missing charge to be None, not the sentinel.
        if package['charge'] == ChargeNotProvided:
            package["charge"] = None
        if activation_information is not None:
            package['activation'] = self._pack_activation(
                activation_information)
        if isolation_window is not None:
            package['isolation_window_args'] = {
                "lower": isolation_window.lower,
                "target": isolation_window.target,
                "upper": isolation_window.upper
            }
        return package

    def _prepare_extra_arrays(self, scan):
        """Build the auxiliary data arrays written alongside the spectrum.

        When serializing deconvoluted spectra this contributes the per-peak
        deconvolution scores and the packed isotopic envelopes; otherwise
        no extra arrays are produced.
        """
        if not self.deconvoluted:
            return []
        peaks = scan.deconvoluted_peak_set
        scores = [p.score for p in peaks]
        envelopes = envelopes_to_array([p.envelope for p in peaks])
        return [
            ("deconvolution score array", scores),
            ("isotopic envelopes array", envelopes),
        ]

    def _get_annotations(self, scan):
        """Collect scan annotations to emit as extra spectrum parameters.

        Annotations already captured by the standard spectrum descriptors
        (filter string, TIC, base peak, observed m/z bounds) are omitted.
        """
        redundant = {
            'filter string', 'base peak intensity', 'base peak m/z',
            'lowest observed m/z', 'highest observed m/z',
            'total ion current',
        }
        return [
            {key: value}
            for key, value in scan.annotations.items()
            if key not in redundant
        ]

    def save_scan(self, scan, **kwargs):
        """Write a :class:`~.Scan` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::

            If no spectra have been written to the output document
            yet, this method will call :meth:`_add_spectrum_list` and
            writes all of the metadata lists out. After this point,
            no new document-level metadata can be added.

        Parameters
        ----------
        scan: :class:`~.Scan`
            The scan to write.
        deconvoluted: :class:`bool`
            Whether the scan to write out should include deconvolution information
        """
        # Writing the first spectrum flushes and freezes the document-level
        # metadata lists.
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True

        # Select the peak source: deconvoluted peaks first, then picked
        # (centroided) peaks, falling back to the raw profile arrays.
        deconvoluted = kwargs.get("deconvoluted", self.deconvoluted)
        if deconvoluted:
            centroided = True
            precursor_peaks = scan.deconvoluted_peak_set
        elif scan.peak_set:
            centroided = True
            precursor_peaks = scan.peak_set
        else:
            centroided = False
            precursor_peaks = scan.arrays
        polarity = scan.polarity
        # Only deconvoluted peaks carry per-peak charge assignments.
        if deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None

        if centroided:
            descriptors = SpectrumDescription.from_peak_set(precursor_peaks)
            mz_array = [p.mz for p in precursor_peaks]
            intensity_array = [p.intensity for p in precursor_peaks]
        else:
            descriptors = SpectrumDescription.from_arrays(precursor_peaks)
            mz_array = precursor_peaks.mz
            intensity_array = precursor_peaks.intensity

        instrument_config = scan.instrument_configuration
        if instrument_config is None:
            instrument_config_id = None
        else:
            instrument_config_id = instrument_config.id

        scan_parameters, scan_window_list = self.extract_scan_event_parameters(
            scan)

        if scan.precursor_information:
            precursor_information = self._pack_precursor_information(
                scan.precursor_information,
                scan.activation,
                scan.isolation_window)
        else:
            precursor_information = None

        spectrum_params = [
            {"name": "ms level", "value": scan.ms_level},
            {"name": "MS1 spectrum"} if scan.ms_level == 1 else {"name": "MSn spectrum"},
        ] + list(descriptors)

        spectrum_params.extend(self._get_annotations(scan))

        self.writer.write_spectrum(
            mz_array, intensity_array,
            charge_array,
            id=scan.id, params=spectrum_params,
            centroided=centroided,
            polarity=polarity,
            scan_start_time=scan.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(scan),
            instrument_configuration_id=instrument_config_id,
            precursor_information=precursor_information,
            scan_params=scan_parameters,
            scan_window_list=scan_window_list,
            encoding=self.data_encoding)

        # Accumulate TIC and base peak points so the default chromatograms
        # can be reconstructed when the document is completed.
        self.total_ion_chromatogram_tracker[
            scan.scan_time] = (descriptors["total ion current"])
        self.base_peak_chromatogram_tracker[
            scan.scan_time] = (descriptors["base peak intensity"])

    def save_scan_bunch(self, bunch, **kwargs):
        """Write a :class:`~.ScanBunch` to the output document
        as a collection of related :obj:`<spectrum>` tags.

        .. note::

            If no spectra have been written to the output document
            yet, this method will call :meth:`_add_spectrum_list` and
            writes all of the metadata lists out. After this point,
            no new document-level metadata can be added.

        Parameters
        ----------
        bunch : :class:`~.ScanBunch`
            The scan set to write.
        **kwargs
            Forwarded to :meth:`save_scan` for every scan in the bunch
            (e.g. the ``deconvoluted`` override).
        """
        # BUG FIX: kwargs were previously accepted but never forwarded, so a
        # caller's per-bunch options (like deconvoluted=) were silently lost.
        if bunch.precursor is not None:
            self.save_scan(bunch.precursor, **kwargs)

        for prod in bunch.products:
            self.save_scan(prod, **kwargs)

        # Record the bunch in the extended index when one is being built.
        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def extract_scan_event_parameters(self, scan):
        """Package :class:`~.ScanAcquisitionInformation` into a pair of
        :class:`list`s that :class:`~psims.mzml.writer.MzMLWriter` expects.

        Parameters
        ----------
        scan : :class:`~.Scan`

        Returns
        -------
        scan_parameters: :class:`list`
            Parameters qualifying the scan event (:class:`dict`)
        scan_window_list: :class:`list`
            Packed pairs of scan windows (:class:`list`)
        """
        params = []
        windows = []
        # NOTE(review): this reads the "filter_string" annotation key, while
        # _get_annotations skips "filter string" (with a space) -- confirm
        # which key the loaders actually populate.
        filter_string = scan.annotations.get("filter_string")
        if filter_string is not None:
            params.append({"name": "filter string", "value": filter_string})
        acquisition_info = scan.acquisition_information
        if acquisition_info is not None and len(acquisition_info) > 0:
            # Only the first scan event is serialized.
            event = acquisition_info[0]
            if event.has_ion_mobility():
                params.append({
                    "name": "ion mobility drift time",
                    "value": event.drift_time,
                    "unit_name": "millisecond",
                    'unit_cv_ref': "UO",
                    "unit_accession": 'UO:0000028'
                })
            injection_time = event.injection_time
            if injection_time is not None:
                # MS:1000927 is "ion injection time".
                params.append({
                    "accession": 'MS:1000927', "value": injection_time,
                    "unit_name": getattr(injection_time, 'unit_info', None),
                })
            for trait_name, trait_value in event.traits.items():
                params.append({
                    "name": trait_name,
                    "value": trait_value,
                    'unit_name': getattr(trait_value, 'unit_info', None),
                })
            windows = list(event)
        return params, windows

    def save_chromatogram(self, chromatogram_dict, chromatogram_type, params=None, **kwargs):
        """Write a single chromatogram to the output document.

        Parameters
        ----------
        chromatogram_dict : :class:`dict`
            Mapping of scan time -> intensity, in elution order.
        chromatogram_type : :class:`str`
            The controlled-vocabulary chromatogram type name.
        params : :class:`list`, optional
            Additional parameter dictionaries to attach.
        **kwargs
            ``id`` is forwarded as the chromatogram identifier.
        """
        if chromatogram_dict:
            time_array, intensity_array = zip(*chromatogram_dict.items())
        else:
            # BUG FIX: zip(*{}.items()) yields nothing and cannot be unpacked
            # into two names (ValueError); write an empty chromatogram instead.
            time_array, intensity_array = (), ()
        self.writer.write_chromatogram(
            time_array, intensity_array, id=kwargs.get('id'),
            chromatogram_type=chromatogram_type, compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        """Queue the accumulated TIC and BPC chromatograms for writing.

        Empty trackers are skipped so that no zero-length chromatogram is
        emitted.
        """
        defaults = [
            (self.total_ion_chromatogram_tracker,
             'total ion current chromatogram', 'TIC'),
            (self.base_peak_chromatogram_tracker,
             "basepeak chromatogram", 'BPC'),
        ]
        for tracker, kind, ident in defaults:
            if len(tracker) > 0:
                self.chromatogram_queue.append(dict(
                    chromatogram=tracker,
                    chromatogram_type=kind,
                    id=ident))

    def write_chromatograms(self):
        """Open the ``<chromatogramList>`` tag and flush every queued
        chromatogram through :meth:`save_chromatogram`.
        """
        queue = self.chromatogram_queue
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(queue))
        with self._chromatogram_list_tag:
            for entry in queue:
                # The remaining keys become keyword arguments.
                data = entry.pop("chromatogram")
                self.save_chromatogram(data, **entry)

    def complete(self):
        """Finish writing to the output document.

        This closes the open list tags, empties the chromatogram accumulator,
        and closes the :obj:`<mzML>` tag, and attempts to flush the output file.
        """
        if self._spectrum_list_tag is not None:
            self._spectrum_list_tag.__exit__(None, None, None)
        if self._run_tag is not None:
            # Chromatograms must be written while the <run> tag is still open.
            self._make_default_chromatograms()
            self.write_chromatograms()
        if self._run_tag is not None:
            self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                # The handle has no file name (e.g. an in-memory buffer); use a
                # placeholder so the index still has somewhere to go.
                # NOTE(review): "_detatched" is a typo, but it is a runtime
                # file name -- changing it would change the output path.
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError as e:
                warnings.warn(
                    "Could not write extended index file due to error %r" % (e,))

        # Best-effort flush; the handle may already be closed or unbuffered.
        try:
            self.writer.outfile.flush()
        except (IOError, AttributeError, ValueError):
            pass

    def format(self):
        """Deprecated no-op retained for backwards compatibility."""
        return None

    def close(self):
        """Finalize the document via :meth:`complete` and close the handle.

        Handles that report a ``closed`` attribute are closed only if still
        open; otherwise a close is attempted and common failures are ignored.
        """
        self.complete()
        stream = self.handle
        if hasattr(stream, "closed"):
            # File-like object that reports its state: close it at most once.
            if not stream.closed:
                try:
                    stream.close()
                except AttributeError:
                    pass
        else:
            # Unknown handle type: attempt the close, tolerating failure.
            try:
                stream.close()
            except (AttributeError, ValueError, TypeError, OSError):
                pass
Example #19
0
 def _index_file_name(self):
     # Derive the extended index file path from the source file path.
     # NOTE(review): assumes self.source_file is a path string here; other
     # variants of this method also handle file objects -- confirm.
     return ExtendedScanIndex.index_file_name(self.source_file)
Example #20
0
 def read_index_file(self, index_path=None):
     """Load the extended scan index from disk into :attr:`extended_index`.

     Parameters
     ----------
     index_path : str, optional
         Path to the index file. Defaults to :attr:`_index_file_name`.
     """
     if index_path is None:
         # NOTE(review): _index_file_name is referenced without calling it;
         # presumably it is a property on this class -- confirm.
         index_path = self._index_file_name
     with open(index_path) as handle:
         self.extended_index = ExtendedScanIndex.deserialize(handle)
Example #21
0
class MzMLScanSerializer(ScanSerializerBase):
    """Serialize scan bunches to an mzML document through
    :class:`psims.mzml.writer.MzMLWriter`.

    Document-level metadata (file contents, software, source files, instrument
    configurations, samples, data processing) is accumulated until the first
    spectrum is written; at that point the metadata lists are flushed and the
    ``<run>``/``<spectrumList>`` tags are opened, after which no further
    document-level metadata can be added.
    """

    def __init__(self, handle, n_spectra=2e4, compression=writer.COMPRESSION_ZLIB,
                 deconvoluted=True, sample_name=None, build_extra_index=True):
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self._has_started_writing_spectra = False

        # Open the <mzML> document now; the matching __exit__ happens in
        # complete().
        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None

        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted
        self.sample_name = sample_name

        # Metadata accumulators, written out when the first spectrum is saved.
        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []

        self.processing_parameters = []

        # scan time -> intensity maps used to build the TIC/BPC chromatograms.
        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()

        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))

        self.add_sample({
            "name": sample_name,
            "id": "sample_1",
            "params": [
                {"name": "SampleRun-UUID", "value": self.sample_run.uuid},
            ]})

        self.chromatogram_queue = []

        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def add_software(self, software_description):
        self.software_list.append(software_description)

    def add_file_contents(self, file_contents):
        self.file_contents_list.append(file_contents)

    def add_source_file(self, source_file_description):
        self.source_file_list.append(source_file_description)

    def add_data_processing(self, data_processing_description):
        self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value):
        self.processing_parameters.append({"name": name, "value": value})

    def add_instrument_configuration(self, instrument_description):
        self.instrument_configuration_list.append(instrument_description)

    def add_sample(self, sample):
        self.sample_list.append(sample)

    def _create_file_description(self):
        self.writer.file_description(
            self.file_contents_list, self.source_file_list)

    def _create_software_list(self):
        self.writer.software_list([{
            "id": "ms_deisotope_1",
            "name": "ms_deisotope"
        }])

    def _create_sample_list(self):
        self.writer.sample_list(self.sample_list)

    def _build_processing_method(self, order=1, picked_peaks=True, smoothing=True,
                                 baseline_reduction=True, additional_parameters=tuple()):
        """Assemble the data-processing method mapping for the mzML header."""
        if self.deconvoluted:
            params = [
                "deisotoping",
                "charge deconvolution",
                "precursor recalculation",
            ]
        else:
            params = []

        if picked_peaks:
            params.append("peak picking")
        if smoothing:
            params.append("smoothing")
        if baseline_reduction:
            params.append("baseline reduction")
        params.append("Conversion to mzML")

        params.extend(additional_parameters)

        mapping = {
            "software_reference": "ms_deisotope_1",
            "order": order,
            "params": params
        }
        return mapping

    def _create_data_processing_list(self):
        n = len(self.data_processing_list) - 1
        entry = {
            "id": "ms_deisotope_processing_1",
            "processing_methods": [self._build_processing_method(
                n, additional_parameters=self.processing_parameters)]
        }
        self.add_data_processing(entry)
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        """Flush the accumulated metadata lists and open the <run> and
        <spectrumList> tags. Called once, before the first spectrum.
        """
        self._create_file_description()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()
        self._create_sample_list()

        self._run_tag = self.writer.run(
            id=self.sample_name,
            sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def _pack_activation(self, activation_information):
        """Convert :class:`~.ActivationInformation` into a list of parameter
        dictionaries for the writer.
        """
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        # NOTE: Only correct for CID/HCD spectra with absolute collision energies, but that is all I have
        # to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volts"
        })
        for key, val in activation_information.data.items():
            arg = {
                "name": key,
                "value": val
            }
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self, precursor_information, activation_information=None):
        """Convert :class:`~.PrecursorInformation` (plus optional activation)
        into the dictionary form the writer expects.
        """
        # If the scan bunch has been fully deconvoluted and it's PrecursorInformation
        # filled in, its extracted fields will be populated and should be used, otherwise
        # use the default read values.
        if precursor_information.extracted_neutral_mass != 0:
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        if activation_information is not None:
            package['activation'] = self._pack_activation(activation_information)
        return package

    def _prepare_extra_arrays(self, scan):
        # Extra per-peak arrays (scores, isotopic envelopes) are only produced
        # for deconvoluted output.
        extra_arrays = []
        if self.deconvoluted:
            score_array = [
                peak.score for peak in scan.deconvoluted_peak_set
            ]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array([peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def save_scan_bunch(self, bunch, **kwargs):
        """Write one precursor scan and its product scans as related
        <spectrum> tags, tracking TIC/BPC points along the way.
        """
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True

        if self.deconvoluted:
            precursor_peaks = bunch.precursor.deconvoluted_peak_set
        else:
            precursor_peaks = bunch.precursor.peak_set

        # NOTE(review): an empty precursor peak set skips the entire bunch,
        # products included -- confirm this is intended.
        if len(precursor_peaks) == 0:
            return

        polarity = bunch.precursor.polarity
        if self.deconvoluted:
            charge_array = [p.charge for p in precursor_peaks]
        else:
            charge_array = None

        descriptors = describe_spectrum(precursor_peaks)

        self.writer.write_spectrum(
            [p.mz for p in precursor_peaks], [p.intensity for p in precursor_peaks], charge_array,
            id=bunch.precursor.id, params=[
                {"name": "ms level", "value": bunch.precursor.ms_level},
                {"name": "MS1 spectrum"}] + descriptors,
            polarity=polarity,
            scan_start_time=bunch.precursor.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(bunch.precursor))

        self.total_ion_chromatogram_tracker[
            bunch.precursor.scan_time] = _total_intensity_from_descriptors(descriptors)
        self.base_peak_chromatogram_tracker[
            bunch.precursor.scan_time] = _base_peak_from_descriptors(descriptors)

        for prod in bunch.products:
            if self.deconvoluted:
                product_peaks = prod.deconvoluted_peak_set
            else:
                product_peaks = prod.peak_set
            # Empty product scans are skipped individually.
            if len(product_peaks) == 0:
                continue
            descriptors = describe_spectrum(product_peaks)

            self.total_ion_chromatogram_tracker[
                prod.scan_time] = _total_intensity_from_descriptors(descriptors)
            self.base_peak_chromatogram_tracker[
                prod.scan_time] = _base_peak_from_descriptors(descriptors)

            if self.deconvoluted:
                charge_array = [p.charge for p in product_peaks]
            else:
                charge_array = None

            self.writer.write_spectrum(
                [p.mz for p in product_peaks], [p.intensity for p in product_peaks], charge_array,
                id=prod.id, params=[
                    {"name": "ms level", "value": prod.ms_level},
                    {"name": "MSn spectrum"}] + descriptors,
                polarity=prod.polarity,
                scan_start_time=prod.scan_time, precursor_information=self._pack_precursor_information(
                    prod.precursor_information, prod.activation),
                compression=self.compression,
                other_arrays=self._prepare_extra_arrays(prod))

        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def save_chromatogram(self, chromatogram_dict, chromatogram_type, params=None, **kwargs):
        """Write a single chromatogram (time -> intensity mapping)."""
        time_array, intensity_array = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            time_array, intensity_array, id=kwargs.get('id'),
            chromatogram_type=chromatogram_type, compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        # Queue TIC and BPC chromatograms; empty trackers are skipped.
        d = dict(
            chromatogram=self.total_ion_chromatogram_tracker,
            chromatogram_type='total ion current chromatogram',
            id='TIC')
        if len(self.total_ion_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

        d = dict(
            chromatogram=self.base_peak_chromatogram_tracker,
            chromatogram_type="basepeak chromatogram",
            id='BPC')
        if len(self.base_peak_chromatogram_tracker) > 0:
            self.chromatogram_queue.append(d)

    def write_chromatograms(self):
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(self.chromatogram_queue))
        with self._chromatogram_list_tag:
            for chromatogram in self.chromatogram_queue:
                self.save_chromatogram(
                    chromatogram.pop("chromatogram"),
                    **chromatogram)

    def complete(self):
        """Close all open tags, write the default chromatograms, and
        serialize the extended index (best effort).

        NOTE(review): unlike guarded variants of this method, this assumes
        spectra were written (the list/run tags are not None-checked).
        """
        self._spectrum_list_tag.__exit__(None, None, None)
        self._make_default_chromatograms()
        self.write_chromatograms()
        self._run_tag.__exit__(None, None, None)
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                # NOTE(review): "_detatched" is a typo, but it is a runtime
                # file name -- changing it would change the output path.
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name), 'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError:
                pass

    def format(self):
        """Ask the writer to pretty-print the document.

        NOTE(review): errno 32 is swallowed on Windows -- presumably a
        sharing/"file in use" error when the output is still held open;
        confirm. Other OSErrors propagate.
        """
        try:
            self.writer.format()
        except OSError as e:
            if on_windows and e.errno == 32:
                pass
Example #22
0
class MzMLScanSerializer(ScanSerializerBase):
    def __init__(self,
                 handle,
                 n_spectra=2e4,
                 compression=writer.COMPRESSION_ZLIB,
                 deconvoluted=True,
                 sample_name=None,
                 build_extra_index=True):
        self.handle = handle
        self.writer = writer.MzMLWriter(handle)
        self.n_spectra = n_spectra
        self.compression = compression
        self._has_started_writing_spectra = False

        self.writer.__enter__()
        self._run_tag = None
        self._spectrum_list_tag = None
        self._chromatogram_list_tag = None

        self.writer.controlled_vocabularies()
        self.deconvoluted = deconvoluted
        self.sample_name = sample_name

        self.file_contents_list = []
        self.software_list = []
        self.source_file_list = []
        self.data_processing_list = []
        self.instrument_configuration_list = []
        self.sample_list = []

        self.processing_parameters = []

        self.total_ion_chromatogram_tracker = OrderedDict()
        self.base_peak_chromatogram_tracker = OrderedDict()

        self.sample_run = SampleRun(name=sample_name, uuid=str(uuid4()))

        self.add_sample({
            "name":
            sample_name,
            "id":
            "sample_1",
            "params": [
                {
                    "name": "SampleRun-UUID",
                    "value": self.sample_run.uuid
                },
            ]
        })

        self.chromatogram_queue = []

        self.indexer = None
        if build_extra_index:
            self.indexer = ExtendedScanIndex()

    def add_software(self, software_description):
        self.software_list.append(software_description)

    def add_file_contents(self, file_contents):
        self.file_contents_list.append(file_contents)

    def add_source_file(self, source_file_description):
        self.source_file_list.append(source_file_description)

    def add_data_processing(self, data_processing_description):
        self.data_processing_list.append(data_processing_description)

    def add_processing_parameter(self, name, value):
        self.processing_parameters.append({"name": name, "value": value})

    def add_instrument_configuration(self, instrument_description):
        self.instrument_configuration_list.append(instrument_description)

    def add_sample(self, sample):
        self.sample_list.append(sample)

    def _create_file_description(self):
        self.writer.file_description(self.file_contents_list,
                                     self.source_file_list)

    def _create_software_list(self):
        self.writer.software_list([{
            "id": "ms_deisotope_1",
            "name": "ms_deisotope"
        }])

    def _create_sample_list(self):
        self.writer.sample_list(self.sample_list)

    def _build_processing_method(self,
                                 order=1,
                                 picked_peaks=True,
                                 smoothing=True,
                                 baseline_reduction=True,
                                 additional_parameters=tuple()):
        if self.deconvoluted:
            params = [
                "deisotoping",
                "charge deconvolution",
                "precursor recalculation",
            ]
        else:
            params = []

        if picked_peaks:
            params.append("peak picking")
        if smoothing:
            params.append("smoothing")
        if baseline_reduction:
            params.append("baseline reduction")
        params.append("Conversion to mzML")

        params.extend(additional_parameters)

        mapping = {
            "software_reference": "ms_deisotope_1",
            "order": order,
            "params": params
        }
        return mapping

    def _create_data_processing_list(self):
        n = len(self.data_processing_list) - 1
        entry = {
            "id":
            "ms_deisotope_processing_1",
            "processing_methods": [
                self._build_processing_method(
                    n, additional_parameters=self.processing_parameters)
            ]
        }
        self.add_data_processing(entry)
        self.writer.data_processing_list(self.data_processing_list)

    def _create_instrument_configuration(self):
        self.writer.instrument_configuration_list(
            self.instrument_configuration_list)

    def _add_spectrum_list(self):
        self._create_file_description()
        self._create_software_list()
        self._create_instrument_configuration()
        self._create_data_processing_list()
        self._create_sample_list()

        self._run_tag = self.writer.run(id=self.sample_name, sample='sample_1')
        self._run_tag.__enter__()
        self._spectrum_list_tag = self.writer.spectrum_list(
            count=self.n_spectra)
        self._spectrum_list_tag.__enter__()

    def _pack_activation(self, activation_information):
        params = []
        params.append({
            "name": str(activation_information.method),
        })
        # NOTE: Only correct for CID/HCD spectra with absolute collision energies, but that is all I have
        # to test with.
        params.append({
            "name": "collision energy",
            "value": activation_information.energy,
            "unitName": "electron volts"
        })
        for key, val in activation_information.data.items():
            arg = {"name": key, "value": val}
            try:
                arg['unitName'] = val.unit_info
            except AttributeError:
                pass
            params.append(arg)
        return params

    def _pack_precursor_information(self,
                                    precursor_information,
                                    activation_information=None):
        # If the scan bunch has been fully deconvoluted and it's PrecursorInformation
        # filled in, its extracted fields will be populated and should be used, otherwise
        # use the default read values.
        if precursor_information.extracted_neutral_mass != 0:
            package = {
                "mz": precursor_information.extracted_mz,
                "intensity": precursor_information.extracted_intensity,
                "charge": precursor_information.extracted_charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        else:
            package = {
                "mz": precursor_information.mz,
                "intensity": precursor_information.intensity,
                "charge": precursor_information.charge,
                "scan_id": precursor_information.precursor_scan_id
            }
        if activation_information is not None:
            package['activation'] = self._pack_activation(
                activation_information)
        return package

    def _prepare_extra_arrays(self, scan):
        extra_arrays = []
        if self.deconvoluted:
            score_array = [peak.score for peak in scan.deconvoluted_peak_set]
            extra_arrays.append(("deconvolution score array", score_array))
            envelope_array = envelopes_to_array(
                [peak.envelope for peak in scan.deconvoluted_peak_set])
            extra_arrays.append(("isotopic envelopes array", envelope_array))
        return extra_arrays

    def _scan_peaks(self, scan):
        # Select the peak collection matching the serialization mode.
        if self.deconvoluted:
            return scan.deconvoluted_peak_set
        return scan.peak_set

    def _scan_charge_array(self, peaks):
        # Charge arrays are only meaningful for deconvoluted spectra.
        if self.deconvoluted:
            return [p.charge for p in peaks]
        return None

    def _track_chromatogram_point(self, scan_time, descriptors):
        # Accumulate TIC/BPC points; these become default chromatograms
        # when the document is completed.
        self.total_ion_chromatogram_tracker[
            scan_time] = _total_intensity_from_descriptors(descriptors)
        self.base_peak_chromatogram_tracker[
            scan_time] = _base_peak_from_descriptors(descriptors)

    def _write_precursor_scan(self, precursor, peaks):
        # Serialize a single MS1 spectrum and record its TIC/BPC point.
        descriptors = describe_spectrum(peaks)
        self.writer.write_spectrum(
            [p.mz for p in peaks],
            [p.intensity for p in peaks],
            self._scan_charge_array(peaks),
            id=precursor.id,
            params=[{
                "name": "ms level",
                "value": precursor.ms_level
            }, {
                "name": "MS1 spectrum"
            }] + descriptors,
            polarity=precursor.polarity,
            scan_start_time=precursor.scan_time,
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(precursor))
        self._track_chromatogram_point(precursor.scan_time, descriptors)

    def _write_product_scan(self, prod, peaks):
        # Serialize a single MSn spectrum, including its precursor link,
        # and record its TIC/BPC point.
        descriptors = describe_spectrum(peaks)
        self._track_chromatogram_point(prod.scan_time, descriptors)
        self.writer.write_spectrum(
            [p.mz for p in peaks],
            [p.intensity for p in peaks],
            self._scan_charge_array(peaks),
            id=prod.id,
            params=[{
                "name": "ms level",
                "value": prod.ms_level
            }, {
                "name": "MSn spectrum"
            }] + descriptors,
            polarity=prod.polarity,
            scan_start_time=prod.scan_time,
            precursor_information=self._pack_precursor_information(
                prod.precursor_information, prod.activation),
            compression=self.compression,
            other_arrays=self._prepare_extra_arrays(prod))

    def save_scan_bunch(self, bunch, **kwargs):
        """Write a precursor scan and its product scans to the output stream.

        Opens the spectrum list section on first use. NOTE: if the precursor
        has no peaks the *entire* bunch is skipped, products included, and the
        bunch is not indexed — this mirrors the original behavior.

        Parameters
        ----------
        bunch : object
            A scan bunch with ``precursor`` and ``products`` attributes.
        """
        if not self._has_started_writing_spectra:
            self._add_spectrum_list()
            self._has_started_writing_spectra = True

        precursor_peaks = self._scan_peaks(bunch.precursor)
        if len(precursor_peaks) == 0:
            return

        self._write_precursor_scan(bunch.precursor, precursor_peaks)

        for prod in bunch.products:
            product_peaks = self._scan_peaks(prod)
            if len(product_peaks) == 0:
                # Empty product spectra are dropped individually.
                continue
            self._write_product_scan(prod, product_peaks)

        if self.indexer is not None:
            self.indexer.add_scan_bunch(bunch)

    def save_chromatogram(self,
                          chromatogram_dict,
                          chromatogram_type,
                          params=None,
                          **kwargs):
        """Write one chromatogram to the output stream.

        Parameters
        ----------
        chromatogram_dict : dict
            Mapping of time point -> intensity.
        chromatogram_type : str
            The controlled-vocabulary chromatogram type name.
        params : list, optional
            Extra cvParam/userParam entries to attach.
        """
        # Split the {time: intensity} mapping into two parallel sequences.
        times, intensities = zip(*chromatogram_dict.items())
        self.writer.write_chromatogram(
            times,
            intensities,
            id=kwargs.get('id'),
            chromatogram_type=chromatogram_type,
            compression=self.compression,
            params=params)

    def _make_default_chromatograms(self):
        """Queue the accumulated TIC and BPC for writing, skipping empties."""
        defaults = [
            (self.total_ion_chromatogram_tracker,
             'total ion current chromatogram', 'TIC'),
            (self.base_peak_chromatogram_tracker,
             "basepeak chromatogram", 'BPC'),
        ]
        for tracker, kind, ident in defaults:
            # Only queue chromatograms that actually collected data points.
            if len(tracker) > 0:
                self.chromatogram_queue.append(
                    dict(chromatogram=tracker,
                         chromatogram_type=kind,
                         id=ident))

    def write_chromatograms(self):
        """Emit every queued chromatogram inside a chromatogram list section."""
        queue = self.chromatogram_queue
        self._chromatogram_list_tag = self.writer.chromatogram_list(
            count=len(queue))
        with self._chromatogram_list_tag:
            for entry in queue:
                # Remove the data payload; the remaining keys become
                # keyword arguments (chromatogram_type, id, ...).
                data = entry.pop("chromatogram")
                self.save_chromatogram(data, **entry)

    def complete(self):
        """Finish the document: close the open XML sections in order, write
        the default chromatograms, and persist the extended index (if any).
        """
        # Close the spectrum list opened when the first spectrum was written,
        # then flush the default TIC/BPC chromatograms before closing the run.
        self._spectrum_list_tag.__exit__(None, None, None)
        self._make_default_chromatograms()
        self.write_chromatograms()
        self._run_tag.__exit__(None, None, None)
        # Closing the writer finalizes the mzML document itself.
        self.writer.__exit__(None, None, None)
        if self.indexer is not None:
            try:
                name = self.handle.name
            except AttributeError:
                # Handle has no file name (e.g. in-memory stream); fall back
                # to a placeholder so the index can still be serialized.
                # NOTE(review): "detatched" is misspelled but may be relied
                # upon by readers of this index file — confirm before fixing.
                name = "_detatched_mzml_index"
            try:
                with open(ExtendedScanIndex.index_file_name(name),
                          'w') as ixfile:
                    self.indexer.serialize(ixfile)
            except IOError:
                # Index persistence is best-effort; failure is non-fatal.
                pass

    def format(self):
        """Run the writer's post-hoc formatting pass over the output file.

        On Windows, errno 32 ("file in use by another process") is ignored
        because the formatting step is cosmetic and cannot proceed while the
        handle is still held elsewhere. Any other OSError propagates.

        Raises
        ------
        OSError
            If formatting fails for any reason other than the ignored
            Windows sharing-violation case.
        """
        try:
            self.writer.format()
        except OSError as e:
            if on_windows and e.errno == 32:
                pass
            else:
                # BUG FIX: previously every OSError was silently swallowed;
                # only the Windows errno-32 case was meant to be ignored.
                raise
Example #23
0
    path = key_index[key]
    reader, lock = reader_index[path]
    values = request.values
    print(values)
    with lock:
        scan = reader.get_scan_by_id(scan_id)
        response = format_scan(scan, values)
    return response


if __name__ == "__main__":
    import sys
    # For each file path on the command line: load it, build or reuse its
    # extended index, and register it under a numeric key for the web app.
    for i, path in enumerate(sys.argv[1:]):
        print("Loading {0} with Key {1}".format(path, i))
        reader = MSFileLoader(path)
        index_path = ExtendedScanIndex.index_file_name(path)
        if os.path.exists(index_path):
            # BUG FIX: close the index file deterministically instead of
            # leaking the handle opened inline in the load() call.
            with open(index_path, 'rt') as fh:
                file_index = ExtendedScanIndex.load(fh)
        else:
            print("Indexing {0}".format(path))
            reader.reset()
            file_index, scan_tree = quick_index.index(reader)
            reader.reset()
            with open(index_path, 'wt') as fh:
                file_index.dump(fh)
        print(file_index)
        metadata_index[path] = file_index
        # Each reader gets its own lock so concurrent requests serialize
        # access to the (stateful) file reader.
        reader_index[path] = reader, RLock()
        key_index[str(i)] = path

    app.run(threaded=True)
Example #24
0
 def read_index_file(self, index_path=None):
     """Load the extended scan index from ``index_path``.

     Parameters
     ----------
     index_path : str, optional
         Path to the index file; defaults to this reader's derived
         index file name.
     """
     path = self._index_file_name if index_path is None else index_path
     with open(path) as handle:
         self.extended_index = ExtendedScanIndex.deserialize(handle)