def convert_map_to_scan_peak_list(feature_map, peak_loader, time_precision=4, deconvoluted=True):
    """Regroup the peaks of a feature map into one processed scan per time point.

    Every node of every feature contributes its ``members`` to a bucket keyed
    by the node's ``time`` rounded to ``time_precision`` decimal places. For
    each bucket, in ascending time order, the scan returned by
    ``peak_loader.get_scan_by_time`` supplies the metadata for a new
    :class:`ProcessedScan` that carries only the bucketed peaks.

    Parameters
    ----------
    feature_map : Iterable
        Iterable of features, each of which iterates over nodes exposing
        ``time`` and ``members`` attributes.
    peak_loader : object
        Scan source providing ``get_scan_by_time(time)``. It is only consulted
        when at least one node was collected.
    time_precision : int, optional
        Number of decimal places used when rounding node times into buckets.
    deconvoluted : bool, optional
        If true, the collected peaks are stored in the scan's
        :class:`DeconvolutedPeakSet` and its :class:`PeakSet` is left empty;
        otherwise the roles are swapped.

    Returns
    -------
    list of ProcessedScan
        One scan per distinct rounded time, sorted by that time.
    """
    # NOTE: an earlier revision also built a time -> metadata lookup from
    # ``peak_loader.extended_index.ms1_ids`` that was never read; it has been
    # dropped so loaders without an extended index work as well.
    peaks_by_time = defaultdict(list)
    for feature in feature_map:
        for node in feature:
            peaks_by_time[round(node.time, time_precision)].extend(node.members)

    packed = []
    for time_key in sorted(peaks_by_time):
        peaks = peaks_by_time[time_key]
        template = peak_loader.get_scan_by_time(time_key)
        if deconvoluted:
            peak_set = PeakSet([])
            deconvoluted_peak_set = DeconvolutedPeakSet(peaks)
        else:
            peak_set = PeakSet(peaks)
            deconvoluted_peak_set = DeconvolutedPeakSet([])
        peak_set.reindex()
        deconvoluted_peak_set.reindex()
        scan = ProcessedScan(
            template.id, template.title, None, template.ms_level, template.scan_time,
            template.index, peak_set, deconvoluted_peak_set, template.polarity,
            None)
        packed.append(scan)
    return packed
def _simplify_peak_set(peaks, bin_width=5.0):
    """Thin out a peak collection, keeping only the dominant peaks of each m/z bin.

    The m/z axis, starting at 0 and extending past the m/z of the last peak,
    is cut into consecutive bins of ``bin_width``. Within every non-empty bin
    only the peaks whose intensity is strictly greater than one third of that
    bin's maximum intensity are retained.

    Parameters
    ----------
    peaks : PeakSet-like
        Must support ``len``, indexing, and ``between(low, high)``.
        Assumes the last element has the largest m/z (i.e. the collection is
        m/z-sorted) -- TODO confirm against callers.
    bin_width : float, optional
        Width of each m/z bin.

    Returns
    -------
    PeakIndex
        Wraps empty m/z and intensity arrays together with a re-indexed clone
        of the retained peaks, ordered by m/z. An empty input produces an
        empty result instead of raising ``IndexError``.
    """
    if len(peaks) == 0:
        # ``peaks[-1]`` below is undefined for an empty collection.
        empty = PeakSet([])
        empty.reindex()
        return PeakIndex(np.array([]), np.array([]), empty)

    bin_edges = np.arange(0, peaks[-1].mz + bin_width, bin_width)

    # Keyed by ``peak_count`` so a peak returned for two neighbouring bins
    # (presumably because ``between`` includes its bounds -- verify) is only
    # kept once.
    retained = {}
    for i, lower in enumerate(bin_edges):
        if i + 1 < len(bin_edges):
            upper = bin_edges[i + 1]
        else:
            # The final edge opens one extra bin past the end of the range.
            upper = bin_edges[-1] + bin_width
        members = peaks.between(lower, upper)
        if len(members) == 0:
            continue
        threshold = np.max([p.intensity for p in members]) / 3.
        for p in members:
            if p.intensity > threshold:
                retained[p.peak_count] = p

    subset_peaks = PeakSet(sorted(retained.values(),
                                  key=lambda x: x.mz)).clone()
    subset_peaks.reindex()
    return PeakIndex(np.array([]), np.array([]), subset_peaks)
Beispiel #3
0
 def _pick_peaks_vendor(self, scan, *args, **kwargs):
     """Build a peak index from the centroid stream stored by the vendor.

     Parameters
     ----------
     scan : object
         Raw scan record exposing ``scan_number``.
     *args, **kwargs :
         Accepted for signature compatibility; not used here.

     Returns
     -------
     PeakIndex
         The scan's raw arrays paired with a re-indexed :class:`PeakSet`
         made from the centroid masses and intensities.

     Raises
     ------
     NotImplementedError
         If the scan has no centroid stream.
     """
     # The vendor calls take ``scan_number + 1`` (presumably a 1-based scan
     # number -- verify against the vendor API).
     vendor_scan_number = scan.scan_number + 1
     scan_info = Business.Scan.FromFile(self._source, vendor_scan_number)
     if not scan_info.HasCentroidStream:
         raise NotImplementedError()

     centroids = self._source.GetCentroidStream(vendor_scan_number, 0)
     masses = centroids.Masses
     intensities = centroids.Intensities
     peak_set = PeakSet([
         simple_peak(masses[i], intensities[i], 0.001)
         for i in range(len(masses))
     ])
     peak_set.reindex()
     signal = self._scan_arrays(scan)
     return PeakIndex(signal[0], signal[1], peak_set)
Beispiel #4
0
def deserialize_peak_set(scan_dict):
    """Rebuild a peak index from the m/z and intensity arrays of a scan record.

    Parameters
    ----------
    scan_dict : Mapping
        Must contain the keys ``'m/z array'`` and ``'intensity array'``; the
        intensity array is indexed at every position of the m/z array.

    Returns
    -------
    PeakIndex
        Empty m/z and intensity arrays paired with a re-indexed
        :class:`PeakSet` holding one :class:`FittedPeak` per array position.
    """
    mzs = scan_dict['m/z array']
    intensities = scan_dict['intensity array']
    # Each peak gets its array position for the two index-like arguments and
    # its intensity repeated in the seventh position; the rest are constants.
    fitted = [
        FittedPeak(
            mz, intensities[i], 1, i, i,
            0, intensities[i], 0, 0)
        for i, mz in enumerate(mzs)
    ]
    result = PeakSet(fitted)
    result.reindex()
    return PeakIndex(np.array([]), np.array([]), result)
Beispiel #5
0
 def _make_scan(self, data):
     """Create a packed scan from ``data``, restoring any stored processed peaks.

     The parent class builds the scan. Precursor information, when present,
     is defaulted and annotated with the ``orphan`` / ``defaulted`` flags read
     from the selected ion entry, and the precursor purity annotation is set.
     If ``data`` holds an ``"isotopic envelopes array"`` the deconvoluted
     peaks are deserialized and those listed under ``'msms_peaks'`` in the
     extended index are flagged as chosen for MS/MS; otherwise only the
     fitted peak set is deserialized.

     Parameters
     ----------
     data : Mapping
         Raw scan record.

     Returns
     -------
     The result of ``scan.pack()``.
     """
     scan = super(ProcessedMzMLDeserializer, self)._make_scan(data)

     precursor = scan.precursor_information
     if precursor:
         precursor.default()
         selected_ion = self._get_selected_ion(data)
         # The flags are compared against the literal string "true".
         precursor.orphan = selected_ion.get("ms_deisotope:orphan") == "true"
         precursor.defaulted = selected_ion.get("ms_deisotope:defaulted") == "true"
         scan.annotations['precursor purity'] = data.get('precursor purity', 0)

     if "isotopic envelopes array" not in data:
         scan.peak_set = self.deserialize_peak_set(data)
         scan.deconvoluted_peak_set = None
     else:
         scan.peak_set = PeakIndex(np.array([]), np.array([]), PeakSet([]))
         scan.deconvoluted_peak_set = self.deserialize_deconvoluted_peak_set(data)
         if self.has_extended_index() and scan.id in self.extended_index.ms1_ids:
             ms1_entry = self.extended_index.ms1_ids[scan.id]
             for ix in ms1_entry['msms_peaks']:
                 scan.deconvoluted_peak_set[ix].chosen_for_msms = True

     return scan.pack()
Beispiel #6
0
def envelopes_to_peak_set(self):
    """Flatten the isotopic envelopes of every peak in this collection into centroids.

    Each point of each peak's ``envelope`` becomes one simple peak carrying
    the point's m/z and intensity and the parent peak's
    ``full_width_at_half_max``.

    Returns
    -------
    FittedPeakSet
        A re-indexed peak set containing all envelope points.
    """
    centroids = [
        simple_peak(point.mz, point.intensity, peak.full_width_at_half_max)
        for peak in self
        for point in peak.envelope
    ]
    flattened = FittedPeakSet(centroids)
    flattened.reindex()
    return flattened
Beispiel #7
0
    def convert(self, fitted=True, deconvoluted=True):
        """Materialize this database record as an in-memory :class:`ProcessedScan`.

        Parameters
        ----------
        fitted : bool, optional
            Load the fitted peaks stored for this scan. When false, the scan
            receives an empty peak set.
        deconvoluted : bool, optional
            Load the deconvoluted peaks stored for this scan. When false, the
            scan receives an empty deconvoluted peak set.

        Returns
        -------
        ProcessedScan
            Built from this record's metadata, the converted precursor
            information (if any), the requested peak collections, and the
            ``'activation'`` entry of ``self.info``.
        """
        if self.precursor_information is not None:
            precursor_information = self.precursor_information.convert()
        else:
            precursor_information = None

        # Peaks are read through the connection of the session owning this
        # record, selecting directly from the mapped tables.
        conn = object_session(self).connection()

        if fitted:
            fitted_table = FittedPeak.__table__
            rows = conn.execute(
                select([fitted_table]).where(
                    fitted_table.c.scan_id == self.id)).fetchall()
            peak_set = PeakSet([make_memory_fitted_peak(row) for row in rows])
            peak_set._index()
        else:
            peak_set = PeakSet([])
        peak_index = PeakIndex(
            np.array([], dtype=np.float64),
            np.array([], dtype=np.float64),
            peak_set)

        if deconvoluted:
            deconvoluted_table = DeconvolutedPeak.__table__
            rows = conn.execute(
                select([deconvoluted_table]).where(
                    deconvoluted_table.c.scan_id == self.id)).fetchall()
            deconvoluted_peak_set = DeconvolutedPeakSet(
                [make_memory_deconvoluted_peak(row) for row in rows])
            deconvoluted_peak_set._reindex()
        else:
            deconvoluted_peak_set = DeconvolutedPeakSet([])

        info = self.info or {}

        return ProcessedScan(
            self.scan_id, self.title, precursor_information, int(self.ms_level),
            float(self.scan_time), self.index, peak_index, deconvoluted_peak_set,
            activation=info.get('activation'))
Beispiel #8
0
def deserialize_peak_set(scan_dict):
    """Turn the ``'m/z array'`` / ``'intensity array'`` pair of a scan record into peaks.

    Parameters
    ----------
    scan_dict : Mapping
        Scan record providing both arrays; one peak is created per position
        of the m/z array.

    Returns
    -------
    PeakIndex
        Holds empty signal arrays and a re-indexed :class:`PeakSet` of
        :class:`FittedPeak` objects.
    """
    mz_values = scan_dict['m/z array']
    intensity_values = scan_dict['intensity array']

    def _peak_at(position):
        # The position doubles as both index-like arguments and the
        # intensity is passed a second time as the seventh argument.
        signal = intensity_values[position]
        return FittedPeak(mz_values[position], signal, 1, position, position,
                          0, signal, 0, 0)

    peak_set = PeakSet([_peak_at(i) for i in range(len(mz_values))])
    peak_set.reindex()
    empty_mz, empty_intensity = np.array([]), np.array([])
    return PeakIndex(empty_mz, empty_intensity, peak_set)
Beispiel #9
0
    def parse(self):
        """Read the peak list file at ``self.path`` into ``self.peaklist``.

        Every line that starts with a number is expected to contain exactly
        two numbers, taken as m/z and intensity in that order; each such line
        appends one ``deconvolution.FittedPeak`` to ``self.peaklist``. Lines
        that do not start with a digit are skipped. Afterwards
        ``self.peaklist`` is replaced by a :class:`PeakIndex` wrapping empty
        signal arrays and a :class:`PeakSet` of the accumulated peaks.

        Raises
        ------
        ValueError
            If a line starting with a number does not contain exactly two
            numeric tokens.
        """
        number_pattern = re.compile(r"\d+\.?[0-9]*")
        # The context manager closes the file even if a line fails to parse.
        # The previous ``if not line: break`` guard could never trigger,
        # because lines yielded by a file iterator always contain at least
        # the newline.
        with open(self.path) as handle:
            for line in handle:
                if number_pattern.match(line):
                    mz, intensity = map(float, number_pattern.findall(line))
                    self.peaklist.append(
                        deconvolution.FittedPeak(mz, intensity, 0, 0, 0, 0, 0))
        self.peaklist = PeakIndex(np.array([]), np.array([]),
                                  PeakSet(self.peaklist))
Beispiel #10
0
def convert_map_to_scan_peak_list(feature_map,
                                  peak_loader,
                                  time_precision=4,
                                  deconvoluted=True):
    """Collapse a feature map into a time-ordered list of processed scans.

    The ``members`` of every node in every feature are pooled by the node's
    ``time`` rounded to ``time_precision`` decimals. Each pool becomes one
    :class:`ProcessedScan` whose metadata is copied from the scan that
    ``peak_loader.get_scan_by_time`` returns for the pooled time.

    Parameters
    ----------
    feature_map : Iterable
        Features to unpack; each feature yields nodes with ``time`` and
        ``members`` attributes.
    peak_loader : object
        Provides ``get_scan_by_time(time)``; it is not touched when no node
        was collected.
    time_precision : int, optional
        Decimal places kept when rounding node times.
    deconvoluted : bool, optional
        Selects which collection receives the pooled peaks: the
        :class:`DeconvolutedPeakSet` when true, the :class:`PeakSet`
        otherwise. The other collection is left empty.

    Returns
    -------
    list of ProcessedScan
        Sorted by rounded time.
    """
    # The unused time -> metadata map formerly built here from
    # ``peak_loader.extended_index.ms1_ids`` was dead code and is gone; the
    # loader no longer needs an extended index for this function.
    scan_accumulator = defaultdict(list)
    for feature in feature_map:
        for node in feature:
            scan_accumulator[round(node.time,
                                   time_precision)].extend(node.members)

    packed = []
    for key in sorted(scan_accumulator):
        peaks = scan_accumulator[key]
        template = peak_loader.get_scan_by_time(key)
        if deconvoluted:
            peak_set = PeakSet([])
            deconvoluted_peak_set = DeconvolutedPeakSet(peaks)
        else:
            peak_set = PeakSet(peaks)
            deconvoluted_peak_set = DeconvolutedPeakSet([])
        peak_set.reindex()
        deconvoluted_peak_set.reindex()
        packed.append(
            ProcessedScan(template.id, template.title, None,
                          template.ms_level, template.scan_time,
                          template.index, peak_set, deconvoluted_peak_set,
                          template.polarity, None))
    return packed
Beispiel #11
0
 def _make_scan(self, data):
     """Build a packed scan from ``data`` and attach its stored peak collections.

     The parent class creates the scan and any precursor information is
     defaulted. Without an ``"isotopic envelopes array"`` only the fitted
     peak set is deserialized. With one, the deconvoluted peaks are
     deserialized and, if the scan id appears among the extended index's MS1
     ids, the peaks listed under ``'msms_peaks'`` are marked as chosen for
     MS/MS.

     Parameters
     ----------
     data : Mapping
         Raw scan record.

     Returns
     -------
     The result of ``scan.pack()``.
     """
     scan = super(ProcessedMzMLDeserializer, self)._make_scan(data)
     if scan.precursor_information:
         scan.precursor_information.default()

     if "isotopic envelopes array" not in data:
         scan.peak_set = deserialize_peak_set(data)
         scan.deconvoluted_peak_set = None
         return scan.pack()

     scan.peak_set = PeakIndex(np.array([]), np.array([]), PeakSet([]))
     scan.deconvoluted_peak_set = deserialize_deconvoluted_peak_set(data)
     ms1_index = self.extended_index.ms1_ids
     if scan.id in ms1_index:
         for ix in ms1_index[scan.id]['msms_peaks']:
             scan.deconvoluted_peak_set[ix].chosen_for_msms = True
     return scan.pack()
Beispiel #12
0
def prepare_peaklist(peaks):
    '''Coerce ``peaks`` into a cloned, indexed :class:`~.PeakSet`.

    The result is always built from a clone so that later in-place
    modification (e.g. by signal subtraction) does not touch the caller's
    peaks. Supported inputs:

    1. :class:`ms_peak_picker.PeakIndex`: its ``peaks`` are wrapped, cloned
       and indexed.
    2. A sequence whose first element is a :class:`~.FittedPeak` (including a
       :class:`ms_peak_picker.PeakSet`): wrapped, cloned and indexed as is.
    3. A sequence whose first element passes ``is_peak``: every element is
       rebuilt from its ``mz`` and ``intensity`` attributes.
    4. A sequence whose first element is a :class:`list` or :class:`tuple`:
       every element is rebuilt from ``p[0]`` (m/z) and ``p[1]`` (intensity);
       further entries are ignored.

    Only the first element is inspected to decide how the whole sequence is
    converted. Rebuilt peaks are created with a fixed width value of 0.01.

    Parameters
    ----------
    peaks: Sequence
        A :class:`ms_peak_picker.PeakIndex`, or any iterable of peak objects
        or of ``(mz, intensity, ...)`` sequences.

    Returns
    -------
    :class:`~.PeakSet`
        An empty input yields a fresh empty :class:`~.PeakSet`.

    Raises
    ------
    TypeError
        If the first element is none of the supported kinds.
    '''
    if isinstance(peaks, PeakIndex):
        prepared = PeakSet(peaks.peaks).clone()
        prepared.reindex()
        return prepared

    items = tuple(peaks)
    if len(items) == 0:
        return PeakSet([])

    first = items[0]
    if not isinstance(first, FittedPeak):
        if is_peak(first):
            items = [simple_peak(p.mz, p.intensity, 0.01) for p in items]
        elif isinstance(first, (list, tuple)):
            items = [simple_peak(p[0], p[1], 0.01) for p in items]
        else:
            raise TypeError("Cannot convert peaks into a PeakSet")

    prepared = PeakSet(items).clone()
    prepared.reindex()
    return prepared
def make_peak_index(fitted_peaks):
    """Wrap ``fitted_peaks`` in an indexed :class:`PeakSet` inside a :class:`PeakIndex`.

    The returned :class:`PeakIndex` carries empty float arrays for its m/z
    and intensity signal.
    """
    peak_set = PeakSet(fitted_peaks)
    peak_set._index()
    empty_mz = np.array([], dtype=float)
    empty_intensity = np.array([], dtype=float)
    return PeakIndex(empty_mz, empty_intensity, peak_set)
Beispiel #14
0
def make_peak_index(fitted_peaks):
    """Return ``fitted_peaks`` wrapped in a :class:`PeakSet` that has been indexed.

    Unlike what the name might suggest, this variant returns the
    :class:`PeakSet` itself rather than a ``PeakIndex`` wrapper.
    """
    indexed_peaks = PeakSet(fitted_peaks)
    indexed_peaks._index()
    return indexed_peaks
Beispiel #15
0
def make_peak_index(fitted_peaks):
    """Build a :class:`PeakIndex` with no raw signal around the given fitted peaks.

    The peaks are placed in a :class:`PeakSet`, which is indexed before being
    paired with two empty float arrays.
    """
    members = PeakSet(fitted_peaks)
    members._index()
    no_signal = (np.array([], dtype=float), np.array([], dtype=float))
    return PeakIndex(no_signal[0], no_signal[1], members)
Beispiel #16
0
    def pick_peaks(self, *args, **kwargs):
        """Pick peaks for this scan and store them in :attr:`peak_set`.

        Delegates to :func:`ms_peak_picker.pick_peaks`, unless the ``'vendor'``
        fit type is requested (as the ``fit_type`` keyword or as the first
        positional argument). In that case the source's vendor peak picker is
        tried first, and the regular picker is used only if the vendor picker
        raises :class:`NotImplementedError`.

        Parameters
        ----------
        fit_type : str, optional
            The name of the peak model to use. One of "quadratic", "gaussian",
            "lorentzian", or "apex"
        signal_to_noise_threshold : int, optional
            Minimum signal-to-noise measurement to accept a peak
        intensity_threshold : int, optional
            Minimum intensity measurement to accept a peak
        threshold_data : bool, optional
            Whether to apply thresholds to the data
        target_envelopes : list, optional
            A sequence of (start m/z, end m/z) pairs, limiting peak picking to
            only those intervals
        transforms : list, optional
            A list of :class:`scan_filter.FilterBase` instances or callable
            that accepts (mz_array, intensity_array) and returns
            (mz_array, intensity_array) or `str` matching one of the premade
            names in `scan_filter.filter_register`
        verbose : bool, optional
            Whether to log extra information while picking peaks
        start_mz : float, optional
            A minimum m/z value to start picking peaks from
        stop_mz : float, optional
            A maximum m/z value to stop picking peaks after
        *args :
            Forwarded to :func:`ms_peak_picker.pick_peaks`
        **kwargs :
            Forwarded to :func:`ms_peak_picker.pick_peaks`; ``peak_mode``
            defaults to ``'profile'`` or ``'centroid'`` depending on
            :attr:`is_profile`.

        Returns
        -------
        Scan
            Returns self
        """
        # The fit type may arrive either by keyword or as the first
        # positional argument.
        keyword_fit = kwargs.get("fit_type")
        positional_fit = args[0] if args else None
        if keyword_fit == 'vendor' or positional_fit == 'vendor':
            try:
                self.peak_set = self.source._pick_peaks_vendor(
                    self._data, *args, **kwargs)
            except NotImplementedError:
                # No vendor picker available: fall through to the generic one.
                pass
            else:
                return self

        mz_array, intensity_array = self.arrays
        if len(mz_array) == 0:
            # Nothing to pick from; store an empty peak set over the raw arrays.
            self.peak_set = PeakIndex(mz_array, intensity_array, PeakSet([]))
            return self

        # Only fills in the mode when the caller did not choose one.
        kwargs.setdefault(
            'peak_mode', 'profile' if self.is_profile else 'centroid')

        self.peak_set = pick_peaks(mz_array, intensity_array, *args, **kwargs)
        return self