def convert_map_to_scan_peak_list(feature_map, peak_loader, time_precision=4, deconvoluted=True):
    """Regroup the peaks of a feature map into one processed scan per time point.

    Every node of every feature contributes its ``members`` to a bucket keyed
    by the node's ``time`` rounded to ``time_precision`` decimal places. Each
    bucket is then wrapped in a :class:`ProcessedScan` whose descriptive
    fields are copied from the scan ``peak_loader.get_scan_by_time`` returns
    for that rounded time.

    Parameters
    ----------
    feature_map : Iterable
        Iterable of features, each an iterable of nodes exposing ``time`` and
        ``members``.
    peak_loader : object
        Scan source providing ``get_scan_by_time``.
    time_precision : int, optional
        Number of decimal places used when rounding node times.
    deconvoluted : bool, optional
        When true the collected peaks are stored in the
        :class:`DeconvolutedPeakSet` and the :class:`PeakSet` is left empty;
        when false it is the other way around.

    Returns
    -------
    list
        One :class:`ProcessedScan` per distinct rounded time, in ascending
        time order. Empty when the feature map holds no nodes.
    """
    # The previous implementation also built a scan-time -> metadata table from
    # ``peak_loader.extended_index.ms1_ids`` that was never read; it was dropped
    # so the loader no longer needs an extended index here.
    scan_accumulator = defaultdict(list)
    for feature in feature_map:
        for node in feature:
            scan_accumulator[round(node.time, time_precision)].extend(node.members)

    packed = []
    for key in sorted(scan_accumulator):
        peaks = scan_accumulator[key]
        template = peak_loader.get_scan_by_time(key)
        if deconvoluted:
            peak_set = PeakSet([])
            deconvoluted_peak_set = DeconvolutedPeakSet(peaks)
        else:
            peak_set = PeakSet(peaks)
            deconvoluted_peak_set = DeconvolutedPeakSet([])
        peak_set.reindex()
        deconvoluted_peak_set.reindex()
        packed.append(ProcessedScan(
            template.id, template.title, None, template.ms_level,
            template.scan_time, template.index, peak_set,
            deconvoluted_peak_set, template.polarity, None))
    return packed
def _simplify_peak_set(peaks, bin_width=5.0):
    """Thin a peak set down to the dominant peaks of each m/z bin.

    The m/z axis from 0 up to the m/z of the last peak is cut into consecutive
    bins of ``bin_width``. Within every non-empty bin, only peaks whose
    intensity is strictly greater than one third of that bin's most intense
    peak are kept.

    Parameters
    ----------
    peaks : PeakSet
        Peak collection supporting ``len``, indexing and ``between``. The last
        element's ``mz`` sets the upper end of the binning range.
    bin_width : float, optional
        Width of each m/z bin.

    Returns
    -------
    PeakIndex
        Empty m/z and intensity arrays paired with a cloned, re-indexed
        :class:`PeakSet` of the retained peaks sorted by m/z. For an empty
        input the peak set is empty.
    """
    if len(peaks) == 0:
        # Nothing to bin; ``peaks[-1]`` below would raise on an empty set.
        return PeakIndex(np.array([]), np.array([]), PeakSet([]))

    lower_edges = np.arange(0, peaks[-1].mz + bin_width, bin_width)
    # Each bin ends where the next one starts; the final bin gets one more width.
    upper_edges = np.append(lower_edges[1:], lower_edges[-1] + bin_width)

    retained = {}
    for lower, upper in zip(lower_edges, upper_edges):
        members = peaks.between(lower, upper)
        if len(members) == 0:
            continue
        threshold = max(p.intensity for p in members) / 3.
        for p in members:
            if p.intensity > threshold:
                # Keyed by peak_count so a peak that ``between`` reports for
                # more than one bin is only stored once.
                retained[p.peak_count] = p

    subset_peaks = PeakSet(sorted(retained.values(), key=lambda x: x.mz)).clone()
    subset_peaks.reindex()
    return PeakIndex(np.array([]), np.array([]), subset_peaks)
def _pick_peaks_vendor(self, scan, *args, **kwargs):
    """Build a peak index for ``scan`` from the vendor-provided centroid stream.

    Parameters
    ----------
    scan : object
        Raw scan record exposing ``scan_number``.
    *args, **kwargs :
        Accepted for interface compatibility; not used here.

    Returns
    -------
    PeakIndex
        The scan's raw arrays (from ``self._scan_arrays``) paired with a
        re-indexed :class:`PeakSet` built from the centroid stream.

    Raises
    ------
    NotImplementedError
        If the vendor scan record reports no centroid stream.
    """
    # The vendor calls are addressed with ``scan_number + 1``.
    vendor_scan_number = scan.scan_number + 1
    scan_info = Business.Scan.FromFile(self._source, vendor_scan_number)
    if not scan_info.HasCentroidStream:
        raise NotImplementedError()

    stream = self._source.GetCentroidStream(vendor_scan_number, 0)
    mzs = stream.Masses
    intens = stream.Intensities

    centroids = []
    for i in range(len(mzs)):
        centroids.append(simple_peak(mzs[i], intens[i], 0.001))
    peaks = PeakSet(centroids)
    peaks.reindex()

    arrays = self._scan_arrays(scan)
    return PeakIndex(arrays[0], arrays[1], peaks)
def deserialize_peak_set(scan_dict):
    """Rebuild a peak index from the m/z and intensity arrays of a scan record.

    Parameters
    ----------
    scan_dict : Mapping
        Scan record holding equally long ``'m/z array'`` and
        ``'intensity array'`` entries.

    Returns
    -------
    PeakIndex
        Empty raw arrays paired with a re-indexed :class:`PeakSet` holding one
        :class:`FittedPeak` per array position. Besides m/z and intensity each
        peak receives its position and fixed placeholder values for the
        remaining constructor arguments.
    """
    mz_array = scan_dict['m/z array']
    intensity_array = scan_dict['intensity array']
    peak_set = PeakSet([
        FittedPeak(
            mz_array[i], intensity_array[i], 1, i, i, 0,
            intensity_array[i], 0, 0)
        for i in range(len(mz_array))
    ])
    peak_set.reindex()
    return PeakIndex(np.array([]), np.array([]), peak_set)
def _make_scan(self, data):
    """Build a scan from ``data`` and attach its stored peak sets.

    The parent class creates the scan. When the scan has precursor
    information, it is reset via ``default()`` and flagged from the selected
    ion's ``ms_deisotope:orphan`` / ``ms_deisotope:defaulted`` entries, and the
    ``'precursor purity'`` annotation is recorded (0 when absent).

    Records carrying an ``"isotopic envelopes array"`` get an empty fitted
    peak index plus their deserialized deconvoluted peaks; any peaks the
    extended index lists under ``'msms_peaks'`` for this scan are marked
    ``chosen_for_msms``. All other records get a fitted peak set and no
    deconvoluted peak set.

    Parameters
    ----------
    data : Mapping
        Raw scan record.

    Returns
    -------
    The result of ``scan.pack()``.
    """
    scan = super(ProcessedMzMLDeserializer, self)._make_scan(data)

    if scan.precursor_information:
        scan.precursor_information.default()
        ion_params = self._get_selected_ion(data)
        is_orphan = ion_params.get("ms_deisotope:orphan") == "true"
        scan.precursor_information.orphan = is_orphan
        is_defaulted = ion_params.get("ms_deisotope:defaulted") == "true"
        scan.precursor_information.defaulted = is_defaulted
        scan.annotations['precursor purity'] = data.get('precursor purity', 0)

    if "isotopic envelopes array" not in data:
        scan.peak_set = self.deserialize_peak_set(data)
        scan.deconvoluted_peak_set = None
    else:
        scan.peak_set = PeakIndex(np.array([]), np.array([]), PeakSet([]))
        scan.deconvoluted_peak_set = self.deserialize_deconvoluted_peak_set(data)
        if self.has_extended_index() and scan.id in self.extended_index.ms1_ids:
            ms1_entry = self.extended_index.ms1_ids[scan.id]
            for ix in ms1_entry['msms_peaks']:
                scan.deconvoluted_peak_set[ix].chosen_for_msms = True

    return scan.pack()
def envelopes_to_peak_set(self):
    """Flatten the isotopic envelopes of these deconvoluted peaks into centroids.

    Every point of every peak's ``envelope`` becomes one simple peak carrying
    the point's m/z and intensity and the parent peak's
    ``full_width_at_half_max``.

    Returns
    -------
    :class:`ms_peak_picker.PeakSet`
        A re-indexed peak set (constructed here through ``FittedPeakSet``)
        holding all envelope points.
    """
    centroids = [
        simple_peak(point.mz, point.intensity, peak.full_width_at_half_max)
        for peak in self
        for point in peak.envelope
    ]
    centroid_set = FittedPeakSet(centroids)
    centroid_set.reindex()
    return centroid_set
def convert(self, fitted=True, deconvoluted=True):
    """Materialise this stored scan as an in-memory :class:`ProcessedScan`.

    Parameters
    ----------
    fitted : bool, optional
        Load the fitted peaks stored for this scan. When false the peak index
        wraps an empty :class:`PeakSet`.
    deconvoluted : bool, optional
        Load the deconvoluted peaks stored for this scan. When false an empty
        :class:`DeconvolutedPeakSet` is used.

    Returns
    -------
    ProcessedScan
        Scan carrying the converted precursor information (or ``None``), the
        requested peak collections, and the ``'activation'`` entry of
        ``self.info`` if there is one.
    """
    if self.precursor_information is not None:
        precursor_information = self.precursor_information.convert()
    else:
        precursor_information = None

    # Peaks are fetched through the connection of the session owning this row.
    conn = object_session(self).connection()

    if fitted:
        fitted_table = FittedPeak.__table__
        fitted_rows = conn.execute(
            select([fitted_table]).where(
                fitted_table.c.scan_id == self.id)).fetchall()
        peak_set = PeakSet([make_memory_fitted_peak(row) for row in fitted_rows])
        peak_set._index()
    else:
        peak_set = PeakSet([])
    peak_index = PeakIndex(
        np.array([], dtype=np.float64),
        np.array([], dtype=np.float64),
        peak_set)

    if deconvoluted:
        deconvoluted_table = DeconvolutedPeak.__table__
        deconvoluted_rows = conn.execute(
            select([deconvoluted_table]).where(
                deconvoluted_table.c.scan_id == self.id)).fetchall()
        deconvoluted_peak_set = DeconvolutedPeakSet(
            [make_memory_deconvoluted_peak(row) for row in deconvoluted_rows])
        deconvoluted_peak_set._reindex()
    else:
        deconvoluted_peak_set = DeconvolutedPeakSet([])

    info = self.info or {}
    return ProcessedScan(
        self.scan_id, self.title, precursor_information,
        int(self.ms_level), float(self.scan_time), self.index,
        peak_index, deconvoluted_peak_set,
        activation=info.get('activation'))
def deserialize_peak_set(scan_dict):
    """Turn the stored m/z and intensity arrays of a scan into a peak index.

    Parameters
    ----------
    scan_dict : Mapping
        Scan record with ``'m/z array'`` and ``'intensity array'`` entries of
        the same length.

    Returns
    -------
    PeakIndex
        Empty raw arrays together with a re-indexed :class:`PeakSet`
        containing one :class:`FittedPeak` per array position. The position is
        passed along with the m/z and intensity; the other constructor
        arguments are fixed placeholders.
    """
    mz_array = scan_dict['m/z array']
    intensity_array = scan_dict['intensity array']

    def build_peak(i):
        # The intensity is passed twice, matching the stored record layout.
        intensity = intensity_array[i]
        return FittedPeak(mz_array[i], intensity, 1, i, i, 0, intensity, 0, 0)

    peak_set = PeakSet([build_peak(i) for i in range(len(mz_array))])
    peak_set.reindex()
    return PeakIndex(np.array([]), np.array([]), peak_set)
def parse(self):
    """Read the peak list file at ``self.path`` into ``self.peaklist``.

    Every line that starts with a number is expected to contain exactly two
    numbers, m/z then intensity; each such line is appended to
    ``self.peaklist`` as a ``deconvolution.FittedPeak``. Other lines are
    skipped. Afterwards ``self.peaklist`` is replaced by a :class:`PeakIndex`
    wrapping a :class:`PeakSet` of the collected peaks with empty raw arrays.

    Raises
    ------
    ValueError
        If a line starting with a number does not contain exactly two numbers.
    """
    # Compiled once instead of per line; the same pattern both detects data
    # lines and extracts their values.
    number = re.compile(r"\d+\.?[0-9]*")
    # The context manager guarantees the file is closed, including on error
    # (the handle used to be left open).
    with open(self.path) as handle:
        for line in handle:
            if number.match(line):
                mz, intensity = map(float, number.findall(line))
                self.peaklist.append(
                    deconvolution.FittedPeak(mz, intensity, 0, 0, 0, 0, 0))
    self.peaklist = PeakIndex(np.array([]), np.array([]), PeakSet(self.peaklist))
def convert_map_to_scan_peak_list(feature_map, peak_loader, time_precision=4, deconvoluted=True):
    """Collect the peaks of a feature map into per-time-point processed scans.

    The ``members`` of each feature node are pooled by the node's ``time``
    rounded to ``time_precision`` decimals. For every pooled time a
    :class:`ProcessedScan` is created, taking its descriptive fields from the
    scan returned by ``peak_loader.get_scan_by_time`` for that time.

    Parameters
    ----------
    feature_map : Iterable
        Iterable of features, each an iterable of nodes with ``time`` and
        ``members`` attributes.
    peak_loader : object
        Scan source providing ``get_scan_by_time``.
    time_precision : int, optional
        Decimal places kept when rounding node times.
    deconvoluted : bool, optional
        If true, the pooled peaks fill the :class:`DeconvolutedPeakSet` and
        the :class:`PeakSet` stays empty; if false, the roles are swapped.

    Returns
    -------
    list
        :class:`ProcessedScan` objects ordered by ascending rounded time;
        empty when no nodes were found.
    """
    # A scan-time -> metadata lookup used to be built here from
    # ``peak_loader.extended_index.ms1_ids`` but nothing ever read it, so it
    # was removed along with the dependency on an extended index.
    scan_accumulator = defaultdict(list)
    for feature in feature_map:
        for node in feature:
            rounded_time = round(node.time, time_precision)
            scan_accumulator[rounded_time].extend(node.members)

    packed = []
    for key in sorted(scan_accumulator):
        peaks = scan_accumulator[key]
        template = peak_loader.get_scan_by_time(key)
        if deconvoluted:
            peak_set = PeakSet([])
            deconvoluted_peak_set = DeconvolutedPeakSet(peaks)
        else:
            peak_set = PeakSet(peaks)
            deconvoluted_peak_set = DeconvolutedPeakSet([])
        peak_set.reindex()
        deconvoluted_peak_set.reindex()
        scan = ProcessedScan(
            template.id, template.title, None, template.ms_level,
            template.scan_time, template.index, peak_set,
            deconvoluted_peak_set, template.polarity, None)
        packed.append(scan)
    return packed
def _make_scan(self, data):
    """Build a scan from ``data`` and attach its stored peak sets.

    The parent class creates the scan; precursor information, when present,
    is reset via ``default()``. Records with an ``"isotopic envelopes array"``
    get an empty fitted peak index and their deserialized deconvoluted peaks,
    and the peaks the extended index lists under ``'msms_peaks'`` for this
    scan are marked ``chosen_for_msms``. Other records get a fitted peak set
    and no deconvoluted peak set.

    Parameters
    ----------
    data : Mapping
        Raw scan record.

    Returns
    -------
    The result of ``scan.pack()``.
    """
    scan = super(ProcessedMzMLDeserializer, self)._make_scan(data)
    if scan.precursor_information:
        scan.precursor_information.default()

    if "isotopic envelopes array" not in data:
        scan.peak_set = deserialize_peak_set(data)
        scan.deconvoluted_peak_set = None
        return scan.pack()

    scan.peak_set = PeakIndex(np.array([]), np.array([]), PeakSet([]))
    scan.deconvoluted_peak_set = deserialize_deconvoluted_peak_set(data)
    if scan.id in self.extended_index.ms1_ids:
        ms1_entry = self.extended_index.ms1_ids[scan.id]
        for ix in ms1_entry['msms_peaks']:
            scan.deconvoluted_peak_set[ix].chosen_for_msms = True
    return scan.pack()
def prepare_peaklist(peaks):
    '''Coerce ``peaks`` into an independent, indexed :class:`~.PeakSet`.

    The result is always a copy, because signal subtraction methods modify
    peaks in place. The following inputs are accepted:

    1. :class:`ms_peak_picker.PeakIndex`: its ``peaks`` are extracted and
       copied.
    2. :class:`ms_peak_picker.PeakSet` or any other *sequence* of
       :class:`~.FittedPeak` objects: copied and indexed.
    3. Any other *sequence* of :class:`PeakLike` objects (objects having an
       ``mz`` and ``intensity`` attribute): converted peak by peak.
    4. Any *sequence* of :class:`tuple` or :class:`list` with at least two
       entries: ``p[0]`` is taken as the m/z and ``p[1]`` as the intensity of
       each peak; further entries are ignored.

    Only the first element is inspected to decide which case applies.

    Parameters
    ----------
    peaks: Sequence
        Any sequence of :class:`~.FittedPeak` objects, objects with ``mz`` and
        ``intensity`` attributes, or :class:`list` / :class:`tuple` objects
        containing paired values for ``mz`` and ``intensity``

    Returns
    -------
    :class:`~.PeakSet`

    Raises
    ------
    TypeError
        If the first element is none of the supported kinds.
    '''
    if isinstance(peaks, PeakIndex):
        prepared = PeakSet(peaks.peaks).clone()
        prepared.reindex()
        return prepared

    items = tuple(peaks)
    if not items:
        # Empty input: returned as-is, without cloning or indexing.
        return PeakSet([])

    first = items[0]
    if not isinstance(first, FittedPeak):
        if is_peak(first):
            items = [simple_peak(p.mz, p.intensity, 0.01) for p in items]
        elif isinstance(first, (list, tuple)):
            items = [simple_peak(p[0], p[1], 0.01) for p in items]
        else:
            raise TypeError("Cannot convert peaks into a PeakSet")

    prepared = PeakSet(items).clone()
    prepared.reindex()
    return prepared
def make_peak_index(fitted_peaks):
    """Wrap ``fitted_peaks`` in an indexed peak set inside a :class:`PeakIndex`.

    Parameters
    ----------
    fitted_peaks : Iterable
        Peaks to place in the :class:`PeakSet`.

    Returns
    -------
    PeakIndex
        Empty float m/z and intensity arrays paired with the indexed peak set.
    """
    peak_set = PeakSet(fitted_peaks)
    peak_set._index()
    empty_mz = np.array([], dtype=float)
    empty_intensity = np.array([], dtype=float)
    return PeakIndex(empty_mz, empty_intensity, peak_set)
def make_peak_index(fitted_peaks):
    """Build an indexed :class:`PeakSet` from ``fitted_peaks``.

    Despite the function's name, the peak set itself is returned rather than
    a ``PeakIndex`` wrapper.

    Parameters
    ----------
    fitted_peaks : Iterable
        Peaks to place in the :class:`PeakSet`.

    Returns
    -------
    PeakSet
        The peak set after ``_index()`` has been called on it.
    """
    peak_set = PeakSet(fitted_peaks)
    peak_set._index()
    return peak_set
def pick_peaks(self, *args, **kwargs):
    """Pick peaks for this scan and store them in :attr:`peak_set`.

    This wraps :func:`ms_peak_picker.pick_peaks`. If ``fit_type`` (given by
    keyword or as the first positional argument) is ``"vendor"``, the
    source's vendor peak picker is tried first; when the source does not
    implement one, picking falls through to :func:`ms_peak_picker.pick_peaks`
    with the arguments unchanged.

    Parameters
    ----------
    fit_type : str, optional
        The name of the peak model to use. One of "quadratic", "gaussian",
        "lorentzian", or "apex"
    signal_to_noise_threshold : int, optional
        Minimum signal-to-noise measurement to accept a peak
    intensity_threshold : int, optional
        Minimum intensity measurement to accept a peak
    threshold_data : bool, optional
        Whether to apply thresholds to the data
    target_envelopes : list, optional
        A sequence of (start m/z, end m/z) pairs, limiting peak picking to
        only those intervals
    transforms : list, optional
        A list of :class:`scan_filter.FilterBase` instances or callable that
        accepts (mz_array, intensity_array) and returns
        (mz_array, intensity_array) or `str` matching one of the premade
        names in `scan_filter.filter_register`
    verbose : bool, optional
        Whether to log extra information while picking peaks
    start_mz : float, optional
        A minimum m/z value to start picking peaks from
    stop_mz : float, optional
        A maximum m/z value to stop picking peaks after
    *args :
        Passed along to :func:`ms_peak_picker.pick_peaks`
    **kwargs :
        Passed along to :func:`ms_peak_picker.pick_peaks`

    Returns
    -------
    Scan
        Returns self
    """
    # The fit type may arrive either by keyword or as the first positional
    # argument; either one can request the vendor peak picker.
    wants_vendor = kwargs.get("fit_type") == 'vendor' or (
        len(args) > 0 and args[0] == 'vendor')
    if wants_vendor:
        try:
            self.peak_set = self.source._pick_peaks_vendor(
                self._data, *args, **kwargs)
            return self
        except NotImplementedError:
            # No vendor picker for this source: use the generic path below.
            pass

    mzs, intensities = self.arrays
    if len(mzs) == 0:
        # No signal at all: store an empty peak set over the (empty) arrays.
        self.peak_set = PeakIndex(mzs, intensities, PeakSet([]))
        return self

    # An explicitly supplied peak_mode wins over the one implied by the scan.
    kwargs.setdefault('peak_mode', 'profile' if self.is_profile else 'centroid')
    self.peak_set = pick_peaks(mzs, intensities, *args, **kwargs)
    return self