def test_graph_deconvolution(self):
    """Check that composition-list graph deconvolution recovers every ion.

    The scan returned by ``self.make_scan()`` is peak-picked and then
    deconvoluted with ``CompositionListPeakDependenceGraphDeconvoluter``.
    When the number of deconvoluted peaks equals
    ``sum(map(len, self.charges))`` the test passes immediately. Otherwise
    every deconvoluted peak is matched to its nearest theoretical
    distribution, each expected ion left unmatched is re-fit by hand with
    its diagnostics logged, and the test fails if any ion was missed.
    """
    scan = self.make_scan()
    scan.pick_peaks()
    self.assertIsNotNone(scan.peak_set)
    algorithm_type = CompositionListPeakDependenceGraphDeconvoluter
    decon_config = {
        "composition_list": self.compositions,
        "scorer": PenalizedMSDeconVFitter(5., 2.),
        "use_subtraction": True,
    }
    deconresult = deconvolute_peaks(
        scan.peak_set, decon_config, charge_range=(-1, -8),
        deconvoluter_type=algorithm_type)
    dpeaks = deconresult.peak_set
    n_cases = sum(map(len, self.charges))
    # A count mismatch alone is not a failure; it only triggers the
    # diagnostics below, which fail the test when an expected ion has no
    # matching deconvoluted peak.
    if len(dpeaks) == n_cases:
        return

    tids, ions = self.make_tids()
    tids, ions = zip(
        *sorted(zip(tids, ions), key=lambda x: x[0].monoisotopic_mz))
    seen = set()
    for dp in sorted(dpeaks, key=lambda x: x.mz):
        ix = self.get_nearest_index(dp.mz, tids)
        logger.warning(
            "%0.3f %d %0.3f %r (Matched %d)",
            dp.neutral_mass, dp.charge, dp.score, dp.solution, ix)
        seen.add(ix)

    # Sorted so the diagnostics come out in a reproducible order.
    missed = sorted(set(range(len(ions))) - seen)
    deconvoluter = algorithm_type(scan.peak_set.clone(), **decon_config)
    for ix in missed:
        composition, charge = ions[ix]
        tid = deconvoluter.generate_theoretical_isotopic_cluster(
            composition, charge)
        assert np.isclose(sum(p.intensity for p in tid), 1.0)
        monoisotopic_peak = deconvoluter.peaklist.has_peak(tid[0].mz, 2e-5)
        if monoisotopic_peak is not None:
            tid = deconvoluter.recalibrate_theoretical_mz(
                tid, monoisotopic_peak.mz)
        eid = deconvoluter.match_theoretical_isotopic_distribution(
            tid.peaklist, 2e-5)
        missed_peaks = count_placeholders(eid)
        deconvoluter.scale_theoretical_distribution(tid, eid)
        score = deconvoluter.scorer.evaluate(
            deconvoluter.peaklist, eid, tid.peaklist)
        # Repeat the fit through the deconvoluter's own entry point so the
        # fit record it would have produced can be reported as well.
        fit_record = deconvoluter.fit_composition_at_charge(
            composition, charge)
        rep_eid = drop_placeholders(fit_record.experimental)
        validation = (
            len(rep_eid) < 2,
            len(rep_eid) < len(fit_record.theoretical) / 2.,
            len(rep_eid) == 1 and fit_record.charge > 1,
        )
        # Lazy %-args, matching the logging call above.
        logger.warning(
            "Missed %r %d (%d missed peaks, score = %0.3f, record = %r, validation = %r)",
            composition, charge, missed_peaks, score, fit_record, validation)
    assert not missed, (
        "%d expected ion(s) were not recovered (indices %r)"
        % (len(missed), missed))
def test_finalize(self, feature_fit, charge_carrier=PROTON,
                  detection_threshold=0.1, max_missed_peaks=1):
    """Score the envelope at each time point of a feature fit.

    This is a generator: nothing is built or mutated by this method itself,
    it only reports what each time point looks like.

    Parameters
    ----------
    feature_fit
        Fit whose ``features``, ``theoretical`` and ``charge`` are read.
    charge_carrier
        Accepted but not used by this method.
    detection_threshold
        Forwarded to ``find_bounds`` to get the time range to iterate.
    max_missed_peaks
        A time point with more missing peaks than this is flagged invalid.

    Yields
    ------
    tuple
        ``(time, score, n_missing, invalid)`` for every time point.
    """
    start_time, end_time = find_bounds(feature_fit, detection_threshold)
    time_points = FeatureSetIterator(
        feature_fit.features, start_time, end_time)
    theoretical = feature_fit.theoretical
    multiply_charged = abs(feature_fit.charge) > 1
    for observed in time_points:
        # NOTE(review): finalize_fit passes ``theoretical.truncated_tid``
        # to conform_envelopes; confirm the untruncated one is intended.
        conformed, tid, n_missing = conform_envelopes(observed, theoretical)
        n_real_peaks = len(drop_placeholders(conformed))
        no_signal = n_real_peaks == 0
        lone_peak = n_real_peaks == 1 and multiply_charged
        too_sparse = n_missing > max_missed_peaks
        invalid = no_signal or lone_peak or too_sparse
        score = self.scorer.evaluate(None, conformed, tid)
        yield time_points.current_time, score, n_missing, invalid
def test_graph_deconvolution(self):
    """Check that composition-list graph deconvolution recovers every ion.

    The scan returned by ``self.make_scan()`` is peak-picked and then
    deconvoluted with ``CompositionListPeakDependenceGraphDeconvoluter``.
    When the number of deconvoluted peaks equals
    ``sum(map(len, self.charges))`` the test passes immediately. Otherwise
    every deconvoluted peak is matched to its nearest theoretical
    distribution, each expected ion left unmatched is re-fit by hand with
    its diagnostics logged, and the test fails if any ion was missed.
    """
    scan = self.make_scan()
    scan.pick_peaks()
    self.assertIsNotNone(scan.peak_set)
    algorithm_type = CompositionListPeakDependenceGraphDeconvoluter
    decon_config = {
        "composition_list": self.compositions,
        "scorer": PenalizedMSDeconVFitter(5., 2.),
        "use_subtraction": True,
    }
    deconresult = deconvolute_peaks(
        scan.peak_set, decon_config, charge_range=(-1, -8),
        deconvoluter_type=algorithm_type)
    dpeaks = deconresult.peak_set
    n_cases = sum(map(len, self.charges))
    # A count mismatch alone is not a failure; it only triggers the
    # diagnostics below, which fail the test when an expected ion has no
    # matching deconvoluted peak.
    if len(dpeaks) == n_cases:
        return

    tids, ions = self.make_tids()
    tids, ions = zip(
        *sorted(zip(tids, ions), key=lambda x: x[0].monoisotopic_mz))
    seen = set()
    for dp in sorted(dpeaks, key=lambda x: x.mz):
        ix = self.get_nearest_index(dp.mz, tids)
        logger.warning(
            "%0.3f %d %0.3f %r (Matched %d)",
            dp.neutral_mass, dp.charge, dp.score, dp.solution, ix)
        seen.add(ix)

    # Sorted so the diagnostics come out in a reproducible order.
    missed = sorted(set(range(len(ions))) - seen)
    deconvoluter = algorithm_type(scan.peak_set.clone(), **decon_config)
    for ix in missed:
        composition, charge = ions[ix]
        tid = deconvoluter.generate_theoretical_isotopic_cluster(
            composition, charge)
        assert np.isclose(sum(p.intensity for p in tid), 1.0)
        monoisotopic_peak = deconvoluter.peaklist.has_peak(tid[0].mz, 2e-5)
        if monoisotopic_peak is not None:
            tid = deconvoluter.recalibrate_theoretical_mz(
                tid, monoisotopic_peak.mz)
        eid = deconvoluter.match_theoretical_isotopic_distribution(
            tid.peaklist, 2e-5)
        missed_peaks = count_placeholders(eid)
        deconvoluter.scale_theoretical_distribution(tid, eid)
        score = deconvoluter.scorer.evaluate(
            deconvoluter.peaklist, eid, tid.peaklist)
        # Repeat the fit through the deconvoluter's own entry point so the
        # fit record it would have produced can be reported as well.
        fit_record = deconvoluter.fit_composition_at_charge(
            composition, charge)
        rep_eid = drop_placeholders(fit_record.experimental)
        validation = (
            len(rep_eid) < 2,
            len(rep_eid) < len(fit_record.theoretical) / 2.,
            len(rep_eid) == 1 and fit_record.charge > 1,
        )
        # Lazy %-args, matching the logging call above.
        logger.warning(
            "Missed %r %d (%d missed peaks, score = %0.3f, record = %r, validation = %r)",
            composition, charge, missed_peaks, score, fit_record, validation)
    assert not missed, (
        "%d expected ion(s) were not recovered (indices %r)"
        % (len(missed), missed))
def finalize_fit(self, feature_fit, charge_carrier=PROTON, subtract=True,
                 detection_threshold=0.1, max_missed_peaks=1):
    """Turn a feature fit into a ``DeconvolutedLCMSFeature``.

    Each time point within the bounds reported by ``find_bounds`` is
    conformed against the truncated theoretical distribution. Time points
    that pass the peak-count checks and receive a non-negative, non-NaN
    score contribute one ``DeconvolutedLCMSFeatureTreeNode``.

    Parameters
    ----------
    feature_fit
        Fit whose ``features``, ``theoretical``, ``charge``, ``score`` and
        ``supporters`` are read; ``len(feature_fit)`` is used as well.
    charge_carrier
        Forwarded to ``neutral_mass``.
    subtract
        When true, the abundance claimed by the envelope is removed in
        place from the experimental peaks of every checked time point,
        whether or not that time point produced a node.
    detection_threshold
        Forwarded to ``find_bounds``.
    max_missed_peaks
        Time points with more missing peaks than this are skipped.

    Returns
    -------
    DeconvolutedLCMSFeature or None
        ``None`` when fewer than ``self.minimum_size`` nodes were built.
        The fit's non-empty features are invalidated in either case.
    """
    tree_nodes = []
    start_time, end_time = find_bounds(feature_fit, detection_threshold)
    time_points = FeatureSetIterator(
        feature_fit.features, start_time, end_time)
    theoretical = feature_fit.theoretical
    charge = feature_fit.charge
    multiply_charged = abs(charge) > 1

    for observed in time_points:
        conformed, tid, n_missing = conform_envelopes(
            observed, theoretical.truncated_tid)
        real_peaks = drop_placeholders(conformed)
        n_real_peaks = len(real_peaks)
        if n_real_peaks == 0 or n_missing > max_missed_peaks:
            continue
        if n_real_peaks == 1 and multiply_charged:
            continue

        score = self.scorer.evaluate(None, conformed, tid)
        usable = not (np.isnan(score) or score < 0)
        # Per peak, the envelope keeps the smaller of the experimental and
        # theoretical intensities.
        envelope = [
            (exp.mz, min(exp.intensity, theo.intensity))
            for exp, theo in zip(conformed, tid)
        ]

        if usable:
            anchor = first_peak(conformed)
            dpeak = DeconvolutedPeak(
                neutral_mass=neutral_mass(
                    theoretical.monoisotopic_mz, charge,
                    charge_carrier=charge_carrier),
                intensity=sum(abundance for _, abundance in envelope),
                charge=charge,
                signal_to_noise=mean(p.signal_to_noise for p in real_peaks),
                index=anchor.index,
                full_width_at_half_max=mean(
                    p.full_width_at_half_max for p in real_peaks),
                a_to_a2_ratio=a_to_a2_ratio(tid),
                most_abundant_mass=neutral_mass(
                    most_abundant_mz(conformed), charge,
                    charge_carrier=charge_carrier),
                average_mass=neutral_mass(
                    average_mz(conformed), charge,
                    charge_carrier=charge_carrier),
                score=score,
                envelope=envelope,
                mz=theoretical.monoisotopic_mz,
                area=sum(e.area for e in conformed))

            scan_time = time_points.current_time
            lookups = (
                self.precursor_map.precursor_for_peak((scan_time, p.index))
                for p in real_peaks
            )
            precursors = [info for info in lookups if info is not None]
            tree_nodes.append(
                DeconvolutedLCMSFeatureTreeNode(scan_time, [dpeak], precursors))

        if subtract:
            for fpeak, (_, claimed) in zip(conformed, envelope):
                # If the theoretical peak claims more than 70% of an
                # experimental peak's abundance, that peak should not
                # contribute meaningfully to any later fit, so pin its
                # abundance to 1.0 as if it were fully used up.
                if fpeak.intensity * 0.7 < claimed:
                    fpeak.intensity = 1.0
                else:
                    fpeak.intensity -= claimed
                if fpeak.intensity < 0:
                    fpeak.intensity = 1.0

    for feature in feature_fit.features:
        if feature is not None and not isinstance(feature, EmptyFeature):
            feature.invalidate()

    if len(tree_nodes) < self.minimum_size:
        return None
    return DeconvolutedLCMSFeature(
        tree_nodes, feature_fit.charge,
        score=feature_fit.score, n_features=len(feature_fit),
        supporters=feature_fit.supporters)
def finalize_fit(self, feature_fit, charge_carrier=PROTON, subtract=True,
                 detection_threshold=0.1, max_missed_peaks=1):
    """Turn a feature fit into a ``DeconvolutedLCMSFeature``.

    Each time point within the bounds reported by ``find_bounds`` is
    conformed against the truncated theoretical distribution. Time points
    that pass the peak-count checks and receive a non-negative, non-NaN
    score contribute one ``DeconvolutedLCMSFeatureTreeNode``.

    Parameters
    ----------
    feature_fit
        Fit whose ``features``, ``theoretical``, ``charge``, ``score`` and
        ``supporters`` are read; ``len(feature_fit)`` is used as well.
    charge_carrier
        Forwarded to ``neutral_mass``.
    subtract
        When true, the abundance claimed by the envelope is removed in
        place from the experimental peaks of every checked time point,
        whether or not that time point produced a node.
    detection_threshold
        Forwarded to ``find_bounds``.
    max_missed_peaks
        Time points with more missing peaks than this are skipped.

    Returns
    -------
    DeconvolutedLCMSFeature or None
        ``None`` when fewer than ``self.minimum_size`` nodes were built.
        The fit's non-empty features are invalidated in either case.
    """
    tree_nodes = []
    start_time, end_time = find_bounds(feature_fit, detection_threshold)
    time_points = FeatureSetIterator(
        feature_fit.features, start_time, end_time)
    theoretical = feature_fit.theoretical
    charge = feature_fit.charge
    multiply_charged = abs(charge) > 1

    for observed in time_points:
        conformed, tid, n_missing = conform_envelopes(
            observed, theoretical.truncated_tid)
        real_peaks = drop_placeholders(conformed)
        n_real_peaks = len(real_peaks)
        if n_real_peaks == 0 or n_missing > max_missed_peaks:
            continue
        if n_real_peaks == 1 and multiply_charged:
            continue

        score = self.scorer.evaluate(None, conformed, tid)
        usable = not (np.isnan(score) or score < 0)
        # Per peak, the envelope keeps the smaller of the experimental and
        # theoretical intensities.
        envelope = [
            (exp.mz, min(exp.intensity, theo.intensity))
            for exp, theo in zip(conformed, tid)
        ]

        if usable:
            anchor = first_peak(conformed)
            dpeak = DeconvolutedPeak(
                neutral_mass=neutral_mass(
                    theoretical.monoisotopic_mz, charge,
                    charge_carrier=charge_carrier),
                intensity=sum(abundance for _, abundance in envelope),
                charge=charge,
                signal_to_noise=mean(p.signal_to_noise for p in real_peaks),
                index=anchor.index,
                full_width_at_half_max=mean(
                    p.full_width_at_half_max for p in real_peaks),
                a_to_a2_ratio=a_to_a2_ratio(tid),
                most_abundant_mass=neutral_mass(
                    most_abundant_mz(conformed), charge,
                    charge_carrier=charge_carrier),
                average_mass=neutral_mass(
                    average_mz(conformed), charge,
                    charge_carrier=charge_carrier),
                score=score,
                envelope=envelope,
                mz=theoretical.monoisotopic_mz,
                area=sum(e.area for e in conformed))

            scan_time = time_points.current_time
            lookups = (
                self.precursor_map.precursor_for_peak((scan_time, p.index))
                for p in real_peaks
            )
            precursors = [info for info in lookups if info is not None]
            tree_nodes.append(
                DeconvolutedLCMSFeatureTreeNode(scan_time, [dpeak], precursors))

        if subtract:
            for fpeak, (_, claimed) in zip(conformed, envelope):
                # If the theoretical peak claims more than 70% of an
                # experimental peak's abundance, that peak should not
                # contribute meaningfully to any later fit, so pin its
                # abundance to 1.0 as if it were fully used up.
                if fpeak.intensity * 0.7 < claimed:
                    fpeak.intensity = 1.0
                else:
                    fpeak.intensity -= claimed
                if fpeak.intensity < 0:
                    fpeak.intensity = 1.0

    for feature in feature_fit.features:
        if feature is not None and not isinstance(feature, EmptyFeature):
            feature.invalidate()

    if len(tree_nodes) < self.minimum_size:
        return None
    return DeconvolutedLCMSFeature(
        tree_nodes, feature_fit.charge,
        score=feature_fit.score, n_features=len(feature_fit),
        supporters=feature_fit.supporters)