Ejemplo n.º 1
0
 def test_graph_deconvolution(self):
     """Deconvolute a picked scan and check every expected ion is recovered.

     The scan's peak set is deconvoluted with a composition-list dependence
     graph deconvoluter. If the number of deconvoluted peaks equals the
     number of expected charge cases the test ends there. Otherwise every
     deconvoluted peak is logged, each expected ion that no peak was
     matched to is re-fit by hand and logged with diagnostics, and the
     test fails if any ion was missed.
     """
     scan = self.make_scan()
     scan.pick_peaks()
     self.assertIsNotNone(scan.peak_set)
     algorithm_type = CompositionListPeakDependenceGraphDeconvoluter
     decon_config = dict(
         composition_list=self.compositions,
         scorer=PenalizedMSDeconVFitter(5., 2.),
         use_subtraction=True)
     dpeaks = deconvolute_peaks(
         scan.peak_set,
         decon_config,
         charge_range=(-1, -8),
         deconvoluter_type=algorithm_type).peak_set
     n_cases = sum(len(charge_group) for charge_group in self.charges)
     if len(dpeaks) == n_cases:
         return

     # Diagnostics path: the peak count is off, so work out which of the
     # expected ions never got a deconvoluted peak assigned to it.
     tids, ions = self.make_tids()
     by_mz = sorted(zip(tids, ions),
                    key=lambda pair: pair[0].monoisotopic_mz)
     tids, ions = zip(*by_mz)
     seen = set()
     for dp in sorted(dpeaks, key=lambda peak: peak.mz):
         ix = self.get_nearest_index(dp.mz, tids)
         logger.warning("%0.3f %d %0.3f %r (Matched %d)",
                        dp.neutral_mass, dp.charge, dp.score,
                        dp.solution, ix)
         seen.add(ix)
     missed = list(set(range(len(ions))).difference(seen))

     # A fresh deconvoluter over a copy of the peak set is used to
     # reproduce the fit for each missed ion.
     deconvoluter = algorithm_type(scan.peak_set.clone(), **decon_config)
     for ix in missed:
         ion = ions[ix]
         tid = deconvoluter.generate_theoretical_isotopic_cluster(*ion)
         assert np.isclose(sum(p.intensity for p in tid), 1.0)
         monoisotopic_peak = deconvoluter.peaklist.has_peak(
             tid[0].mz, 2e-5)
         if monoisotopic_peak is not None:
             tid = deconvoluter.recalibrate_theoretical_mz(
                 tid, monoisotopic_peak.mz)
         eid = deconvoluter.match_theoretical_isotopic_distribution(
             tid.peaklist, 2e-5)
         missed_peaks = count_placeholders(eid)
         deconvoluter.scale_theoretical_distribution(tid, eid)
         score = deconvoluter.scorer.evaluate(
             deconvoluter.peaklist, eid, tid.peaklist)

         # Second pass through the deconvoluter's own fitting entry point;
         # its envelopes feed the validation flags reported below.
         fit_record = deconvoluter.fit_composition_at_charge(*ion)
         fit_eid = fit_record.experimental
         fit_tid = fit_record.theoretical
         n_real = len(drop_placeholders(fit_eid))
         # NOTE(review): charge_range above is negative while this flag
         # tests `charge > 1` without abs() -- confirm that is intended.
         validation = (
             n_real < 2,
             n_real < len(fit_tid) / 2.,
             n_real == 1 and fit_record.charge > 1,
         )
         composition, charge = ion
         logger.warning(
             "Missed %r %d (%d missed peaks, score = %0.3f, record = %r, validation = %r)"
             % (composition, charge, missed_peaks, score, fit_record,
                validation))
     assert not missed
def test_finalize(self, feature_fit, charge_carrier=PROTON, detection_threshold=0.1,
                  max_missed_peaks=1):
    """Yield per-time-point fit diagnostics for ``feature_fit``.

    Iterates the fit's features between the bounds returned by
    ``find_bounds`` and, for each experimental envelope, yields a tuple
    ``(time, score, n_missing, invalid)``. ``invalid`` is truthy when the
    envelope has no real peaks, has a single real peak while the absolute
    charge exceeds 1, or is missing more than ``max_missed_peaks`` peaks.

    ``charge_carrier`` is accepted but not used by this generator.
    """
    lower, upper = find_bounds(feature_fit, detection_threshold)
    walker = FeatureSetIterator(feature_fit.features, lower, upper)
    reference_tid = feature_fit.theoretical
    multiply_charged = abs(feature_fit.charge) > 1
    for eid in walker:
        cleaned_eid, tid, n_missing = conform_envelopes(eid, reference_tid)
        n_real_peaks = len(drop_placeholders(cleaned_eid))
        lone_peak = n_real_peaks == 1 and multiply_charged
        invalid = n_real_peaks == 0 or lone_peak or n_missing > max_missed_peaks
        # The score is computed even for envelopes flagged invalid.
        score = self.scorer.evaluate(None, cleaned_eid, tid)
        yield walker.current_time, score, n_missing, invalid
def test_finalize(self, feature_fit, charge_carrier=PROTON, detection_threshold=0.1,
                  max_missed_peaks=1):
    """Generate ``(time, score, n_missing, invalid)`` for each time point.

    The time window comes from ``find_bounds(feature_fit,
    detection_threshold)``. Each experimental envelope in that window is
    conformed against the fit's theoretical distribution and scored with
    ``self.scorer``. The ``invalid`` flag is truthy when no real peaks
    remain, when only one real peak remains and the absolute charge is
    above 1, or when more than ``max_missed_peaks`` peaks are missing.

    ``charge_carrier`` is part of the signature but is not read here.
    """
    window_start, window_end = find_bounds(feature_fit, detection_threshold)
    envelopes = FeatureSetIterator(
        feature_fit.features, window_start, window_end)
    theoretical = feature_fit.theoretical
    charge_above_one = abs(feature_fit.charge) > 1
    for eid in envelopes:
        cleaned_eid, tid, n_missing = conform_envelopes(eid, theoretical)
        real_count = len(drop_placeholders(cleaned_eid))
        single_real_peak = real_count == 1 and charge_above_one
        invalid = real_count == 0 or single_real_peak or n_missing > max_missed_peaks
        # Scoring happens regardless of the invalid flag.
        score = self.scorer.evaluate(None, cleaned_eid, tid)
        yield envelopes.current_time, score, n_missing, invalid
Ejemplo n.º 4
0
 def test_graph_deconvolution(self):
     """Run graph deconvolution on a picked scan and verify ion recovery.

     When the count of deconvoluted peaks matches the number of expected
     charge cases nothing further is checked. On a mismatch, each
     deconvoluted peak is logged with the index of its nearest expected
     ion, every expected ion left unmatched is re-fit manually and logged
     with its score and validation flags, and the test then asserts that
     no ion was missed.
     """
     scan = self.make_scan()
     scan.pick_peaks()
     self.assertIsNotNone(scan.peak_set)
     algorithm_type = CompositionListPeakDependenceGraphDeconvoluter
     decon_config = dict(
         composition_list=self.compositions,
         scorer=PenalizedMSDeconVFitter(5., 2.),
         use_subtraction=True)
     result = deconvolute_peaks(
         scan.peak_set, decon_config, charge_range=(-1, -8),
         deconvoluter_type=algorithm_type)
     dpeaks = result.peak_set
     n_cases = sum(len(group) for group in self.charges)
     if len(dpeaks) == n_cases:
         return

     # Mismatch: pair theoretical distributions with their ions, ordered
     # by monoisotopic m/z, so nearest-index lookups line up with `ions`.
     tids, ions = self.make_tids()
     paired = sorted(zip(tids, ions), key=lambda pair: pair[0].monoisotopic_mz)
     tids, ions = zip(*paired)
     seen = set()
     for dp in sorted(dpeaks, key=lambda peak: peak.mz):
         ix = self.get_nearest_index(dp.mz, tids)
         logger.warning("%0.3f %d %0.3f %r (Matched %d)", dp.neutral_mass, dp.charge, dp.score, dp.solution, ix)
         seen.add(ix)
     missed = list(set(range(len(ions))).difference(seen))

     # Re-fit each missed ion against a clone of the peak set.
     deconvoluter = algorithm_type(scan.peak_set.clone(), **decon_config)
     for ix in missed:
         ion = ions[ix]
         tid = deconvoluter.generate_theoretical_isotopic_cluster(*ion)
         assert np.isclose(sum(p.intensity for p in tid), 1.0)
         monoisotopic_peak = deconvoluter.peaklist.has_peak(tid[0].mz, 2e-5)
         if monoisotopic_peak is not None:
             tid = deconvoluter.recalibrate_theoretical_mz(tid, monoisotopic_peak.mz)
         eid = deconvoluter.match_theoretical_isotopic_distribution(tid.peaklist, 2e-5)
         missed_peaks = count_placeholders(eid)
         deconvoluter.scale_theoretical_distribution(tid, eid)
         score = deconvoluter.scorer.evaluate(deconvoluter.peaklist, eid, tid.peaklist)

         # The deconvoluter's own fit supplies the envelopes used for the
         # validation flags in the log line.
         fit_record = deconvoluter.fit_composition_at_charge(*ion)
         fit_eid = fit_record.experimental
         fit_tid = fit_record.theoretical
         real_count = len(drop_placeholders(fit_eid))
         # NOTE(review): charge_range above is negative while this flag
         # tests `charge > 1` without abs() -- confirm that is intended.
         validation = (
             real_count < 2,
             real_count < len(fit_tid) / 2.,
             real_count == 1 and fit_record.charge > 1,
         )
         composition, charge = ion
         logger.warning("Missed %r %d (%d missed peaks, score = %0.3f, record = %r, validation = %r)" % (
             composition, charge, missed_peaks, score, fit_record, validation))
     assert not missed
Ejemplo n.º 5
0
    def finalize_fit(self,
                     feature_fit,
                     charge_carrier=PROTON,
                     subtract=True,
                     detection_threshold=0.1,
                     max_missed_peaks=1):
        """Build a deconvoluted LC-MS feature from ``feature_fit``.

        Walks the fit's features between the bounds from ``find_bounds``.
        Each time point whose envelope has enough real peaks is scored;
        when the score is neither NaN nor negative a deconvoluted peak and
        tree node are recorded. If ``subtract`` is true, the abundance
        attributed to the fit is removed from the experimental peaks of
        every scored time point, whether or not a node was recorded.

        Afterwards every feature in ``feature_fit.features`` that is not
        ``None`` or an ``EmptyFeature`` is invalidated.

        Returns ``None`` when fewer than ``self.minimum_size`` nodes were
        produced, otherwise a ``DeconvolutedLCMSFeature``.
        """
        start_time, end_time = find_bounds(feature_fit, detection_threshold)
        feat_iter = FeatureSetIterator(feature_fit.features, start_time,
                                       end_time)
        base_tid = feature_fit.theoretical
        charge = feature_fit.charge
        multiply_charged = abs(charge) > 1
        nodes = []
        for eid in feat_iter:
            cleaned_eid, tid, n_missing = conform_envelopes(
                eid, base_tid.truncated_tid)
            rep_eid = drop_placeholders(cleaned_eid)
            n_real_peaks = len(rep_eid)
            lone_peak = n_real_peaks == 1 and multiply_charged
            if n_real_peaks == 0 or lone_peak or n_missing > max_missed_peaks:
                # Too little real signal at this time point to use.
                continue

            score = self.scorer.evaluate(None, cleaned_eid, tid)
            usable = not (np.isnan(score) or score < 0)
            # Per peak, the fit may claim no more than was observed.
            envelope = [(observed.mz,
                         min(observed.intensity, expected.intensity))
                        for observed, expected in zip(cleaned_eid, tid)]

            if usable:
                total_abundance = sum(claimed for _, claimed in envelope)
                monoisotopic_mass = neutral_mass(
                    base_tid.monoisotopic_mz, charge,
                    charge_carrier=charge_carrier)
                reference_peak = first_peak(cleaned_eid)
                avg_signal_to_noise = mean(
                    p.signal_to_noise for p in rep_eid)
                avg_fwhm = mean(p.full_width_at_half_max for p in rep_eid)
                isotope_ratio = a_to_a2_ratio(tid)
                peak_most_abundant_mass = neutral_mass(
                    most_abundant_mz(cleaned_eid), charge,
                    charge_carrier=charge_carrier)
                peak_average_mass = neutral_mass(
                    average_mz(cleaned_eid), charge,
                    charge_carrier=charge_carrier)
                total_area = sum(e.area for e in cleaned_eid)

                dpeak = DeconvolutedPeak(
                    neutral_mass=monoisotopic_mass,
                    intensity=total_abundance,
                    charge=charge,
                    signal_to_noise=avg_signal_to_noise,
                    index=reference_peak.index,
                    full_width_at_half_max=avg_fwhm,
                    a_to_a2_ratio=isotope_ratio,
                    most_abundant_mass=peak_most_abundant_mass,
                    average_mass=peak_average_mass,
                    score=score,
                    envelope=envelope,
                    mz=base_tid.monoisotopic_mz,
                    area=total_area)

                time = feat_iter.current_time
                lookups = (
                    self.precursor_map.precursor_for_peak((time, peak.index))
                    for peak in rep_eid)
                precursor_info_set = [
                    pinfo for pinfo in lookups if pinfo is not None]
                nodes.append(
                    DeconvolutedLCMSFeatureTreeNode(time, [dpeak],
                                                    precursor_info_set))

            if not subtract:
                continue
            for fpeak, (_, claimed) in zip(cleaned_eid, envelope):
                # When the fit claims more than 70% of an experimental
                # peak, that peak should not contribute meaningfully to
                # later fits: set its abundance to 1.0 as though it were
                # fully used up. Otherwise subtract the claimed amount.
                if (fpeak.intensity * 0.7) < claimed:
                    fpeak.intensity = 1.0
                else:
                    fpeak.intensity -= claimed
                # A negative remainder is likewise reset to 1.0.
                if fpeak.intensity < 0:
                    fpeak.intensity = 1.0

        for feature in feature_fit.features:
            if feature is not None and not isinstance(feature, EmptyFeature):
                feature.invalidate()

        if len(nodes) < self.minimum_size:
            return None

        return DeconvolutedLCMSFeature(
            nodes,
            feature_fit.charge,
            score=feature_fit.score,
            n_features=len(feature_fit),
            supporters=feature_fit.supporters)
    def finalize_fit(self, feature_fit, charge_carrier=PROTON, subtract=True,
                     detection_threshold=0.1, max_missed_peaks=1):
        """Convert ``feature_fit`` into a ``DeconvolutedLCMSFeature``.

        For every time point between the bounds from ``find_bounds``, the
        experimental envelope is conformed to the truncated theoretical
        distribution. Time points with no real peaks, with one real peak
        at an absolute charge above 1, or with more than
        ``max_missed_peaks`` missing peaks are skipped. The rest are
        scored; a score that is neither NaN nor negative yields a tree
        node. With ``subtract`` true, the fitted abundance is removed from
        the experimental peaks of each scored time point.

        All features of the fit other than ``None`` and ``EmptyFeature``
        instances are invalidated before returning.

        Returns ``None`` if fewer than ``self.minimum_size`` nodes were
        built, else the assembled feature.
        """
        start_time, end_time = find_bounds(feature_fit, detection_threshold)
        feat_iter = FeatureSetIterator(feature_fit.features, start_time, end_time)
        base_tid = feature_fit.theoretical
        charge = feature_fit.charge
        charge_above_one = abs(charge) > 1
        nodes = []
        for eid in feat_iter:
            cleaned_eid, tid, n_missing = conform_envelopes(eid, base_tid.truncated_tid)
            rep_eid = drop_placeholders(cleaned_eid)
            real_count = len(rep_eid)
            single_real_peak = real_count == 1 and charge_above_one
            if real_count == 0 or single_real_peak or n_missing > max_missed_peaks:
                # Not enough real signal here; move to the next time point.
                continue

            score = self.scorer.evaluate(None, cleaned_eid, tid)
            score_ok = not (np.isnan(score) or score < 0)
            # Cap what the fit takes from each peak at the observed value.
            envelope = [
                (exp_peak.mz, min(exp_peak.intensity, theo_peak.intensity))
                for exp_peak, theo_peak in zip(cleaned_eid, tid)]

            if score_ok:
                total_abundance = sum(taken for _, taken in envelope)
                monoisotopic_mass = neutral_mass(
                    base_tid.monoisotopic_mz, charge, charge_carrier=charge_carrier)
                reference_peak = first_peak(cleaned_eid)
                snr = mean(p.signal_to_noise for p in rep_eid)
                fwhm = mean(p.full_width_at_half_max for p in rep_eid)
                ratio = a_to_a2_ratio(tid)
                mass_at_most_abundant = neutral_mass(
                    most_abundant_mz(cleaned_eid), charge, charge_carrier=charge_carrier)
                mass_at_average = neutral_mass(
                    average_mz(cleaned_eid), charge, charge_carrier=charge_carrier)
                summed_area = sum(e.area for e in cleaned_eid)

                dpeak = DeconvolutedPeak(
                    neutral_mass=monoisotopic_mass, intensity=total_abundance,
                    charge=charge,
                    signal_to_noise=snr,
                    index=reference_peak.index,
                    full_width_at_half_max=fwhm,
                    a_to_a2_ratio=ratio,
                    most_abundant_mass=mass_at_most_abundant,
                    average_mass=mass_at_average,
                    score=score,
                    envelope=envelope,
                    mz=base_tid.monoisotopic_mz,
                    area=summed_area)

                time = feat_iter.current_time
                found = (self.precursor_map.precursor_for_peak((time, peak.index))
                         for peak in rep_eid)
                precursor_info_set = [pinfo for pinfo in found if pinfo is not None]
                nodes.append(DeconvolutedLCMSFeatureTreeNode(time, [dpeak], precursor_info_set))

            if not subtract:
                continue
            for fpeak, (_, taken) in zip(cleaned_eid, envelope):
                # If the fit uses up more than 70% of a single peak's
                # abundance, that peak should not contribute meaningfully
                # to any later fit, so its abundance is set to 1.0 as if
                # it were fully consumed; otherwise the amount is deducted.
                if (fpeak.intensity * 0.7) < taken:
                    fpeak.intensity = 1.0
                else:
                    fpeak.intensity -= taken
                # Guard against a negative remainder after deduction.
                if fpeak.intensity < 0:
                    fpeak.intensity = 1.0

        for feature in feature_fit.features:
            if feature is not None and not isinstance(feature, EmptyFeature):
                feature.invalidate()

        if len(nodes) < self.minimum_size:
            return None

        return DeconvolutedLCMSFeature(
            nodes, feature_fit.charge,
            score=feature_fit.score, n_features=len(feature_fit),
            supporters=feature_fit.supporters)