Example #1
0
 def test_penalized_msdeconv(self):
     """Check that ``PenalizedMSDeconVFitter`` scores reproducibly.

     The same ``experimental``/``theoretical`` pair is scored ten times
     through ``evaluate``; every repeat must be numerically close to the
     first, and the first must match the reference value to 3 decimal
     places. The scorer is then invoked as a callable and held to the
     same reference value.
     """
     scorer = PenalizedMSDeconVFitter(20, 2.0)
     # Repeated evaluation guards against run-to-run drift in the score.
     scores = [
         scorer.evaluate(None, experimental, theoretical) for _ in range(10)
     ]
     score = scores[0]
     # On failure the full list of scores is reported as the assertion message.
     assert all(np.isclose(s, score) for s in scores[1:]), scores
     self.assertAlmostEqual(score, 293.47483621051316, 3)
     # Calling the scorer object directly is held to the same reference value.
     score = scorer(None, experimental, theoretical)
     self.assertAlmostEqual(score, 293.47483621051316, 3)
Example #2
0
 def test_penalized_msdeconv(self):
     """Verify ``PenalizedMSDeconVFitter`` gives a stable, known score.

     Ten ``evaluate`` calls on the same input must agree with each other
     (via ``np.isclose``), and both ``evaluate`` and a direct call of the
     scorer must match the reference value to 3 decimal places.
     """
     reference = 293.47483621051316
     fitter = PenalizedMSDeconVFitter(20, 2.0)

     repeats = []
     for _ in range(10):
         repeats.append(fitter.evaluate(None, experimental, theoretical))

     first, remainder = repeats[0], repeats[1:]
     # The whole list of repeats is attached as the failure message.
     assert all(np.isclose(value, first) for value in remainder), repeats
     self.assertAlmostEqual(first, reference, places=3)

     called = fitter(None, experimental, theoretical)
     self.assertAlmostEqual(called, reference, places=3)
Example #3
0
 def test_graph_deconvolution(self):
     """Deconvolute a scan against a composition list and require no misses.

     The scan from ``self.make_scan()`` is peak-picked and deconvoluted with
     ``CompositionListPeakDependenceGraphDeconvoluter``. If the number of
     deconvoluted peaks equals the total number of entries in
     ``self.charges`` the test passes immediately. Otherwise each
     deconvoluted peak is matched to its nearest expected ion, diagnostics
     are logged for every expected ion that was not matched, and the test
     fails if any ion was missed.
     """
     scan = self.make_scan()
     scan.pick_peaks()
     self.assertIsNotNone(scan.peak_set)

     algorithm_type = CompositionListPeakDependenceGraphDeconvoluter
     decon_config = dict(
         composition_list=self.compositions,
         scorer=PenalizedMSDeconVFitter(5., 2.),
         use_subtraction=True,
     )
     deconresult = deconvolute_peaks(
         scan.peak_set,
         decon_config,
         charge_range=(-1, -8),
         deconvoluter_type=algorithm_type)
     dpeaks = deconresult.peak_set
     n_cases = sum(len(group) for group in self.charges)

     # Nothing to diagnose when the peak count matches the expected count.
     if len(dpeaks) == n_cases:
         return

     # Order the expected distributions and their ions together by
     # monoisotopic m/z so indices line up between the two sequences.
     tids, ions = self.make_tids()
     paired = sorted(zip(tids, ions),
                     key=lambda pair: pair[0].monoisotopic_mz)
     tids, ions = zip(*paired)

     # Record which expected ion each deconvoluted peak lands nearest to.
     seen = set()
     for dp in sorted(dpeaks, key=lambda peak: peak.mz):
         ix = self.get_nearest_index(dp.mz, tids)
         logger.warning("%0.3f %d %0.3f %r (Matched %d)",
                        dp.neutral_mass, dp.charge, dp.score,
                        dp.solution, ix)
         seen.add(ix)
     missed = list(set(range(len(ions))) - seen)

     # A fresh deconvoluter on a cloned peak set is used to re-fit each
     # missed ion by hand and log why it may have been rejected.
     deconvoluter = algorithm_type(scan.peak_set.clone(), **decon_config)
     for ix in missed:
         ion = ions[ix]
         theoretical = deconvoluter.generate_theoretical_isotopic_cluster(
             *ion)
         assert np.isclose(sum(p.intensity for p in theoretical), 1.0)
         monoisotopic_peak = deconvoluter.peaklist.has_peak(
             theoretical[0].mz, 2e-5)
         if monoisotopic_peak is not None:
             theoretical = deconvoluter.recalibrate_theoretical_mz(
                 theoretical, monoisotopic_peak.mz)
         experimental = deconvoluter.match_theoretical_isotopic_distribution(
             theoretical.peaklist, 2e-5)
         missed_peaks = count_placeholders(experimental)
         deconvoluter.scale_theoretical_distribution(
             theoretical, experimental)
         score = deconvoluter.scorer.evaluate(
             deconvoluter.peaklist, experimental, theoretical.peaklist)

         # Compare against what the deconvoluter's own fitting routine yields.
         fit_record = deconvoluter.fit_composition_at_charge(*ion)
         experimental = fit_record.experimental
         theoretical = fit_record.theoretical
         rep_eid = drop_placeholders(experimental)
         validation = (
             len(rep_eid) < 2,
             len(rep_eid) < len(theoretical) / 2.,
             len(rep_eid) == 1 and fit_record.charge > 1,
         )
         composition, charge = ion
         logger.warning(
             "Missed %r %d (%d missed peaks, score = %0.3f, record = %r, validation = %r)"
             % (composition, charge, missed_peaks, score, fit_record,
                validation))
     assert not missed
Example #4
0
 def test_retrieve_deconvolution_solution(self):
     """Check that every prioritized precursor peak gets a deconvolution result.

     The precursor scan of the bunch from ``self.make_scan()`` is
     peak-picked, and the peak found at each product's precursor m/z is
     passed to ``deconvolute_peaks`` as a priority. For each priority:

     * if a result was returned, it must lie within 1 m/z unit of the
       query and the dependency network must hold a solution for the
       anchor peak;
     * if no result was returned, diagnostics are logged and checked
       against a reference deconvoluter built on a cloned peak set, and
       the test then fails unconditionally.
     """
     bunch = self.make_scan()
     scan = bunch.precursor
     scan.pick_peaks()
     ms1_deconvolution_args = {
         "averagine": glycopeptide,
         "scorer": PenalizedMSDeconVFitter(20., 2.),
     }
     priorities = [
         scan.has_peak(product.precursor_information.mz)
         for product in bunch.products
     ]
     algorithm_type = AveraginePeakDependenceGraphDeconvoluter
     deconresult = deconvolute_peaks(scan.peak_set,
                                     ms1_deconvolution_args,
                                     priority_list=priorities,
                                     deconvoluter_type=algorithm_type)
     dpeaks = deconresult.peak_set
     deconvoluter = deconresult.deconvoluter
     priority_results = deconresult.priorities
     # Independent deconvoluter used only to diagnose missing results.
     reference_deconvoluter = algorithm_type(scan.peak_set.clone(),
                                             **ms1_deconvolution_args)
     for i, result in enumerate(priority_results):
         query = priorities[i].mz
         if result is None:
             logger.warning("Query %d (%f) had no result", i, query)
             raw_peaks = scan.peak_set.between(query - 2, query + 3)
             anchor_peak = scan.peak_set.has_peak(query)
             deconvoluted_peaks = dpeaks.between(query - 2,
                                                 query + 3,
                                                 use_mz=True)
             possible_solutions = reference_deconvoluter._fit_all_charge_states(
                 anchor_peak)
             sols = []
             logger.warning("Possible Solutions %r", possible_solutions)
             if not possible_solutions:
                 # Fall back to building candidate fits by hand for a
                 # fixed set of charge states.
                 for charge in [3, 4, 5]:
                     tid = reference_deconvoluter.averagine.isotopic_cluster(
                         anchor_peak.mz, charge)
                     assert np.isclose(tid.monoisotopic_mz, anchor_peak.mz)
                     assert np.isclose(sum(p.intensity for p in tid), 1.0)
                     eid = reference_deconvoluter.match_theoretical_isotopic_distribution(
                         tid.peaklist, error_tolerance=2e-5)
                     assert len(eid) == len(tid)
                     record = reference_deconvoluter._evaluate_theoretical_distribution(
                         eid, tid, anchor_peak, charge)
                     sols.append(record)
                 logger.warning("Manually Generated Solutions %r", sols)
             assert anchor_peak is not None and raw_peaks and (
                 possible_solutions or sols) and not deconvoluted_peaks
             assert deconvoluter.peak_dependency_network.find_solution_for(
                 anchor_peak) is not None
             assert dpeaks.has_peak(query, use_mz=True)
             # A missing result is always a failure; the checks above only
             # narrow down where the solution was lost.
             assert result is not None
         else:
             assert 0 <= abs(result.mz - query) < 1
             anchor_peak = scan.peak_set.has_peak(query)
             assert deconvoluter.peak_dependency_network.find_solution_for(
                 anchor_peak) is not None
 def test_processor(self):
     """Iterate a ``ScanProcessor`` over ``self.mzml_path`` and sanity-check each bunch.

     Every yielded bunch, its ``precursor`` and its ``products`` must be
     non-``None``.
     """
     ms1_args = {
         "averagine": glycopeptide,
         "scorer": PenalizedMSDeconVFitter(5., 2.),
     }
     proc = processor.ScanProcessor(self.mzml_path,
                                    ms1_deconvolution_args=ms1_args)
     bunches = iter(proc)
     for bunch in bunches:
         # Checked in this order so a None bunch fails before its
         # attributes are touched.
         self.assertIsNotNone(bunch)
         self.assertIsNotNone(bunch.precursor)
         self.assertIsNotNone(bunch.products)
 def test_deconvolution(self):
     """Deconvolute with ``AveragineDeconvoluter`` and check the expected peaks.

     Exactly six deconvoluted peaks are expected, and each entry of
     ``points`` must be found in the result by its neutral mass.
     """
     scan = self.make_scan()
     peak_set = scan.peak_set
     decon_config = {
         "averagine": peptide,
         "scorer": PenalizedMSDeconVFitter(5., 1.),
         "use_subtraction": False,
     }
     deconresult = deconvolute_peaks(peak_set,
                                     decon_config,
                                     left_search_limit=3,
                                     deconvoluter_type=AveragineDeconvoluter)
     dpeaks = deconresult.peak_set
     assert len(dpeaks) == 6
     for point in points:
         # The first two entries of each point are fed to neutral_mass().
         mass = neutral_mass(point[0], point[1])
         self.assertIsNotNone(dpeaks.has_peak(mass))
 def test_missing_charge_processing(self):
     """Process ``self.missing_charge_mzml`` and check defaulted precursors.

     Every bunch, precursor and product list must be non-``None``. For any
     product whose precursor information is flagged ``defaulted``, the
     precursor peak set must contain no peaks within 1 unit either side of
     the precursor m/z.
     """
     ms1_args = {
         "averagine": glycopeptide,
         "scorer": PenalizedMSDeconVFitter(5., 2.),
     }
     proc = processor.ScanProcessor(self.missing_charge_mzml,
                                    ms1_deconvolution_args=ms1_args)
     bunches = iter(proc)
     for bunch in bunches:
         self.assertIsNotNone(bunch)
         self.assertIsNotNone(bunch.precursor)
         self.assertIsNotNone(bunch.products)
         for product in bunch.products:
             # Only defaulted precursors are subject to the emptiness check.
             if not product.precursor_information.defaulted:
                 continue
             lower = product.precursor_information.mz - 1
             upper = product.precursor_information.mz + 1
             candidates = bunch.precursor.peak_set.between(lower, upper)
             assert len(candidates) == 0
    def test_complex_processor(self):
        """Check recalculated precursor masses from ``self.complex_compressed_mzml``.

        The first bunch must hold five products, none with defaulted
        precursor information, and each product's extracted neutral mass
        must match its reference value to 2 decimal places. The processor
        is then repositioned with ``start_from_scan`` and the next bunch's
        products are checked against a second table of reference masses.
        """
        ms1_args = {
            "averagine": glycopeptide,
            "scorer": PenalizedMSDeconVFitter(20., 2.),
            "truncate_after": 0.95,
        }
        msn_args = {
            "averagine": peptide,
            "scorer": MSDeconVFitter(10.),
            "truncate_after": 0.8,
        }
        proc = processor.ScanProcessor(self.complex_compressed_mzml,
                                       ms1_deconvolution_args=ms1_args,
                                       msn_deconvolution_args=msn_args)

        def check_masses(products, expected):
            # Each product is looked up in the reference table by its id.
            for product in products:
                mass = product.precursor_information.extracted_neutral_mass
                self.assertAlmostEqual(mass, expected[product.id], 2)

        bunch = next(proc)
        assert len(bunch.products) == 5
        for product in bunch.products:
            assert not product.precursor_information.defaulted
        check_masses(bunch.products, {
            'scanId=1740086': 4640.00074242012,
            'scanId=1740149': 4786.05878475792,
            'scanId=1740226': 4640.007868154431,
            'scanId=1740344': 4348.90894554512,
            'scanId=1740492': 5005.1329902247435,
        })

        proc.start_from_scan("scanId=1760847")
        bunch = next(proc)
        check_masses(bunch.products, {
            'scanId=1761168': 4640.01972225792,
            'scanId=1761235': 4640.019285920238,
            'scanId=1761325': 4786.07251976387,
            'scanId=1761523': 4696.016295197582,
            'scanId=1761804': 986.58798612896,
        })
 def test_graph_deconvolution(self):
     """Deconvolute with ``AveraginePeakDependenceGraphDeconvoluter`` and check the solutions.

     Exactly two deconvoluted peaks are expected. Each entry of ``points``
     must be found by neutral mass, and the dependency network's solution
     for the corresponding scan peak must agree with the deconvoluted
     peak's m/z to 3 decimal places.
     """
     scan = self.make_scan()
     scan.pick_peaks()
     self.assertIsNotNone(scan.peak_set)

     peak_set = scan.peak_set
     decon_config = {
         "averagine": peptide,
         "scorer": PenalizedMSDeconVFitter(5., 1.),
     }
     deconresult = deconvolute_peaks(
         peak_set,
         decon_config,
         deconvoluter_type=AveraginePeakDependenceGraphDeconvoluter)
     dpeaks = deconresult.peak_set
     assert len(dpeaks) == 2
     deconvoluter = deconresult.deconvoluter
     for point in points:
         mass = neutral_mass(point[0], point[1])
         peak = dpeaks.has_peak(mass)
         self.assertIsNotNone(peak)
         # Look the deconvoluted peak back up in the scan, then ask the
         # dependency network which solution it assigned to that peak.
         scan_peak = scan.has_peak(peak.mz)
         solution = deconvoluter.peak_dependency_network.find_solution_for(
             scan_peak)
         self.assertAlmostEqual(solution.mz, peak.mz, places=3)