Beispiel #1
0
    def setUp(self):
        """Create a two-run Multipeptide fixture and an alignment algorithm.

        The Multipeptide is stored as ``self.mpep``; an
        ``algo.AlignmentAlgorithm`` with ``verbose`` switched on is stored as
        ``self.al``.
        """

        # Layout of a peakgroup tuple:
        #   0. id
        #   1. quality score (FDR)
        #   2. retention time (normalized)
        #   3. intensity

        def make_group(run, peakgroups):
            # Put the given peakgroup tuples on a Precursor named
            # "precursor_1" and wrap that precursor in its own PrecursorGroup.
            prec = precursor.Precursor("precursor_1", run)
            for tpl in peakgroups:
                prec.add_peakgroup_tpl(tpl, "precursor_1", -1)
            group = precursor_group.PrecursorGroup(prec.get_id(), run)
            group.addPrecursor(prec)
            return group

        mpep = Multipeptide()
        mpep.set_nr_runs(2)

        # Run 1: a single peakgroup (FDR 0.1, RT 100)
        first_run = MockRun("0_1")
        mpep.insert("0_1", make_group(first_run, [
            ("someID_1", 0.1, 100, 10000),
        ]))

        # Run 2: two candidate peakgroups
        #  - peakgroup2 : FDR 0.2,  RT 105
        #  - peakgroup3 : FDR 0.18, RT 130
        second_run = MockRun("0_2")
        mpep.insert("0_2", make_group(second_run, [
            ("peakgroup2", 0.2, 105, 10000),
            ("peakgroup3", 0.18, 130, 10000),
        ]))

        self.mpep = mpep
        self.al = algo.AlignmentAlgorithm()
        self.al.verbose = True
    def test_all_above_cutoff(self):
        """Check ``all_above_cutoff`` before and after data is inserted.

        A two-run Multipeptide without data must report False for cutoff 0.4.
        After ``help_insert`` has filled it, cutoff 0.4 must give True and
        cutoff 0.15 must give False.
        """
        mpep = Multipeptide()
        mpep.set_nr_runs(2)

        # Nothing has been inserted yet
        empty_result = mpep.all_above_cutoff(0.4)
        self.assertFalse(empty_result)

        help_insert(mpep)

        loose_result = mpep.all_above_cutoff(0.4)
        self.assertTrue(loose_result)
        strict_result = mpep.all_above_cutoff(0.15)
        self.assertFalse(strict_result)
    def test_find_best_peptide_pg(self):
        """Check ``find_best_peptide_pg`` on an empty and a filled Multipeptide.

        Without data the method must return None; after ``help_insert`` the
        returned peakgroup must have an FDR score of (almost exactly) 0.1.
        """
        mpep = Multipeptide()
        mpep.set_nr_runs(2)

        # No precursor groups inserted yet -> no best peakgroup
        self.assertIsNone(mpep.find_best_peptide_pg())

        help_insert(mpep)

        best_pg = mpep.find_best_peptide_pg()
        self.assertAlmostEqual(best_pg.get_fdr_score(), 0.1)
    def test_all_above_cutoff(self):
        """Verify ``all_above_cutoff`` for an empty and a populated Multipeptide.

        Expected results: False for cutoff 0.4 while empty; after
        ``help_insert`` True for cutoff 0.4 and False for cutoff 0.15.
        """
        multipeptide = Multipeptide()
        multipeptide.set_nr_runs(2)

        # Empty container
        result_when_empty = multipeptide.all_above_cutoff(0.4)
        self.assertFalse(result_when_empty)

        # Populated container
        help_insert(multipeptide)
        result_at_040 = multipeptide.all_above_cutoff(0.4)
        self.assertTrue(result_at_040)
        result_at_015 = multipeptide.all_above_cutoff(0.15)
        self.assertFalse(result_at_015)
    def test_find_best_peptide_pg(self):
        """Verify the best peakgroup lookup of a Multipeptide.

        An empty Multipeptide must yield None; once ``help_insert`` has added
        data, the best peakgroup's FDR score must be approximately 0.1.
        """
        multipeptide = Multipeptide()
        multipeptide.set_nr_runs(2)

        best_before_insert = multipeptide.find_best_peptide_pg()
        self.assertIsNone(best_before_insert)

        help_insert(multipeptide)

        best_after_insert = multipeptide.find_best_peptide_pg()
        self.assertAlmostEqual(best_after_insert.get_fdr_score(), 0.1)
 def test_more_than_fraction_selected(self):
     """Check ``more_than_fraction_selected`` for 1 and 2 expected runs.

     One precursor group (``self.mockPrecursorGroup``) is inserted for run
     "42_0". With one expected run, fractions 0.1 and 0.6 must both be
     reported as exceeded; with two expected runs only 0.1 must be.
     """
     mpep = Multipeptide()

     # A freshly created Multipeptide has no id
     self.assertIsNone(mpep.get_id())
     mpep.insert("42_0", self.mockPrecursorGroup)

     mpep.set_nr_runs(1)
     for fraction in (0.1, 0.6):
         self.assertTrue(mpep.more_than_fraction_selected(fraction))

     mpep.set_nr_runs(2)
     exceeds_low = mpep.more_than_fraction_selected(0.1)
     self.assertTrue(exceeds_low)
     exceeds_high = mpep.more_than_fraction_selected(0.6)
     self.assertFalse(exceeds_high)
    def test_all_selected(self):
        """Check ``all_selected`` against the configured number of runs.

        After ``help_insert`` has filled a Multipeptide configured for two
        runs, ``all_selected`` must be True; raising the expected number of
        runs to three must turn it False.
        """
        mpep = Multipeptide()
        mpep.set_nr_runs(2)

        # Empty Multipeptide: there is no best peakgroup
        self.assertIsNone(mpep.find_best_peptide_pg())

        help_insert(mpep)
        selected_for_two_runs = mpep.all_selected()
        self.assertTrue(selected_for_two_runs)

        mpep.set_nr_runs(3)
        selected_for_three_runs = mpep.all_selected()
        self.assertFalse(selected_for_three_runs)
 def test_more_than_fraction_selected(self):
     """Verify ``more_than_fraction_selected`` with one inserted group.

     ``self.mockPrecursorGroup`` is inserted under run id "42_0". The
     expectations are: 1 expected run -> True for 0.1 and 0.6;
     2 expected runs -> True for 0.1, False for 0.6.
     """
     multipeptide = Multipeptide()
     initial_id = multipeptide.get_id()
     self.assertIsNone(initial_id)

     multipeptide.insert("42_0", self.mockPrecursorGroup)

     # One expected run
     multipeptide.set_nr_runs(1)
     self.assertTrue(multipeptide.more_than_fraction_selected(0.1))
     self.assertTrue(multipeptide.more_than_fraction_selected(0.6))

     # Two expected runs
     multipeptide.set_nr_runs(2)
     self.assertTrue(multipeptide.more_than_fraction_selected(0.1))
     self.assertFalse(multipeptide.more_than_fraction_selected(0.6))
    def test_all_selected(self):
        """Verify ``all_selected`` for two and three expected runs.

        The Multipeptide is filled by ``help_insert``. ``all_selected`` must
        then be True with two expected runs and False with three.
        """
        multipeptide = Multipeptide()
        multipeptide.set_nr_runs(2)

        best_before_insert = multipeptide.find_best_peptide_pg()
        self.assertIsNone(best_before_insert)

        help_insert(multipeptide)
        self.assertTrue(multipeptide.all_selected())

        # Expect one more run than before
        multipeptide.set_nr_runs(3)
        self.assertFalse(multipeptide.all_selected())
 def testNrRuns(self):
     """Check that ``get_nr_runs`` returns the value given to ``set_nr_runs``."""
     expected_nr_runs = 42
     mpep = Multipeptide()
     mpep.set_nr_runs(expected_nr_runs)
     self.assertEqual(mpep.get_nr_runs(), expected_nr_runs)
Beispiel #11
0
    def setUp(self):
        """Build a five-run alignment fixture.

        Creates five MockRun objects ("0_1" .. "0_5"), three Multipeptides
        whose single peakgroup per run has an FDR score of 0.0001, and a
        fourth Multipeptide ("precursor_1") with one or two peakgroups per
        run. A transformation is then added for every ordered pair of runs
        (a run paired with itself included).

        Attributes set on the test case: ``initial_alignment_cutoff``,
        ``mpep``, ``exp``, ``multipeptides`` and ``tr_data``.
        """

        import msproteomicstoolslib.data_structures.Precursor as precursor
        import msproteomicstoolslib.data_structures.PrecursorGroup as precursor_group
        import msproteomicstoolslib.format.TransformationCollection as transformations
        from msproteomicstoolslib.algorithms.alignment.SplineAligner import SplineAligner
        import msproteomicstoolslib.algorithms.alignment.AlignmentHelper as helper

        # Layout of a peakgroup tuple:
        #   0. id
        #   1. quality score (FDR)
        #   2. retention time (normalized)
        #   3. intensity

        def build_group(name, run, peakgroups):
            # One Precursor carrying the given peakgroup tuples, wrapped in a
            # PrecursorGroup created with the precursor's id.
            prec = precursor.Precursor(name, run)
            for tpl in peakgroups:
                prec.add_peakgroup_tpl(tpl, name, -1)
            group = precursor_group.PrecursorGroup(prec.get_id(), run)
            group.addPrecursor(prec)
            return group

        mpeps = [Multipeptide() for _ in range(3)]
        for anchor_mpep in mpeps:
            anchor_mpep.set_nr_runs(5)

        # Parameters
        self.initial_alignment_cutoff = 0.001

        runs = [MockRun("0_%s" % nr) for nr in range(1, 6)]

        # (precursor name, RT in the first run, RT increment per run).
        # The first two are the alignment peptides, the third one is the
        # noise peptide.
        anchors = (
            ("anchorpeptide_1", 100, 10),
            ("anchorpeptide_2", 1000, 100),
            ("anchorpeptide_3", 500, 40),
        )
        ids = 0
        for i, run in enumerate(runs):
            for anchor_mpep, (name, rt_start, rt_step) in zip(mpeps, anchors):
                pg_tuple = ("id_%s" % ids, 0.0001, rt_start + i * rt_step, 10000)
                group = build_group(name, run, [pg_tuple])
                anchor_mpep.insert(run.get_id(), group)
                ids += 1

        # Peakgroups of "precursor_1", one list per run (run 1 .. run 5).
        # NOTE(review): the original notes flag peakgroup1, 2, 4 and 8 as
        # "[correct]" and describe run 4 as missing its correct peakgroup -
        # presumably the expected alignment outcome; confirm against the tests.
        target_peakgroups = (
            [("peakgroup1", 0.01, 100, 10000)],
            [("peakgroup2", 0.2, 112, 10000),
             ("peakgroup3", 0.18, 130, 10000)],
            [("peakgroup4", 0.2, 120, 10000),
             ("peakgroup5", 0.17, 130, 10000)],
            [("peakgroup7", 0.18, 145, 10000)],
            [("peakgroup8", 0.1, 139, 10000)],
        )
        m = Multipeptide()
        m.set_nr_runs(5)
        for run, peakgroups in zip(runs, target_peakgroups):
            group = build_group("precursor_1", run, peakgroups)
            m.insert(run.get_id(), group)

        self.mpep = m
        self.exp = Dummy()
        self.exp.runs = runs

        mpeps.append(m)
        self.multipeptides = mpeps

        # Align all against all
        self.tr_data = transformations.LightTransformationData()
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        for run_0 in self.exp.runs:
            for run_1 in self.exp.runs:
                helper.addDataToTrafo(
                    self.tr_data, run_0, run_1, spl_aligner,
                    self.multipeptides, "linear", 30)
    def setUp(self):
        """Prepare runs, multipeptides and pairwise transformations for the tests.

        Five MockRun objects ("0_1" .. "0_5") are created. Three Multipeptides
        receive one peakgroup per run with an FDR score of 0.0001; a fourth
        Multipeptide ("precursor_1") receives one or two peakgroups per run.
        Afterwards ``helper.addDataToTrafo`` is called for each ordered pair
        of runs, including a run paired with itself.

        Attributes set on the test case: ``initial_alignment_cutoff``,
        ``mpep``, ``exp``, ``multipeptides`` and ``tr_data``.
        """

        import msproteomicstoolslib.data_structures.Precursor as precursor
        import msproteomicstoolslib.data_structures.PrecursorGroup as precursor_group
        import msproteomicstoolslib.format.TransformationCollection as transformations
        from msproteomicstoolslib.algorithms.alignment.SplineAligner import SplineAligner
        import msproteomicstoolslib.algorithms.alignment.AlignmentHelper as helper

        # Each peakgroup tuple holds:
        #   0. id
        #   1. quality score (FDR)
        #   2. retention time (normalized)
        #   3. intensity

        mpeps = [Multipeptide() for _ in range(3)]
        for mpep in mpeps:
            mpep.set_nr_runs(5)

        # Parameters
        self.initial_alignment_cutoff = 0.001

        runs = [MockRun("0_%s" % (run_index + 1)) for run_index in range(5)]

        # (precursor label, base RT, RT shift per run): two alignment
        # peptides followed by the noise peptide.
        anchor_specs = [
            ("anchorpeptide_1", 100, 10),
            ("anchorpeptide_2", 1000, 100),
            ("anchorpeptide_3", 500, 40),
        ]
        ids = 0
        for run_index in range(5):
            run = runs[run_index]
            for slot, (label, base_rt, rt_shift) in enumerate(anchor_specs):
                anchor = precursor.Precursor(label, run)
                anchor.add_peakgroup_tpl(
                    ("id_%s" % ids, 0.0001, base_rt + run_index * rt_shift, 10000),
                    label, -1)
                anchor_group = precursor_group.PrecursorGroup(anchor.get_id(), run)
                anchor_group.addPrecursor(anchor)
                mpeps[slot].insert(run.get_id(), anchor_group)
                ids += 1

        m = Multipeptide()
        m.set_nr_runs(5)

        # Peakgroups of "precursor_1" for run 1 .. run 5.
        # NOTE(review): the original notes mark peakgroup1, 2, 4 and 8 as
        # "[correct]" and list the correct peakgroup of run 4 as missing -
        # presumably what the alignment is expected to pick; confirm.
        peakgroups_per_run = [
            [("peakgroup1", 0.01, 100, 10000)],
            [("peakgroup2", 0.2, 112, 10000), ("peakgroup3", 0.18, 130, 10000)],
            [("peakgroup4", 0.2, 120, 10000), ("peakgroup5", 0.17, 130, 10000)],
            [("peakgroup7", 0.18, 145, 10000)],
            [("peakgroup8", 0.1, 139, 10000)],
        ]
        for run, peakgroups in zip(runs, peakgroups_per_run):
            target = precursor.Precursor("precursor_1", run)
            for pg_tuple in peakgroups:
                target.add_peakgroup_tpl(pg_tuple, "precursor_1", -1)
            target_group = precursor_group.PrecursorGroup(target.get_id(), run)
            target_group.addPrecursor(target)
            m.insert(run.get_id(), target_group)

        self.mpep = m
        self.exp = Dummy()
        self.exp.runs = runs

        mpeps.append(m)
        self.multipeptides = mpeps

        # Align all against all
        self.tr_data = transformations.LightTransformationData()
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        for run_0 in self.exp.runs:
            for run_1 in self.exp.runs:
                helper.addDataToTrafo(self.tr_data, run_0, run_1, spl_aligner,
                                      self.multipeptides, "linear", 30)
    def get_all_multipeptides(self, fdr_cutoff, verbose=False, verbosity=0):
        """Match all precursors in different runs to each other.

        For every run, the best peakgroups returned by
        ``get_best_peaks_with_cutoff(fdr_cutoff)`` are collected and the union
        of their peptide group labels is built over all runs. For each label
        in that union one Multipeptide is created which holds the matching
        precursor group of every run.

        Besides returning the multipeptides, this method sets
        ``self.union_transition_groups_set``, ``self.union_proteins_set``,
        ``self.initial_fdr_cutoff`` and ``self.estimated_decoy_pcnt`` (the
        percentage of collected precursors that are decoys, or None when no
        precursor or no decoy was collected).

        Parameters
        ----------
        fdr_cutoff : float
            A cutoff in fdr to use for the alignment. Each generated
            Multipeptide needs to have at least one member that passes the
            cutoff in some run.
        verbose : bool
            Whether to be verbose or not
        verbosity : int
            How verbose to be: progress is written to stdout for values >= 10,
            the summary statistics are printed for values >= 1.

        Returns
        -------
        list
            One Multipeptide per peptide group label of the union, in the
            iteration order of the union set. Empty when there are no runs.
        """
        union_transition_groups = []
        union_proteins = []
        union_target_transition_groups = []
        nr_runs = len(self.runs)
        for i, r in enumerate(self.runs):
            if verbose or verbosity >= 10:
                stdout.write("\rParsing run %s out of %s" % (i + 1, nr_runs))
                stdout.flush()
            # Query the run once and reuse the result for all three lists.
            best_peaks = list(r.get_best_peaks_with_cutoff(fdr_cutoff))
            target_peaks = [peak for peak in best_peaks
                            if not peak.peptide.get_decoy()]
            union_target_transition_groups.append(
                [peak.peptide.precursor_group.getPeptideGroupLabel()
                 for peak in target_peaks])
            union_transition_groups.append(
                [peak.peptide.precursor_group.getPeptideGroupLabel()
                 for peak in best_peaks])
            union_proteins.append(
                list(set(peak.peptide.protein_name for peak in target_peaks)))
        if verbose or verbosity >= 10:
            stdout.write("\r\r\n")  # clean up

        # set().union(*lists) also works for zero runs, where indexing the
        # first per-run list would raise an IndexError.
        union_target_transition_groups_set = set().union(
            *union_target_transition_groups)
        self.union_transition_groups_set = set().union(*union_transition_groups)
        self.union_proteins_set = set().union(*union_proteins)

        all_prec = sum([len(s) for s in union_transition_groups])
        target_prec = sum([len(s) for s in union_target_transition_groups])

        if verbose or verbosity >= 1:
            # Each print gets a single pre-formatted string so the output is
            # identical whether print is a statement (Python 2) or a function.
            print("===================================")
            print("Finished parsing, number of precursors and peptides per run")
            print("All precursors %s (union of all runs %s)" % (
                [len(s) for s in union_transition_groups],
                len(self.union_transition_groups_set)))
            print("All target precursors %s (union of all runs %s)" % (
                [len(s) for s in union_target_transition_groups],
                len(union_target_transition_groups_set)))
            print("All target proteins %s (union of all runs %s)" % (
                [len(s) for s in union_proteins],
                len(self.union_proteins_set)))
            if all_prec > 0:
                print("Decoy percentage on precursor level %0.4f%%" % (
                    (all_prec - target_prec) * 100.0 / all_prec))

        self.initial_fdr_cutoff = fdr_cutoff
        if all_prec > 0 and all_prec - target_prec != 0:
            self.estimated_decoy_pcnt = (all_prec - target_prec) * 100.0 / all_prec
        else:
            self.estimated_decoy_pcnt = None

        multipeptides = []
        for peptide_id in self.union_transition_groups_set:
            m = Multipeptide()
            for r in self.runs:
                precursor_group = r.getPrecursorGroup(peptide_id)
                m.insert(r.get_id(), precursor_group)
            m.set_nr_runs(nr_runs)
            multipeptides.append(m)
        return multipeptides
Beispiel #14
0
    def get_all_multipeptides(self, fdr_cutoff, verbose=False, verbosity=0):
        """Group the precursors of all runs into Multipeptide objects.

        Every run is scanned for precursors whose best peakgroup has an FDR
        score below ``fdr_cutoff``. The peptide group labels of those
        precursors are merged across runs, and for each label in the merged
        set one Multipeptide is assembled from the matching precursor group
        of every run.

        The following attributes are (re)assigned as a side effect:
        ``union_transition_groups_set``, ``union_target_transition_groups_set``,
        ``union_proteins_set``, ``initial_fdr_cutoff`` and
        ``estimated_decoy_pcnt`` (None unless at least one counted precursor
        belongs to a decoy group).

        Parameters
        ----------
        fdr_cutoff : float
            FDR threshold; a precursor is counted when the FDR score of its
            best peakgroup is strictly below this value.
        verbose : bool
            If True, print the summary statistics.
        verbosity : int
            The summary statistics are printed for values >= 1.

        Returns
        -------
        list
            The Multipeptide objects, sorted by their ``str()`` value.
        """

        # Per-run label lists (duplicates kept; they feed the counts below)
        labels_per_run = []
        target_labels_per_run = []
        proteins_per_run = []

        self.union_transition_groups_set = set()
        self.union_proteins_set = set()
        self.union_target_transition_groups_set = set()

        for run in self.runs:
            labels = []
            target_labels = []
            proteins = []
            for group in run:
                for prec in group:
                    best_score = prec.get_best_peakgroup().get_fdr_score()
                    if not (best_score < fdr_cutoff):
                        continue
                    labels.append(group.getPeptideGroupLabel())
                    if group.get_decoy():
                        continue
                    # Targets only: remember label and protein name
                    target_labels.append(group.getPeptideGroupLabel())
                    proteins.append(prec.getProteinName())

            labels_per_run.append(labels)
            target_labels_per_run.append(target_labels)
            proteins_per_run.append(list(set(proteins)))

            self.union_target_transition_groups_set.update(target_labels)
            self.union_transition_groups_set.update(labels)
            self.union_proteins_set.update(proteins)

        if verbose or verbosity >= 10:
            stdout.write("\r\r\n")  # clean up

        all_counts = [len(labels) for labels in labels_per_run]
        target_counts = [len(labels) for labels in target_labels_per_run]
        all_prec = sum(all_counts)
        target_prec = sum(target_counts)
        decoy_prec = all_prec - target_prec

        if verbose or verbosity >= 1:
            print("===================================")
            print("Finished parsing, number of precursors and peptides per run")
            print("All precursors", all_counts,
                  "(union of all runs %s)" % len(self.union_transition_groups_set))
            print("All target precursors", target_counts,
                  "(union of all runs %s)" % len(self.union_target_transition_groups_set))
            print("All target proteins",
                  [len(names) for names in proteins_per_run],
                  "(union of all runs %s)" % len(self.union_proteins_set))
            if all_prec > 0:
                print("Decoy percentage on precursor level %0.4f%%" %
                      (decoy_prec * 100.0 / all_prec))

        self.initial_fdr_cutoff = fdr_cutoff
        if all_prec > 0 and decoy_prec != 0:
            self.estimated_decoy_pcnt = decoy_prec * 100.0 / all_prec
        else:
            self.estimated_decoy_pcnt = None

        multipeptides = []
        for peptide_id in self.union_transition_groups_set:
            mpep = Multipeptide()
            for run in self.runs:
                group = run.getPrecursorGroup(peptide_id)
                mpep.insert(run.get_id(), group)
            mpep.set_nr_runs(len(self.runs))
            multipeptides.append(mpep)

        # Return sorted multipeptides for consistency across all Python versions
        return sorted(multipeptides, key=str)
 def testNrRuns(self):
     """Verify the round trip of ``set_nr_runs`` / ``get_nr_runs``."""
     multipeptide = Multipeptide()
     multipeptide.set_nr_runs(42)
     stored_nr_runs = multipeptide.get_nr_runs()
     self.assertEqual(stored_nr_runs, 42)
Beispiel #16
0
    def get_all_multipeptides(self, fdr_cutoff, verbose=False, verbosity=0):
        """Match all precursors in different runs to each other.

        Each run is asked for its best peakgroups via
        ``get_best_peaks_with_cutoff(fdr_cutoff)``. The peptide group labels
        of those peakgroups are merged over all runs, and one Multipeptide is
        built per merged label from the matching precursor group of every run.

        Side effects: sets ``self.union_transition_groups_set``,
        ``self.union_proteins_set``, ``self.initial_fdr_cutoff`` and
        ``self.estimated_decoy_pcnt`` (percentage of collected precursors that
        are decoys; None when nothing or no decoy was collected).

        Parameters
        ----------
        fdr_cutoff : float
            A cutoff in fdr to use for the alignment. Each generated
            Multipeptide needs to have at least one member that passes the
            cutoff in some run.
        verbose : bool
            Whether to be verbose or not
        verbosity : int
            How verbose to be: values >= 10 write progress to stdout, values
            >= 1 print the summary statistics.

        Returns
        -------
        list
            One Multipeptide per merged peptide group label, in the iteration
            order of the merged set. Empty when there are no runs.
        """
        union_transition_groups = []
        union_proteins = []
        union_target_transition_groups = []
        show_progress = verbose or verbosity >= 10
        for i, r in enumerate(self.runs):
            if show_progress:
                stdout.write("\rParsing run %s out of %s" %
                             (i + 1, len(self.runs)))
                stdout.flush()
            # Fetch the peaks once per run instead of once per derived list.
            peaks = list(r.get_best_peaks_with_cutoff(fdr_cutoff))
            targets = [peak for peak in peaks if not peak.peptide.get_decoy()]
            union_target_transition_groups.append([
                peak.peptide.precursor_group.getPeptideGroupLabel()
                for peak in targets
            ])
            union_transition_groups.append([
                peak.peptide.precursor_group.getPeptideGroupLabel()
                for peak in peaks
            ])
            union_proteins.append(
                list(set(peak.peptide.protein_name for peak in targets)))
        if show_progress:
            stdout.write("\r\r\n")  # clean up

        # Starting from an empty set keeps this valid for zero runs; seeding
        # with the first per-run list would raise an IndexError there.
        union_target_transition_groups_set = set()
        self.union_transition_groups_set = set()
        self.union_proteins_set = set()
        for groups in union_transition_groups:
            self.union_transition_groups_set.update(groups)
        for groups in union_target_transition_groups:
            union_target_transition_groups_set.update(groups)
        for proteins in union_proteins:
            self.union_proteins_set.update(proteins)

        all_prec = sum([len(s) for s in union_transition_groups])
        target_prec = sum([len(s) for s in union_target_transition_groups])

        if verbose or verbosity >= 1:
            # One pre-formatted string per print call: this produces the same
            # text under the Python 2 print statement and the print function.
            print("===================================")
            print("Finished parsing, number of precursors and peptides per run")
            print("All precursors %s (union of all runs %s)" %
                  ([len(s) for s in union_transition_groups],
                   len(self.union_transition_groups_set)))
            print("All target precursors %s (union of all runs %s)" %
                  ([len(s) for s in union_target_transition_groups],
                   len(union_target_transition_groups_set)))
            print("All target proteins %s (union of all runs %s)" %
                  ([len(s) for s in union_proteins],
                   len(self.union_proteins_set)))
            if all_prec > 0:
                print("Decoy percentage on precursor level %0.4f%%" %
                      ((all_prec - target_prec) * 100.0 / all_prec))

        self.initial_fdr_cutoff = fdr_cutoff
        if all_prec > 0 and all_prec - target_prec != 0:
            self.estimated_decoy_pcnt = (all_prec -
                                         target_prec) * 100.0 / all_prec
        else:
            self.estimated_decoy_pcnt = None

        multipeptides = []
        for peptide_id in self.union_transition_groups_set:
            m = Multipeptide()
            for r in self.runs:
                precursor_group = r.getPrecursorGroup(peptide_id)
                m.insert(r.get_id(), precursor_group)
            m.set_nr_runs(len(self.runs))
            multipeptides.append(m)
        return multipeptides
Beispiel #17
0
    def get_all_multipeptides(self, fdr_cutoff, verbose=False, verbosity=0):
        """Build one Multipeptide per peptide group that passes the cutoff.

        A precursor passes when the FDR score of its best peakgroup is below
        ``fdr_cutoff``. The peptide group labels of all passing precursors are
        collected per run and merged over all runs; for every merged label a
        Multipeptide is filled with the matching precursor group of each run.

        Attributes assigned along the way: ``union_transition_groups_set``,
        ``union_proteins_set``, ``union_target_transition_groups_set``,
        ``initial_fdr_cutoff`` and ``estimated_decoy_pcnt`` (None unless at
        least one passing precursor belongs to a decoy group).

        Parameters
        ----------
        fdr_cutoff : float
            FDR threshold that the best peakgroup of a precursor has to stay
            strictly below.
        verbose : bool
            If True, print the summary statistics.
        verbosity : int
            The summary statistics are printed for values >= 1.

        Returns
        -------
        list
            The Multipeptide objects, sorted by their ``str()`` value.
        """

        # Identify across all runs which transition groups pass the cutoff
        passing_per_run = []
        passing_targets_per_run = []
        target_proteins_per_run = []

        self.union_transition_groups_set = set()
        self.union_proteins_set = set()
        self.union_target_transition_groups_set = set()
        for run in self.runs:
            passing = []
            passing_targets = []
            target_proteins = []
            for group in run:
                for member in group:
                    if member.get_best_peakgroup().get_fdr_score() < fdr_cutoff:
                        passing.append(group.getPeptideGroupLabel())
                        is_decoy = group.get_decoy()
                        if not is_decoy:
                            passing_targets.append(group.getPeptideGroupLabel())
                            target_proteins.append(member.getProteinName())
            passing_per_run.append(passing)
            passing_targets_per_run.append(passing_targets)
            target_proteins_per_run.append(list(set(target_proteins)))

            self.union_target_transition_groups_set = (
                self.union_target_transition_groups_set | set(passing_targets))
            self.union_transition_groups_set = (
                self.union_transition_groups_set | set(passing))
            self.union_proteins_set = (
                self.union_proteins_set | set(target_proteins))

        if verbose or verbosity >= 10:
            stdout.write("\r\r\n")  # clean up

        all_prec = sum(len(labels) for labels in passing_per_run)
        target_prec = sum(len(labels) for labels in passing_targets_per_run)

        if verbose or verbosity >= 1:
            union_note = "(union of all runs %s)"
            print("===================================")
            print("Finished parsing, number of precursors and peptides per run")
            print("All precursors",
                  [len(labels) for labels in passing_per_run],
                  union_note % len(self.union_transition_groups_set))
            print("All target precursors",
                  [len(labels) for labels in passing_targets_per_run],
                  union_note % len(self.union_target_transition_groups_set))
            print("All target proteins",
                  [len(names) for names in target_proteins_per_run],
                  union_note % len(self.union_proteins_set))
            if all_prec > 0:
                decoy_pcnt = (all_prec - target_prec) * 100.0 / all_prec
                print("Decoy percentage on precursor level %0.4f%%" % decoy_pcnt)

        self.initial_fdr_cutoff = fdr_cutoff
        self.estimated_decoy_pcnt = None
        if all_prec > 0 and all_prec - target_prec != 0:
            self.estimated_decoy_pcnt = (all_prec - target_prec) * 100.0 / all_prec

        multipeptides = []
        for peptide_id in self.union_transition_groups_set:
            multipeptide = Multipeptide()
            for run in self.runs:
                group = run.getPrecursorGroup(peptide_id)
                multipeptide.insert(run.get_id(), group)
            multipeptide.set_nr_runs(len(self.runs))
            multipeptides.append(multipeptide)

        # Return sorted multipeptides for consistency across all Python versions
        multipeptides_sorted = sorted(multipeptides, key=str)
        return multipeptides_sorted