Esempio n. 1
0
    def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
        """
        Read aligned peakgroup files and attach their peakgroups to the runs.

        The files are parsed with the "openswath" SWATHScoringReader and
        grouped into multipeptides. Each multipeptide is indexed by the
        "FullPeptideName/Charge" identifier of its best peakgroup. For every
        swath run, each precursor whose id equals such an identifier gets the
        boundaries, m_score and intensity of all its peakgroups in that run
        stored through ``add_peakgroup_data``.

        When ONLY_SHOW_QUANTIFIED is set, precursors of a run that have no
        entry in the peakgroup files are removed from the run first.

        The peakgroup files have to have the following columns:
            - FullPeptideName
            - Charge
            - leftWidth
            - rightWidth
            - m_score
            - Intensity
            - align_runid
            - transition_group_id

        Args:
            aligned_pg_files: passed unchanged to
                ``SWATHScoringReader.newReader`` as the files to read.
            swathfiles: object whose ``getSwathFiles()`` yields the runs to
                annotate; the runs are updated through their
                ``remove_precursors`` / ``add_peakgroup_data`` methods.
        """

        # Read in the peakgroup files, parse them and map across runs
        reader = SWATHScoringReader.newReader(aligned_pg_files,
                                              "openswath",
                                              readmethod="gui",
                                              errorHandling="loose")
        new_exp = Experiment()
        new_exp.runs = reader.parse_files(REALIGN_RUNS)
        multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF,
                                                      verbose=False)

        # Build map of the PeptideName/Charge to the individual multipeptide
        peakgroup_map = {}
        for m in multipeptides:
            pg = m.find_best_peptide_pg()
            identifier = pg.get_value("FullPeptideName") + "/" + pg.get_value(
                "Charge")
            peakgroup_map[identifier] = m

        for swathrun in swathfiles.getSwathFiles():
            if ONLY_SHOW_QUANTIFIED:
                # Build the id set once; it is needed for both the
                # intersection and the difference below.
                precursor_ids = set(swathrun.get_all_precursor_ids())
                intersection = precursor_ids.intersection(peakgroup_map)
                todelete = precursor_ids.difference(intersection)
                if len(intersection) == 0:
                    # Single-argument print(...) produces the same output
                    # under both Python 2 and Python 3.
                    print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
                    print(len(intersection))
                swathrun.remove_precursors(todelete)

            # for each precursor in this run, identify the best peakgroup and store the value
            # (the ids are queried again because precursors may just have
            # been removed above)
            for precursor_id in swathrun.get_all_precursor_ids():
                if precursor_id not in peakgroup_map:
                    continue

                m = peakgroup_map[precursor_id]
                if not m.hasPrecursorGroup(swathrun.runid):
                    continue

                for pg in m.getPrecursorGroup(
                        swathrun.runid).getAllPeakgroups():
                    l = float(pg.get_value("leftWidth"))
                    r = float(pg.get_value("rightWidth"))
                    fdrscore = float(pg.get_value("m_score"))
                    intensity = float(pg.get_value("Intensity"))
                    swathrun.add_peakgroup_data(precursor_id, l, r,
                                                fdrscore, intensity)
    def test_parse_files(self):
        """Parse dataset3.csv and check the keys built by buildPeakgroupMap.

        The file is read with the "openswath" reader in "gui" mode, turned
        into multipeptides with a cutoff of 1.0, and the resulting peakgroup
        map is expected to hold exactly the two identifiers asserted below.
        """

        filename = os.path.join(self.datadir_gui, "dataset3.csv")
        r = reader.SWATHScoringReader.newReader([filename], "openswath", readmethod="gui", errorHandling="loose")

        new_exp = Experiment()
        new_exp.runs = r.parse_files(True)
        multipeptides = new_exp.get_all_multipeptides(1.0, verbose=False)

        # Build map of the PeptideName/Charge to the individual multipeptide
        peakgroup_map = {}
        mapper.buildPeakgroupMap(multipeptides, peakgroup_map)

        # Iterating a dict yields its keys, so no .keys()/list() is needed.
        self.assertEqual(len(peakgroup_map), 2)
        self.assertEqual(sorted(peakgroup_map), ['testpeptide/0', 'testpeptide/0_pr'])
Esempio n. 3
0
    def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
        """
        Load aligned peakgroup results and store them on the matching runs.

        The peakgroup files are parsed with the "openswath"
        SWATHScoringReader, converted to multipeptides, and keyed by the
        "FullPeptideName/Charge" identifier of each multipeptide's best
        peakgroup. Every precursor of a swath run whose id is one of those
        identifiers then has all of its peakgroups for that run (left/right
        width, m_score, intensity) passed to ``add_peakgroup_data``.

        If ONLY_SHOW_QUANTIFIED is set, precursors of a run that do not occur
        in the peakgroup files are removed from the run beforehand.

        The peakgroup files have to have the following columns:
            - FullPeptideName
            - Charge
            - leftWidth
            - rightWidth
            - m_score
            - Intensity
            - align_runid
            - transition_group_id

        Args:
            aligned_pg_files: forwarded as-is to
                ``SWATHScoringReader.newReader`` as the files to read.
            swathfiles: object whose ``getSwathFiles()`` yields the runs to
                annotate; the runs are updated through their
                ``remove_precursors`` / ``add_peakgroup_data`` methods.
        """

        # Read in the peakgroup files, parse them and map across runs
        reader = SWATHScoringReader.newReader(aligned_pg_files, "openswath", readmethod="gui", errorHandling="loose")
        new_exp = Experiment()
        new_exp.runs = reader.parse_files(REALIGN_RUNS)
        multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF, verbose=False)

        # Build map of the PeptideName/Charge to the individual multipeptide
        peakgroup_map = {}
        for m in multipeptides:
            pg = m.find_best_peptide_pg()
            identifier = pg.get_value("FullPeptideName") + "/" + pg.get_value("Charge")
            peakgroup_map[identifier] = m

        for swathrun in swathfiles.getSwathFiles():
            if ONLY_SHOW_QUANTIFIED:
                # Compute the id set a single time and reuse it for both
                # set operations.
                precursor_ids = set(swathrun.get_all_precursor_ids())
                intersection = precursor_ids.intersection(peakgroup_map)
                todelete = precursor_ids.difference(intersection)
                if len(intersection) == 0:
                    # print(...) with one argument behaves identically on
                    # Python 2 and Python 3.
                    print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
                    print(len(intersection))
                swathrun.remove_precursors(todelete)

            # for each precursor in this run, identify the best peakgroup and store the value
            # (ids are fetched again since some may have been removed above)
            for precursor_id in swathrun.get_all_precursor_ids():
                if precursor_id not in peakgroup_map:
                    continue

                m = peakgroup_map[precursor_id]
                if not m.hasPrecursorGroup(swathrun.runid):
                    continue

                for pg in m.getPrecursorGroup(swathrun.runid).getAllPeakgroups():
                    l = float(pg.get_value("leftWidth"))
                    r = float(pg.get_value("rightWidth"))
                    fdrscore = float(pg.get_value("m_score"))
                    intensity = float(pg.get_value("Intensity"))
                    swathrun.add_peakgroup_data(precursor_id, l, r, fdrscore, intensity)
    def test_parse_files(self):
        """Parse dataset3.csv and verify the identifiers in the peakgroup map.

        The file is read with the "openswath" reader in "gui" mode, converted
        to multipeptides with a cutoff of 1.0, and ``mapper.buildPeakgroupMap``
        must fill the map with exactly the two keys asserted below.
        """

        filename = os.path.join(self.datadir_gui, "dataset3.csv")
        r = reader.SWATHScoringReader.newReader([filename],
                                                "openswath",
                                                readmethod="gui",
                                                errorHandling="loose")

        new_exp = Experiment()
        new_exp.runs = r.parse_files(True)
        multipeptides = new_exp.get_all_multipeptides(1.0, verbose=False)

        # Build map of the PeptideName/Charge to the individual multipeptide
        peakgroup_map = {}
        mapper.buildPeakgroupMap(multipeptides, peakgroup_map)

        # A dict iterates over its keys, so len()/sorted() apply directly.
        self.assertEqual(len(peakgroup_map), 2)
        self.assertEqual(sorted(peakgroup_map),
                         ['testpeptide/0', 'testpeptide/0_pr'])