def setUp(self):
        """
        Parse two peakgroup CSV files with the "openswath" reader.

        The multipeptides of each file are stored on the test instance
        (``self.multipeptides`` / ``self.multipeptides2``). For the second
        data set, the lowest-scoring peakgroup of every precursor group is
        marked as selected when its FDR score is below 0.01.
        """
        # Directory layout, resolved relative to this test file
        self.dirname = os.path.dirname(os.path.abspath(__file__))
        self.topdir = os.path.join(self.dirname, "..", "..")
        self.datadir = os.path.join(self.topdir, "test", "data")
        self.scriptdir = os.path.join(self.topdir, "analysis")

        # FDR cutoff handed to get_all_multipeptides for both data sets
        # (1.0 -- presumably "no filtering"; confirm against MRExperiment)
        all_peakgroups_cutoff = 1.0

        # First data set
        first_input = os.path.join(self.datadir, "imputeValues/imputeValues_5_input.csv")
        first_reader = SWATHScoringReader.newReader([first_input], "openswath", readmethod="complete")
        self.exp = MRExperiment()
        self.exp.runs = first_reader.parse_files()
        self.multipeptides = self.exp.get_all_multipeptides(all_peakgroups_cutoff, verbose=False)

        # Second data set
        second_input = os.path.join(self.datadir, "feature_alignment_7_openswath_input.csv")
        second_reader = SWATHScoringReader.newReader([second_input], "openswath", readmethod="complete")
        self.exp2 = MRExperiment()
        self.exp2.runs = second_reader.parse_files()
        self.multipeptides2 = self.exp2.get_all_multipeptides(all_peakgroups_cutoff, verbose=False)

        # For each precursor group of the second data set, pick the peakgroup
        # with the smallest FDR score and select it if it passes the cutoff
        selection_cutoff = 0.01
        for multipeptide in self.multipeptides2:
            for precursor_group in multipeptide.getAllPeptides():
                scored = [(pg.get_fdr_score(), pg) for pg in precursor_group.peakgroups]
                best_score, best_peakgroup = min(scored)
                if best_score < selection_cutoff:
                    best_peakgroup.select_this_peakgroup()
    def setUp(self):
        """
        Parse the imputeValues test input, compute pairwise run
        transformations and pick two multipeptides for the tests.

        Sets ``self.new_exp``, ``self.multipeptides``, ``self.tr_data``,
        ``self.current_mpep1`` and ``self.current_mpep2`` (plus the directory
        attributes and ``self.initial_alignment_cutoff``).

        Raises IndexError if one of the two expected peptide ids is not
        present in the parsed data.
        """
        # Set up dirs
        self.dirname = os.path.dirname(os.path.abspath(__file__))
        self.topdir = os.path.join(self.dirname, "..", "..")
        self.datadir = os.path.join(self.topdir, "test", "data")
        self.scriptdir = os.path.join(self.topdir, "analysis")

        # Set up files
        peakgroups_file = os.path.join(self.datadir, "imputeValues/imputeValues_5_input.csv")

        # Parameters
        self.initial_alignment_cutoff = 0.0001
        fdr_cutoff_all_pg = 1.0
        max_rt_diff = 30

        # Read input
        reader = SWATHScoringReader.newReader([peakgroups_file], "openswath", readmethod="complete")
        self.new_exp = MRExperiment()
        self.new_exp.runs = reader.parse_files()
        self.multipeptides = self.new_exp.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)

        # Align all against all (every ordered pair of runs, including a run
        # with itself)
        self.tr_data = transformations.LightTransformationData()
        spl_aligner = SplineAligner(self.initial_alignment_cutoff)
        for run_0 in self.new_exp.runs:
            for run_1 in self.new_exp.runs:
                # Last argument was a literal 30 duplicating max_rt_diff;
                # presumably it is the maximal RT difference -- confirm
                # against helper.addDataToTrafo
                helper.addDataToTrafo(self.tr_data, run_0, run_1, spl_aligner,
                                      self.multipeptides, "linear", max_rt_diff)

        def first_multipeptide_with_id(peptide_id):
            # The id of the first precursor group identifies the multipeptide;
            # [0] raises IndexError when nothing matches.
            matches = [m for m in self.multipeptides
                       if m.getAllPeptides()[0].get_id() == peptide_id]
            return matches[0]

        # Select two interesting peptides
        self.current_mpep1 = first_multipeptide_with_id("21517_C[160]NVVISGGTGSGK/2_run0 0 0")
        self.current_mpep2 = first_multipeptide_with_id("26471_GYEDPPAALFR/2_run0 0 0")
Example #3
0
    def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
        """
        Parse the aligned peakgroup files and attach the peakgroup data
        (boundaries, m_score, intensity) to the matching precursors of each
        run in swathfiles.

        The peakgroup files have to have the following columns:
            - FullPeptideName
            - Charge
            - leftWidth
            - rightWidth
            - m_score
            - Intensity
            - align_runid
            - transition_group_id

        aligned_pg_files is passed to SWATHScoringReader.newReader as the
        input files; swathfiles must provide getSwathFiles(), whose items are
        updated in place. Nothing is returned.
        """

        # Read in the peakgroup files, parse them and map across runs
        reader = SWATHScoringReader.newReader(aligned_pg_files,
                                              "openswath",
                                              readmethod="gui",
                                              errorHandling="loose")
        new_exp = Experiment()
        new_exp.runs = reader.parse_files(REALIGN_RUNS)
        multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF,
                                                      verbose=False)

        # Build map of the PeptideName/Charge to the individual multipeptide
        peakgroup_map = {}
        for m in multipeptides:
            pg = m.find_best_peptide_pg()
            identifier = (pg.get_value("FullPeptideName") + "/" +
                          pg.get_value("Charge"))
            peakgroup_map[identifier] = m

        for swathrun in swathfiles.getSwathFiles():
            if ONLY_SHOW_QUANTIFIED:
                # Drop every precursor of this run that has no peakgroup entry
                precursor_ids = set(swathrun.get_all_precursor_ids())
                intersection = precursor_ids.intersection(peakgroup_map)
                todelete = precursor_ids.difference(intersection)
                if not intersection:
                    # Single-argument print(...) behaves identically under
                    # Python 2 and Python 3
                    print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
                    print(len(intersection))
                swathrun.remove_precursors(todelete)

            # for each precursor in this run, store the values of all its
            # peakgroups found in the aligned files
            for precursor_id in swathrun.get_all_precursor_ids():
                if precursor_id not in peakgroup_map:
                    continue

                m = peakgroup_map[precursor_id]
                if not m.hasPrecursorGroup(swathrun.runid):
                    continue

                precursor_group = m.getPrecursorGroup(swathrun.runid)
                for pg in precursor_group.getAllPeakgroups():
                    left_width = float(pg.get_value("leftWidth"))
                    right_width = float(pg.get_value("rightWidth"))
                    fdrscore = float(pg.get_value("m_score"))
                    intensity = float(pg.get_value("Intensity"))
                    swathrun.add_peakgroup_data(precursor_id, left_width,
                                                right_width, fdrscore,
                                                intensity)
Example #4
0
    def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
        """
        Parse the aligned peakgroup files and attach the peakgroup data
        (boundaries, m_score, intensity) to the matching precursors of each
        run in swathfiles.

        The peakgroup files have to have the following columns:
            - FullPeptideName
            - Charge
            - leftWidth
            - rightWidth
            - m_score
            - Intensity
            - align_runid
            - transition_group_id

        aligned_pg_files is passed to SWATHScoringReader.newReader as the
        input files; swathfiles must provide getSwathFiles(), whose items are
        updated in place. Nothing is returned.
        """

        # Read in the peakgroup files, parse them and map across runs
        reader = SWATHScoringReader.newReader(aligned_pg_files, "openswath", readmethod="gui", errorHandling="loose")
        new_exp = Experiment()
        new_exp.runs = reader.parse_files(REALIGN_RUNS)
        multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF, verbose=False)

        # Build map of the PeptideName/Charge to the individual multipeptide
        peakgroup_map = {}
        for m in multipeptides:
            pg = m.find_best_peptide_pg()
            identifier = pg.get_value("FullPeptideName") + "/" + pg.get_value("Charge")
            peakgroup_map[identifier] = m

        for swathrun in swathfiles.getSwathFiles():
            if ONLY_SHOW_QUANTIFIED:
                # Drop every precursor of this run that has no peakgroup entry
                precursor_ids = set(swathrun.get_all_precursor_ids())
                intersection = precursor_ids.intersection(peakgroup_map)
                todelete = precursor_ids.difference(intersection)
                if not intersection:
                    # Single-argument print(...) behaves identically under
                    # Python 2 and Python 3
                    print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
                    print(len(intersection))
                swathrun.remove_precursors(todelete)

            # for each precursor in this run, store the values of all its
            # peakgroups found in the aligned files
            for precursor_id in swathrun.get_all_precursor_ids():
                if precursor_id not in peakgroup_map:
                    continue

                m = peakgroup_map[precursor_id]
                if not m.hasPrecursorGroup(swathrun.runid):
                    continue

                for pg in m.getPrecursorGroup(swathrun.runid).getAllPeakgroups():
                    left_width = float(pg.get_value("leftWidth"))
                    right_width = float(pg.get_value("rightWidth"))
                    fdrscore = float(pg.get_value("m_score"))
                    intensity = float(pg.get_value("Intensity"))
                    swathrun.add_peakgroup_data(precursor_id, left_width, right_width, fdrscore, intensity)
Example #5
0
    def setUpClass(cls):
        """
        Parse DIAlign/merged.osw once for the whole test class.

        Stores the directory attributes, the run returned by
        ``determine_best_run`` (``cls.best_run``) and the multipeptides
        (``cls.mp``) on the class.
        """
        from msproteomicstoolslib.format.SWATHScoringReader import SWATHScoringReader

        # Locate the test data relative to this file
        cls.dirname = os.path.dirname(os.path.abspath(__file__))
        cls.topdir = os.path.join(cls.dirname, "..", "..")
        cls.datadir = os.path.join(cls.topdir, "test", "data")
        cls.datadir_DIAlign = os.path.join(cls.datadir, "DIAlign")

        # Parse the merged OSW file with the "minimal" read method
        osw_path = os.path.join(cls.datadir_DIAlign, "merged.osw")
        osw_reader = SWATHScoringReader.newReader([osw_path], "openswath", "minimal")
        parsed_runs = osw_reader.parse_files(read_exp_RT=False)

        from analysis.alignment.feature_alignment import Experiment
        experiment = Experiment()
        experiment.set_runs(parsed_runs)

        # Both steps use 0.05, kept as separate names because they are
        # passed to different parameters
        alignment_fdr = 0.05
        multipeptide_fdr = 0.05
        cls.best_run = experiment.determine_best_run(alignment_fdr_threshold=alignment_fdr)
        cls.mp = experiment.get_all_multipeptides(multipeptide_fdr, verbose=False)