def setUp(self):
    """Load two peakgroup test data sets and pre-select the best peakgroups.

    Populates ``self.exp`` / ``self.multipeptides`` from the imputeValues
    input file and ``self.exp2`` / ``self.multipeptides2`` from the feature
    alignment input file. In the second data set, for every entry returned
    by ``getAllPeptides()`` the peakgroup with the lowest FDR score is
    selected for writing out if that score is below 0.01.
    """
    # Set up dirs
    self.dirname = os.path.dirname(os.path.abspath(__file__))
    self.topdir = os.path.join(self.dirname, "..", "..")
    self.datadir = os.path.join(self.topdir, "test", "data")
    self.scriptdir = os.path.join(self.topdir, "analysis")

    # Cutoff handed to get_all_multipeptides for both data sets.
    fdr_cutoff_all_pg = 1.0

    # Set up files and read input
    peakgroups_file = os.path.join(self.datadir, "imputeValues/imputeValues_5_input.csv")
    reader = SWATHScoringReader.newReader([peakgroups_file], "openswath", readmethod="complete")
    self.exp = MRExperiment()
    self.exp.runs = reader.parse_files()
    self.multipeptides = self.exp.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)

    # Set up files nr2
    peakgroups_file = os.path.join(self.datadir, "feature_alignment_7_openswath_input.csv")
    reader = SWATHScoringReader.newReader([peakgroups_file], "openswath", readmethod="complete")
    self.exp2 = MRExperiment()
    self.exp2.runs = reader.parse_files()
    self.multipeptides2 = self.exp2.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)

    # Select the best peakgroup per peptide and select it for writing out
    fdr_cutoff = 0.01
    for mpep in self.multipeptides2:
        for prgr in mpep.getAllPeptides():
            peakgroups = list(prgr.peakgroups)
            if not peakgroups:
                # Nothing to select for an entry without peakgroups.
                continue
            # Compare on the score only: a (score, peakgroup) tuple would
            # fall back to comparing the peakgroup objects on tied scores.
            best_pg = min(peakgroups, key=lambda pg: pg.get_fdr_score())
            if best_pg.get_fdr_score() < fdr_cutoff:
                best_pg.select_this_peakgroup()
def setUp(self):
    """Load the imputeValues test data, align all runs and pick two peptides.

    Populates ``self.new_exp`` and ``self.multipeptides`` from the
    peakgroup file, fills ``self.tr_data`` by calling
    ``helper.addDataToTrafo`` for every ordered pair of runs (including a
    run with itself) and stores two multipeptides, looked up by the id of
    their first peptide, as ``self.current_mpep1`` and
    ``self.current_mpep2``.

    Raises:
        IndexError: if one of the two peptide ids is not present.
    """
    # Set up dirs
    self.dirname = os.path.dirname(os.path.abspath(__file__))
    self.topdir = os.path.join(self.dirname, "..", "..")
    self.datadir = os.path.join(self.topdir, "test", "data")
    self.scriptdir = os.path.join(self.topdir, "analysis")

    # Set up files
    peakgroups_file = os.path.join(self.datadir, "imputeValues/imputeValues_5_input.csv")

    # Parameters
    self.initial_alignment_cutoff = 0.0001
    fdr_cutoff_all_pg = 1.0
    max_rt_diff = 30

    # Read input
    reader = SWATHScoringReader.newReader([peakgroups_file], "openswath", readmethod="complete")
    self.new_exp = MRExperiment()
    self.new_exp.runs = reader.parse_files()
    self.multipeptides = self.new_exp.get_all_multipeptides(fdr_cutoff_all_pg, verbose=False)

    # Align all against all
    self.tr_data = transformations.LightTransformationData()
    spl_aligner = SplineAligner(self.initial_alignment_cutoff)
    for run_0 in self.new_exp.runs:
        for run_1 in self.new_exp.runs:
            helper.addDataToTrafo(self.tr_data, run_0, run_1, spl_aligner,
                                  self.multipeptides, "linear", max_rt_diff)

    # Select two interesting peptides
    pepname = "21517_C[160]NVVISGGTGSGK/2_run0 0 0"
    self.current_mpep1 = [m for m in self.multipeptides
                          if m.getAllPeptides()[0].get_id() == pepname][0]

    pepname = "26471_GYEDPPAALFR/2_run0 0 0"
    self.current_mpep2 = [m for m in self.multipeptides
                          if m.getAllPeptides()[0].get_id() == pepname][0]
def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
    """Read aligned peakgroup files and attach their peakgroups to the runs.

    The peakgroup files have to have the following columns:
        - FullPeptideName
        - Charge
        - leftWidth
        - rightWidth
        - m_score
        - Intensity
        - align_runid
        - transition_group_id

    Args:
        aligned_pg_files: peakgroup files handed to
            ``SWATHScoringReader.newReader``.
        swathfiles: object whose ``getSwathFiles()`` returns the runs that
            receive the peakgroup data through ``add_peakgroup_data``.
    """
    # Read in the peakgroup files, parse them and map across runs
    reader = SWATHScoringReader.newReader(
        aligned_pg_files, "openswath", readmethod="gui", errorHandling="loose")
    new_exp = Experiment()
    new_exp.runs = reader.parse_files(REALIGN_RUNS)
    multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF, verbose=False)

    # Build map of the PeptideName/Charge to the individual multipeptide
    peakgroup_map = {}
    for m in multipeptides:
        pg = m.find_best_peptide_pg()
        identifier = pg.get_value("FullPeptideName") + "/" + pg.get_value("Charge")
        peakgroup_map[identifier] = m

    for swathrun in swathfiles.getSwathFiles():
        if ONLY_SHOW_QUANTIFIED:
            # Drop every precursor of this run that has no entry in the map.
            precursor_ids = set(swathrun.get_all_precursor_ids())
            intersection = precursor_ids.intersection(peakgroup_map)
            todelete = precursor_ids.difference(intersection)
            if not intersection:
                print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
            # NOTE(review): nesting reconstructed from unindented source;
            # confirm the count is meant to be printed for every run.
            print(len(intersection))
            swathrun.remove_precursors(todelete)

        # for each precursor in this run, identify the best peakgroup and store the value
        for precursor_id in swathrun.get_all_precursor_ids():
            if precursor_id not in peakgroup_map:
                continue

            m = peakgroup_map[precursor_id]
            if not m.hasPrecursorGroup(swathrun.runid):
                continue

            for pg in m.getPrecursorGroup(swathrun.runid).getAllPeakgroups():
                left = float(pg.get_value("leftWidth"))
                right = float(pg.get_value("rightWidth"))
                fdrscore = float(pg.get_value("m_score"))
                intensity = float(pg.get_value("Intensity"))
                swathrun.add_peakgroup_data(precursor_id, left, right, fdrscore, intensity)
def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
    """Read aligned peakgroup files and attach their peakgroups to the runs.

    The peakgroup files have to have the following columns:
        - FullPeptideName
        - Charge
        - leftWidth
        - rightWidth
        - m_score
        - Intensity
        - align_runid
        - transition_group_id

    Args:
        aligned_pg_files: peakgroup files handed to
            ``SWATHScoringReader.newReader``.
        swathfiles: object whose ``getSwathFiles()`` returns the runs that
            receive the peakgroup data through ``add_peakgroup_data``.
    """
    # Read in the peakgroup files, parse them and map across runs
    reader = SWATHScoringReader.newReader(
        aligned_pg_files, "openswath", readmethod="gui", errorHandling="loose")
    new_exp = Experiment()
    new_exp.runs = reader.parse_files(REALIGN_RUNS)
    multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF, verbose=False)

    # Build map of the PeptideName/Charge to the individual multipeptide
    peakgroup_map = {}
    for m in multipeptides:
        pg = m.find_best_peptide_pg()
        identifier = pg.get_value("FullPeptideName") + "/" + pg.get_value("Charge")
        peakgroup_map[identifier] = m

    for swathrun in swathfiles.getSwathFiles():
        if ONLY_SHOW_QUANTIFIED:
            # Drop every precursor of this run that has no entry in the map.
            precursor_ids = set(swathrun.get_all_precursor_ids())
            intersection = precursor_ids.intersection(peakgroup_map)
            todelete = precursor_ids.difference(intersection)
            if not intersection:
                print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
            # NOTE(review): nesting reconstructed from unindented source;
            # confirm the count is meant to be printed for every run.
            print(len(intersection))
            swathrun.remove_precursors(todelete)

        # for each precursor in this run, identify the best peakgroup and store the value
        for precursor_id in swathrun.get_all_precursor_ids():
            if precursor_id not in peakgroup_map:
                continue

            m = peakgroup_map[precursor_id]
            if not m.hasPrecursorGroup(swathrun.runid):
                continue

            for pg in m.getPrecursorGroup(swathrun.runid).getAllPeakgroups():
                left = float(pg.get_value("leftWidth"))
                right = float(pg.get_value("rightWidth"))
                fdrscore = float(pg.get_value("m_score"))
                intensity = float(pg.get_value("Intensity"))
                swathrun.add_peakgroup_data(precursor_id, left, right, fdrscore, intensity)
def setUpClass(cls):
    """Parse the DIAlign ``merged.osw`` test file and cache results on the class.

    Sets the directory attributes (``dirname``, ``topdir``, ``datadir``,
    ``datadir_DIAlign``), ``cls.best_run`` from
    ``Experiment.determine_best_run`` and ``cls.mp`` from
    ``Experiment.get_all_multipeptides``; both calls receive 0.05.
    """
    from msproteomicstoolslib.format.SWATHScoringReader import SWATHScoringReader

    # Locate the test data relative to this file.
    here = os.path.dirname(os.path.abspath(__file__))
    cls.dirname = here
    cls.topdir = os.path.join(here, "..", "..")
    cls.datadir = os.path.join(cls.topdir, "test", "data")
    cls.datadir_DIAlign = os.path.join(cls.datadir, "DIAlign")

    # Read the runs from the merged OSW file.
    osw_path = os.path.join(cls.datadir_DIAlign, "merged.osw")
    reader = SWATHScoringReader.newReader([osw_path], "openswath", "minimal")
    parsed_runs = reader.parse_files(read_exp_RT=False)

    from analysis.alignment.feature_alignment import Experiment

    experiment = Experiment()
    experiment.set_runs(parsed_runs)

    # The same threshold is used for choosing the best run and for
    # collecting the multipeptides.
    fdr_threshold = 0.05
    cls.best_run = experiment.determine_best_run(
        alignment_fdr_threshold=fdr_threshold)
    cls.mp = experiment.get_all_multipeptides(fdr_threshold, verbose=False)