def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
    """Attach peakgroup data from aligned peakgroup files to the swath runs.

    The peakgroup files have to have the following columns:
        - FullPeptideName
        - Charge
        - leftWidth
        - rightWidth
        - m_score
        - Intensity
        - align_runid
        - transition_group_id

    The files are parsed with the "openswath" reader and grouped into
    multipeptides.  Each multipeptide is stored under the key
    "FullPeptideName/Charge", taken from the peakgroup returned by its
    ``find_best_peptide_pg()``.  For every run from
    ``swathfiles.getSwathFiles()``, each precursor id that is present in
    that map gets the boundaries, m_score and intensity of all of its
    peakgroups in that run passed to ``swathrun.add_peakgroup_data``.

    When the module-level ONLY_SHOW_QUANTIFIED flag is set, precursors of
    a run that have no entry in the map are removed from the run first.

    Args:
        aligned_pg_files: peakgroup files passed to
            ``SWATHScoringReader.newReader``.
        swathfiles: object whose ``getSwathFiles()`` yields the runs to
            annotate.
    """
    # Read in the peakgroup files, parse them and map across runs
    reader = SWATHScoringReader.newReader(
        aligned_pg_files, "openswath", readmethod="gui",
        errorHandling="loose")
    new_exp = Experiment()
    new_exp.runs = reader.parse_files(REALIGN_RUNS)
    multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF, verbose=False)

    # Build map of the PeptideName/Charge to the individual multipeptide
    peakgroup_map = {}
    for m in multipeptides:
        pg = m.find_best_peptide_pg()
        identifier = (pg.get_value("FullPeptideName") + "/"
                      + pg.get_value("Charge"))
        peakgroup_map[identifier] = m

    for swathrun in swathfiles.getSwathFiles():
        if ONLY_SHOW_QUANTIFIED:
            # Query the run once and reuse the set for both operations.
            precursor_ids = set(swathrun.get_all_precursor_ids())
            intersection = precursor_ids.intersection(peakgroup_map)
            todelete = precursor_ids.difference(intersection)
            if not intersection:
                # Single-argument print(...) behaves identically under
                # Python 2 and Python 3.
                print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
                print(len(intersection))
            swathrun.remove_precursors(todelete)

        # for each precursor in this run, store the values of all of its
        # peakgroups found in the peakgroup files
        for precursor_id in swathrun.get_all_precursor_ids():
            if precursor_id not in peakgroup_map:
                continue

            m = peakgroup_map[precursor_id]
            if not m.hasPrecursorGroup(swathrun.runid):
                continue

            for pg in m.getPrecursorGroup(swathrun.runid).getAllPeakgroups():
                left = float(pg.get_value("leftWidth"))
                right = float(pg.get_value("rightWidth"))
                fdrscore = float(pg.get_value("m_score"))
                intensity = float(pg.get_value("Intensity"))
                swathrun.add_peakgroup_data(
                    precursor_id, left, right, fdrscore, intensity)
def test_parse_files(self):
    """Check the peakgroup map built from the parsed "dataset3.csv" file.

    The file is read with the "openswath" reader in "gui" read mode,
    converted into multipeptides with a cutoff of 1.0, and handed to
    ``mapper.buildPeakgroupMap``.  The resulting map must contain exactly
    the two identifiers 'testpeptide/0' and 'testpeptide/0_pr'.
    """
    filename = os.path.join(self.datadir_gui, "dataset3.csv")
    r = reader.SWATHScoringReader.newReader(
        [filename], "openswath", readmethod="gui", errorHandling="loose")

    new_exp = Experiment()
    new_exp.runs = r.parse_files(True)
    multipeptides = new_exp.get_all_multipeptides(1.0, verbose=False)

    # Build map of the PeptideName/Charge to the individual multipeptide;
    # buildPeakgroupMap fills the dict that is passed in.
    peakgroup_map = {}
    mapper.buildPeakgroupMap(multipeptides, peakgroup_map)

    self.assertEqual(len(peakgroup_map), 2)
    # Iterating a dict yields its keys, so sorted() can take it directly.
    self.assertEqual(sorted(peakgroup_map),
                     ['testpeptide/0', 'testpeptide/0_pr'])
def _read_peakgroup_files(self, aligned_pg_files, swathfiles):
    """Attach peakgroup data from aligned peakgroup files to the swath runs.

    The peakgroup files have to have the following columns:
        - FullPeptideName
        - Charge
        - leftWidth
        - rightWidth
        - m_score
        - Intensity
        - align_runid
        - transition_group_id

    The files are parsed with the "openswath" reader and grouped into
    multipeptides.  Each multipeptide is stored under the key
    "FullPeptideName/Charge", taken from the peakgroup returned by its
    ``find_best_peptide_pg()``.  For every run from
    ``swathfiles.getSwathFiles()``, each precursor id that is present in
    that map gets the boundaries, m_score and intensity of all of its
    peakgroups in that run passed to ``swathrun.add_peakgroup_data``.

    When the module-level ONLY_SHOW_QUANTIFIED flag is set, precursors of
    a run that have no entry in the map are removed from the run first.

    Args:
        aligned_pg_files: peakgroup files passed to
            ``SWATHScoringReader.newReader``.
        swathfiles: object whose ``getSwathFiles()`` yields the runs to
            annotate.
    """
    # Read in the peakgroup files, parse them and map across runs
    reader = SWATHScoringReader.newReader(
        aligned_pg_files, "openswath", readmethod="gui",
        errorHandling="loose")
    new_exp = Experiment()
    new_exp.runs = reader.parse_files(REALIGN_RUNS)
    multipeptides = new_exp.get_all_multipeptides(FDR_CUTOFF, verbose=False)

    # Build map of the PeptideName/Charge to the individual multipeptide
    peakgroup_map = {}
    for m in multipeptides:
        pg = m.find_best_peptide_pg()
        identifier = (pg.get_value("FullPeptideName") + "/"
                      + pg.get_value("Charge"))
        peakgroup_map[identifier] = m

    for swathrun in swathfiles.getSwathFiles():
        if ONLY_SHOW_QUANTIFIED:
            # Query the run once and reuse the set for both operations.
            precursor_ids = set(swathrun.get_all_precursor_ids())
            intersection = precursor_ids.intersection(peakgroup_map)
            todelete = precursor_ids.difference(intersection)
            if not intersection:
                # Single-argument print(...) behaves identically under
                # Python 2 and Python 3.
                print("Could not find any intersection between identifiers in your transition file and the provided chromatograms")
                print(len(intersection))
            swathrun.remove_precursors(todelete)

        # for each precursor in this run, store the values of all of its
        # peakgroups found in the peakgroup files
        for precursor_id in swathrun.get_all_precursor_ids():
            if precursor_id not in peakgroup_map:
                continue

            m = peakgroup_map[precursor_id]
            if not m.hasPrecursorGroup(swathrun.runid):
                continue

            for pg in m.getPrecursorGroup(swathrun.runid).getAllPeakgroups():
                left = float(pg.get_value("leftWidth"))
                right = float(pg.get_value("rightWidth"))
                fdrscore = float(pg.get_value("m_score"))
                intensity = float(pg.get_value("Intensity"))
                swathrun.add_peakgroup_data(
                    precursor_id, left, right, fdrscore, intensity)
def test_parse_files(self):
    """Check the peakgroup map built from the parsed "dataset3.csv" file.

    The file is read with the "openswath" reader in "gui" read mode,
    converted into multipeptides with a cutoff of 1.0, and handed to
    ``mapper.buildPeakgroupMap``.  The resulting map must contain exactly
    the two identifiers 'testpeptide/0' and 'testpeptide/0_pr'.
    """
    filename = os.path.join(self.datadir_gui, "dataset3.csv")
    r = reader.SWATHScoringReader.newReader(
        [filename], "openswath", readmethod="gui", errorHandling="loose")

    new_exp = Experiment()
    new_exp.runs = r.parse_files(True)
    multipeptides = new_exp.get_all_multipeptides(1.0, verbose=False)

    # Build map of the PeptideName/Charge to the individual multipeptide;
    # buildPeakgroupMap fills the dict that is passed in.
    peakgroup_map = {}
    mapper.buildPeakgroupMap(multipeptides, peakgroup_map)

    self.assertEqual(len(peakgroup_map), 2)
    # Iterating a dict yields its keys, so sorted() can take it directly.
    self.assertEqual(sorted(peakgroup_map),
                     ['testpeptide/0', 'testpeptide/0_pr'])