def _collect_casava_qc(self):
    """Collect QC objects for the flowcell named by ``self.pargs.flowcell``.

    The samplesheet is read from ``<fc_id>.csv`` inside the flowcell
    directory, falling back to ``SampleSheet.csv`` when that file does not
    exist.  When ``modified_within_days(fcdir, self.pargs.mtime)`` is true, a
    ``FlowcellRunMetricsDocument`` is populated from the flowcell parser and
    added to the result.  The samplesheet rows are then handed to
    ``self._parse_samplesheet`` together with the demultiplex statistics
    (``None`` when no flowcell document was built).

    Returns:
        Whatever ``self._parse_samplesheet`` returns for the collected
        QC objects.

    Raises:
        IOError: if the samplesheet cannot be opened; the error is logged
            through ``self.app.log`` before being re-raised.
    """
    qc_objects = []
    flowcell_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
    runinfo_csv = os.path.join(
        flowcell_dir, "{}.csv".format(fc_id(self.pargs.flowcell)))
    if not os.path.exists(runinfo_csv):
        LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
        runinfo_csv = os.path.join(flowcell_dir, "SampleSheet.csv")
    try:
        with open(runinfo_csv) as fh:
            runinfo = list(csv.reader(fh))
    except IOError as e:
        self.app.log.warn(str(e))
        # Bare raise keeps the original traceback intact.
        raise
    fcdir = os.path.join(os.path.abspath(self._meta.root_path), self.pargs.flowcell)
    (fc_date, fc_name) = fc_parts(self.pargs.flowcell)
    # Default used when the modification-time check below fails; previously
    # `fcobj` was read unconditionally after the branch and raised an
    # unbound-name error whenever the branch was skipped.
    demux_stats = None
    ## Check modification time
    if modified_within_days(fcdir, self.pargs.mtime):
        fc_kw = dict(fc_date=fc_date, fc_name=fc_name)
        parser = FlowcellRunMetricsParser(fcdir)
        fcobj = FlowcellRunMetricsDocument(fc_date, fc_name)
        fcobj["RunInfo"] = parser.parseRunInfo(**fc_kw)
        fcobj["RunParameters"] = parser.parseRunParameters(**fc_kw)
        fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
        fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
        fcobj["undemultiplexed_barcodes"] = parser.parse_undemultiplexed_barcode_metrics(**fc_kw)
        fcobj["illumina"].update({"Demultiplex_Stats": parser.parse_demultiplex_stats_htm(**fc_kw)})
        fcobj["samplesheet_csv"] = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv, **fc_kw)
        demux_stats = fcobj["illumina"]["Demultiplex_Stats"]
        qc_objects.append(fcobj)
    qc_objects = self._parse_samplesheet(runinfo, qc_objects, fc_date, fc_name, fcdir,
                                         demultiplex_stats=demux_stats)
    return qc_objects
def test_get_bc_count_demux_stats(self):
    """Barcode count obtained with parsed Demultiplex_Stats matches the expected value."""
    sample_run_dir = os.path.join(
        project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
    sample_parser = SampleRunMetricsParser(sample_run_dir)
    # First lookup is made without demultiplex stats; its result is not asserted on.
    sample_parser.get_bc_count(**self.sample_kw)
    flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
    demux_stats = flowcell_parser.parse_demultiplex_stats_htm(**self.fc_kw)
    observed = sample_parser.get_bc_count(demultiplex_stats=demux_stats, **self.sample_kw)
    expected = str(19517198)
    self.assertEqual(str(observed), expected)
def _collect_casava_qc(self):
    """Gather QC objects for the flowcell given in ``self.pargs.flowcell``.

    Reads the samplesheet rows from ``<fc_id>.csv`` (or ``SampleSheet.csv``
    if the former is missing).  If ``modified_within_days(fcdir,
    self.pargs.mtime)`` holds, builds a flowcell metrics document and appends
    it to the result.  Finally delegates to ``self._parse_samplesheet``,
    passing the demultiplex statistics, which stay ``None`` when no flowcell
    document was built.

    Raises:
        IOError: when the samplesheet cannot be opened (logged via
            ``self.app.log`` first).
    """
    collected = []
    run_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
    runinfo_csv = os.path.join(run_dir, "{}.csv".format(fc_id(self.pargs.flowcell)))
    if not os.path.exists(runinfo_csv):
        LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
        runinfo_csv = os.path.join(run_dir, "SampleSheet.csv")

    try:
        with open(runinfo_csv) as handle:
            samplesheet_rows = list(csv.reader(handle))
    except IOError as e:
        self.app.log.warn(str(e))
        raise e

    fcdir = os.path.join(os.path.abspath(self._meta.root_path), self.pargs.flowcell)
    fc_date, fc_name = fc_parts(self.pargs.flowcell)

    # Stays None unless the modification-time check below passes.
    demux_stats = None
    if modified_within_days(fcdir, self.pargs.mtime):
        fc_kw = {"fc_date": fc_date, "fc_name": fc_name}
        parser = FlowcellRunMetricsParser(fcdir)
        fcobj = FlowcellRunMetricsDocument(fc_date, fc_name)
        fcobj["RunInfo"] = parser.parseRunInfo(**fc_kw)
        fcobj["RunParameters"] = parser.parseRunParameters(**fc_kw)
        fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
        fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
        fcobj["undemultiplexed_barcodes"] = parser.parse_undemultiplexed_barcode_metrics(**fc_kw)
        fcobj["illumina"].update(
            {"Demultiplex_Stats": parser.parse_demultiplex_stats_htm(**fc_kw)})
        fcobj["samplesheet_csv"] = parser.parse_samplesheet_csv(
            runinfo_csv=runinfo_csv, **fc_kw)
        demux_stats = fcobj["illumina"]["Demultiplex_Stats"]
        collected.append(fcobj)

    return self._parse_samplesheet(
        samplesheet_rows, collected, fc_date, fc_name, fcdir,
        demultiplex_stats=demux_stats)
def _collect_casava_qc(self):
    """Gather QC objects for the flowcell given in ``self.pargs.flowcell``.

    The flowcell date, name and position are taken from the parsed RunInfo
    and RunParameters.  The samplesheet is parsed from ``<fc_name>.csv`` in
    the flowcell directory, or from ``SampleSheet.csv`` if that file does not
    exist.  If ``modified_within_days(fcdir, self.pargs.mtime)`` holds, a
    flowcell metrics document is built and appended to the result.  The
    samplesheet is then passed to ``self._parse_samplesheet`` along with the
    demultiplex statistics and read setup (both ``None`` when no flowcell
    document was built).
    """
    collected = []
    read_setup = None
    demux_stats = None
    fcdir = os.path.join(os.path.abspath(self._meta.root_path), self.pargs.flowcell)

    # Get the fc_name, fc_date from RunInfo
    parser = FlowcellRunMetricsParser(fcdir)
    runinfo_xml = parser.parseRunInfo()
    runparams = parser.parseRunParameters()
    fc_date = runinfo_xml.get("Date", None)
    fc_name = runinfo_xml.get("Flowcell", None)
    fc_pos = runparams.get("FCPosition", "")
    # Most of the code expects to have the flowcell position pre-pended to
    # the flowcell id
    positioned_fc_name = "{}{}".format(fc_pos, fc_name)

    samplesheet_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
    runinfo_csv = os.path.join(samplesheet_dir, "{}.csv".format(fc_name))
    if not os.path.exists(runinfo_csv):
        LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
        runinfo_csv = os.path.join(samplesheet_dir, "SampleSheet.csv")
    samplesheet = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv)

    if modified_within_days(fcdir, self.pargs.mtime):
        fc_kw = {"fc_date": fc_date, "fc_name": positioned_fc_name}
        fcobj = FlowcellRunMetricsDocument(**fc_kw)
        fcobj["RunInfo"] = runinfo_xml
        fcobj["RunParameters"] = runparams
        fcobj["DemultiplexConfig"] = parser.parseDemultiplexConfig(**fc_kw)
        fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
        fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
        fcobj["undemultiplexed_barcodes"] = parser.parse_undemultiplexed_barcode_metrics(**fc_kw)
        fcobj["illumina"].update(
            {"Demultiplex_Stats": parser.parse_demultiplex_stats_htm(**fc_kw)})
        fcobj["samplesheet_csv"] = samplesheet
        read_setup = fcobj["RunInfo"].get("Reads", [])
        fcobj["run_setup"] = self._run_setup(read_setup)
        demux_stats = fcobj["illumina"]["Demultiplex_Stats"]
        collected.append(fcobj)

    return self._parse_samplesheet(
        samplesheet, collected, fc_date, positioned_fc_name, fcdir,
        demultiplex_stats=demux_stats, setup=read_setup)
def test_get_bc_count_demux_stats(self):
    """get_bc_count, given parsed Demultiplex_Stats data, yields the expected count."""
    run_path = os.path.join(project_dir, "J.Doe_00_01", "P001_101_index3",
                            "120924_AC003CCCXX")
    parser = SampleRunMetricsParser(run_path)
    # Call once without demultiplex stats; the returned value is discarded.
    parser.get_bc_count(**self.sample_kw)
    stats = FlowcellRunMetricsParser(self.fcdir).parse_demultiplex_stats_htm(**self.fc_kw)
    count_with_stats = parser.get_bc_count(demultiplex_stats=stats, **self.sample_kw)
    self.assertEqual(str(count_with_stats), str(19517198))
def collect_metrics(path, log):
    """Collect demultiplex metrics for the run folder at ``path``.

    Parses RunInfo to obtain the flowcell id, then returns the parsed
    Demultiplex_Stats data extended with two keys: ``'RunInfo'`` (the parsed
    run info) and ``'Undemultiplexed'`` (the undemultiplexed barcode
    metrics).  If no flowcell id is found, an error is written to ``log`` and
    an empty dict is returned.
    """
    parser = FlowcellRunMetricsParser(path)
    run_info = parser.parseRunInfo()
    flowcell_id = run_info.get('Flowcell', None)
    if flowcell_id is not None:
        # NOTE(review): an earlier comment here said a dummy character is
        # inserted because the parse method expects a flowcell position, but
        # the id is passed through unchanged -- confirm which is intended.
        metrics = parser.parse_demultiplex_stats_htm(flowcell_id)
        metrics['RunInfo'] = run_info
        # Get the undemultiplexed indexes
        metrics['Undemultiplexed'] = parser.parse_undemultiplexed_barcode_metrics(flowcell_id)
        return metrics
    log.error("Could not parse flowcell id from RunInfo.xml")
    return {}
def collect_metrics(path):
    """Collect demultiplex metrics for the run folder at ``path``.

    The flowcell id is read from the parsed RunInfo.  The result is the
    parsed Demultiplex_Stats data with the run info stored under
    ``'RunInfo'`` and the undemultiplexed barcode metrics stored under
    ``'Undemultiplexed'``.  When the flowcell id is missing, the problem is
    reported through ``LOG`` and an empty dict is returned.
    """
    metrics_parser = FlowcellRunMetricsParser(path)
    run_info = metrics_parser.parseRunInfo()
    flowcell = run_info.get('Flowcell', None)
    if flowcell is not None:
        # NOTE(review): an earlier comment here said a dummy character is
        # inserted because the parse method expects a flowcell position, but
        # the id is passed through unchanged -- confirm which is intended.
        result = metrics_parser.parse_demultiplex_stats_htm(flowcell)
        result['RunInfo'] = run_info
        # Get the undemultiplexed indexes
        result['Undemultiplexed'] = metrics_parser.parse_undemultiplexed_barcode_metrics(flowcell)
        return result
    LOG.error("Could not parse flowcell id from RunInfo.xml")
    return {}
def _collect_casava_qc(self):
    """Build the list of QC objects for ``self.pargs.flowcell``.

    Flowcell date and name come from the parsed RunInfo, the flowcell
    position from the parsed RunParameters.  The samplesheet is parsed from
    ``<fc_name>.csv`` (fallback: ``SampleSheet.csv``).  A flowcell metrics
    document is added only if ``modified_within_days(fcdir,
    self.pargs.mtime)`` is true; otherwise ``self._parse_samplesheet``
    receives ``None`` for both the demultiplex statistics and the read
    setup.
    """
    qc_docs = []
    read_setup = None
    demux_stats = None
    fcdir = os.path.join(os.path.abspath(self._meta.root_path), self.pargs.flowcell)

    # Get the fc_name, fc_date from RunInfo
    parser = FlowcellRunMetricsParser(fcdir)
    runinfo_xml = parser.parseRunInfo()
    runparams = parser.parseRunParameters()
    fc_date = runinfo_xml.get('Date', None)
    fc_name = runinfo_xml.get('Flowcell', None)
    fc_pos = runparams.get('FCPosition', '')
    # Most of the code expects to have the flowcell position pre-pended to
    # the flowcell id
    fc_id_with_pos = "{}{}".format(fc_pos, fc_name)

    csv_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
    runinfo_csv = os.path.join(csv_dir, "{}.csv".format(fc_name))
    if not os.path.exists(runinfo_csv):
        LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
        runinfo_csv = os.path.join(csv_dir, "SampleSheet.csv")
    runinfo = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv)

    if modified_within_days(fcdir, self.pargs.mtime):
        fc_kw = dict(fc_date=fc_date, fc_name=fc_id_with_pos)
        fcobj = FlowcellRunMetricsDocument(**fc_kw)
        fcobj["RunInfo"] = runinfo_xml
        fcobj["RunParameters"] = runparams
        fcobj["DemultiplexConfig"] = parser.parseDemultiplexConfig(**fc_kw)
        fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
        fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
        fcobj["undemultiplexed_barcodes"] = parser.parse_undemultiplexed_barcode_metrics(**fc_kw)
        fcobj["illumina"].update(
            {"Demultiplex_Stats": parser.parse_demultiplex_stats_htm(**fc_kw)})
        fcobj["samplesheet_csv"] = runinfo
        read_setup = fcobj["RunInfo"].get('Reads', [])
        fcobj["run_setup"] = self._run_setup(read_setup)
        demux_stats = fcobj["illumina"]["Demultiplex_Stats"]
        qc_docs.append(fcobj)

    qc_docs = self._parse_samplesheet(
        runinfo, qc_docs, fc_date, fc_id_with_pos, fcdir,
        demultiplex_stats=demux_stats, setup=read_setup)
    return qc_docs
def test_parse_demux_stats(self):
    """Parsing Demultiplex_Stats.htm yields the expected read count for the first entry."""
    flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
    stats = flowcell_parser.parse_demultiplex_stats_htm(**self.fc_kw)
    first_entry = stats['Barcode_lane_statistics'][0]
    self.assertEqual(first_entry['# Reads'], '39,034,396')
def test_parse_demux_stats(self):
    """Check the '# Reads' value of the first barcode/lane entry parsed from Demultiplex_Stats.htm."""
    parsed = FlowcellRunMetricsParser(self.fcdir).parse_demultiplex_stats_htm(**self.fc_kw)
    lane_stats = parsed["Barcode_lane_statistics"]
    reads_in_first_entry = lane_stats[0]["# Reads"]
    self.assertEqual(reads_in_first_entry, "39,034,396")