def _parse_samplesheet(self, runinfo, qc_objects, fc_date, fc_name, fcdir,
                       as_yaml=False, demultiplex_stats=None, setup=None):
    """Parse samplesheet information and populate sample run metrics objects.

    One ``SampleRunMetricsDocument`` is created per sample, filled with the
    results of the ``SampleRunMetricsParser`` readers and appended to
    ``qc_objects``.

    :param runinfo: iterable of dicts. With ``as_yaml`` each dict is a lane
        entry whose ``multiplex`` value lists the samples; otherwise each
        dict is a samplesheet row with ``SampleProject``, ``SampleID`` and
        ``Lane`` keys.
    :param qc_objects: list the created documents are appended to
    :param fc_date: flowcell date, passed on as ``date``
    :param fc_name: flowcell name, passed on as ``flowcell``
    :param fcdir: directory handed to the parser in the ``as_yaml`` branch
    :param as_yaml: select the lane/multiplex layout of ``runinfo``
    :param demultiplex_stats: forwarded to ``get_bc_count`` in the
        samplesheet branch
    :param setup: forwarded to ``get_bc_count`` as ``run_setup``
    :returns: ``qc_objects``
    :raises IOError: if the bcbb config yaml file of a sample is missing
    """
    lane_keys = ('analysis', 'description', 'flowcell_id', 'lane')
    if as_yaml:
        for info in runinfo:
            multiplex = info.get("multiplex", None)
            if not multiplex:
                # Previously the code warned and then still iterated over
                # info["multiplex"], which raises when the key is missing or
                # None. There is nothing to collect for such a lane, so skip it.
                self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
                continue
            for sample in multiplex:
                # Lane-level fields are copied onto the multiplex entry in place.
                sample.update({k: info.get(k, None) for k in lane_keys})
                sample_kw = dict(
                    flowcell=fc_name,
                    date=fc_date,
                    lane=sample['lane'],
                    barcode_name=sample['name'],
                    sample_prj=sample.get('sample_prj', None),
                    barcode_id=sample['barcode_id'],
                    sequence=sample.get('sequence', "NoIndex"),
                )
                parser = SampleRunMetricsParser(fcdir)
                obj = SampleRunMetricsDocument(**sample_kw)
                obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                obj["bc_count"] = parser.get_bc_count(run_setup=setup, **sample_kw)
                obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
                qc_objects.append(obj)
    else:
        for d in runinfo:
            LOG.debug("Getting information for sample defined by {}".format(d.values()))
            # Optional command line filters on project and sample.
            if self.app.pargs.project_name and self.app.pargs.project_name != d['SampleProject']:
                continue
            if self.app.pargs.sample and self.app.pargs.sample != d['SampleID']:
                continue
            # Project directories use "." where the samplesheet has "__".
            sample_prj = d['SampleProject'].replace("__", ".")
            sampledir = os.path.join(os.path.abspath(self._meta.production_root_path), sample_prj, d['SampleID'])
            if not os.path.exists(sampledir):
                self.app.log.warn("No such sample directory: {}".format(sampledir))
                continue
            sample_fcdir = os.path.join(sampledir, fc_fullname(self.pargs.flowcell))
            if not os.path.exists(sample_fcdir):
                self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                continue
            if not modified_within_days(sample_fcdir, self.pargs.mtime):
                continue
            runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d['SampleID']))
            if not os.path.exists(runinfo_yaml_file):
                self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
            with open(runinfo_yaml_file) as fh:
                # safe_load instead of yaml.load: a bare yaml.load can construct
                # arbitrary objects and is rejected without a Loader by PyYAML >= 6.
                runinfo_yaml = yaml.safe_load(fh)
            details = runinfo_yaml['details'][0]
            if not details.get("multiplex", None):
                self.app.log.warn("No multiplex information for sample {}".format(d['SampleID']))
                # Fall back to a single placeholder entry without index.
                details['multiplex'] = [{'barcode_id': 0, 'sequence': 'NoIndex'}]
            first_index = details['multiplex'][0]
            sample_kw = dict(
                flowcell=fc_name,
                date=fc_date,
                lane=d['Lane'],
                barcode_name=d['SampleID'],
                sample_prj=sample_prj,
                barcode_id=first_index['barcode_id'],
                sequence=first_index['sequence'],
            )
            parser = SampleRunMetricsParser(sample_fcdir)
            obj = SampleRunMetricsDocument(**sample_kw)
            obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
            obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
            obj["bc_count"] = parser.get_bc_count(demultiplex_stats=demultiplex_stats, run_setup=setup, **sample_kw)
            obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
            obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
            qc_objects.append(obj)
    return qc_objects
def _parse_samplesheet(self, runinfo, qc_objects, fc_date, fc_name, fcdir,
                       as_yaml=False, demultiplex_stats=None, setup=None):
    """Parse samplesheet information and populate sample run metrics objects.

    One ``SampleRunMetricsDocument`` is created per sample, filled with the
    results of the ``SampleRunMetricsParser`` readers and appended to
    ``qc_objects``.

    :param runinfo: iterable of dicts. With ``as_yaml`` each dict is a lane
        entry whose ``multiplex`` value lists the samples; otherwise each
        dict is a samplesheet row with ``SampleProject``, ``SampleID`` and
        ``Lane`` keys.
    :param qc_objects: list the created documents are appended to
    :param fc_date: flowcell date, passed on as ``date``
    :param fc_name: flowcell name, passed on as ``flowcell``
    :param fcdir: directory handed to the parser in the ``as_yaml`` branch
    :param as_yaml: select the lane/multiplex layout of ``runinfo``
    :param demultiplex_stats: forwarded to ``get_bc_count`` in the
        samplesheet branch
    :param setup: forwarded to ``get_bc_count`` as ``run_setup``
    :returns: ``qc_objects``
    :raises IOError: if the bcbb config yaml file of a sample is missing
    """
    lane_keys = ('analysis', 'description', 'flowcell_id', 'lane')
    if as_yaml:
        for info in runinfo:
            multiplex = info.get("multiplex", None)
            if not multiplex:
                # Previously the code warned and then still iterated over
                # info["multiplex"], which raises when the key is missing or
                # None. There is nothing to collect for such a lane, so skip it.
                self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
                continue
            for sample in multiplex:
                # Lane-level fields are copied onto the multiplex entry in place.
                sample.update({k: info.get(k, None) for k in lane_keys})
                sample_kw = dict(
                    flowcell=fc_name,
                    date=fc_date,
                    lane=sample['lane'],
                    barcode_name=sample['name'],
                    sample_prj=sample.get('sample_prj', None),
                    barcode_id=sample['barcode_id'],
                    sequence=sample.get('sequence', "NoIndex"),
                )
                parser = SampleRunMetricsParser(fcdir)
                obj = SampleRunMetricsDocument(**sample_kw)
                obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                obj["bc_count"] = parser.get_bc_count(run_setup=setup, **sample_kw)
                obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
                qc_objects.append(obj)
    else:
        for d in runinfo:
            LOG.debug("Getting information for sample defined by {}".format(d.values()))
            # Optional command line filters on project and sample.
            if self.app.pargs.project_name and self.app.pargs.project_name != d['SampleProject']:
                continue
            if self.app.pargs.sample and self.app.pargs.sample != d['SampleID']:
                continue
            # Project directories use "." where the samplesheet has "__".
            sample_prj = d['SampleProject'].replace("__", ".")
            sampledir = os.path.join(os.path.abspath(self._meta.production_root_path), sample_prj, d['SampleID'])
            if not os.path.exists(sampledir):
                self.app.log.warn("No such sample directory: {}".format(sampledir))
                continue
            sample_fcdir = os.path.join(sampledir, fc_fullname(self.pargs.flowcell))
            if not os.path.exists(sample_fcdir):
                self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                continue
            if not modified_within_days(sample_fcdir, self.pargs.mtime):
                continue
            runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d['SampleID']))
            if not os.path.exists(runinfo_yaml_file):
                self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
            with open(runinfo_yaml_file) as fh:
                # safe_load instead of yaml.load: a bare yaml.load can construct
                # arbitrary objects and is rejected without a Loader by PyYAML >= 6.
                runinfo_yaml = yaml.safe_load(fh)
            details = runinfo_yaml['details'][0]
            if not details.get("multiplex", None):
                self.app.log.warn("No multiplex information for sample {}".format(d['SampleID']))
                # Fall back to a single placeholder entry without index.
                details['multiplex'] = [{'barcode_id': 0, 'sequence': 'NoIndex'}]
            first_index = details['multiplex'][0]
            sample_kw = dict(
                flowcell=fc_name,
                date=fc_date,
                lane=d['Lane'],
                barcode_name=d['SampleID'],
                sample_prj=sample_prj,
                barcode_id=first_index['barcode_id'],
                sequence=first_index['sequence'],
            )
            parser = SampleRunMetricsParser(sample_fcdir)
            obj = SampleRunMetricsDocument(**sample_kw)
            obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
            obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
            obj["bc_count"] = parser.get_bc_count(demultiplex_stats=demultiplex_stats, run_setup=setup, **sample_kw)
            obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
            obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
            qc_objects.append(obj)
    return qc_objects
def test_get_bc_count_demux_stats(self):
    # Check the barcode count returned when demultiplex stats are passed in.
    sample_run_dir = os.path.join(project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
    sample_parser = SampleRunMetricsParser(sample_run_dir)
    # First call goes without demultiplex stats; its result is not inspected.
    sample_parser.get_bc_count(**self.sample_kw)
    flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
    demux_stats = flowcell_parser.parse_demultiplex_stats_htm(**self.fc_kw)
    observed = sample_parser.get_bc_count(demultiplex_stats=demux_stats, **self.sample_kw)
    # Compared as strings, so the returned type does not matter.
    self.assertEqual(str(observed), str(19517198))
def test_get_bc_count_demux_stats(self):
    # Check the barcode count returned when demultiplex stats are passed in.
    path_parts = ("J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
    sample_parser = SampleRunMetricsParser(os.path.join(project_dir, *path_parts))
    # First call goes without demultiplex stats; its result is not inspected.
    sample_parser.get_bc_count(**self.sample_kw)
    demux_stats = FlowcellRunMetricsParser(self.fcdir).parse_demultiplex_stats_htm(**self.fc_kw)
    observed = sample_parser.get_bc_count(demultiplex_stats=demux_stats, **self.sample_kw)
    # Compared as strings, so the returned type does not matter.
    self.assertEqual(str(observed), str(19517198))
def test_get_bc_count(self):
    # Without demultiplex stats this fixture is expected to give a count of 0.
    sample_run_dir = os.path.join(project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
    sample_parser = SampleRunMetricsParser(sample_run_dir)
    observed = sample_parser.get_bc_count(**self.sample_kw)
    self.assertEqual(observed, 0)
def _parse_samplesheet(self, runinfo, qc_objects, fc_date, fc_name, fcdir,
                       as_yaml=False, demultiplex_stats=None):
    """Parse samplesheet information and populate sample run metrics objects.

    One ``SampleRunMetricsDocument`` is created per sample, filled with the
    results of the ``SampleRunMetricsParser`` readers and appended to
    ``qc_objects``.

    :param runinfo: with ``as_yaml`` an iterable of lane dicts whose
        ``multiplex`` value lists the samples; otherwise a sequence whose
        first element holds the column names and whose remaining elements
        are the matching rows (``SampleProject``, ``SampleID`` and ``Lane``
        columns are read)
    :param qc_objects: list the created documents are appended to
    :param fc_date: flowcell date, passed on as ``date``
    :param fc_name: flowcell name, passed on as ``flowcell``
    :param fcdir: directory handed to the parser in the ``as_yaml`` branch
    :param as_yaml: select the lane/multiplex layout of ``runinfo``
    :param demultiplex_stats: forwarded to ``get_bc_count`` in the
        samplesheet branch
    :returns: ``qc_objects``
    :raises IOError: if the bcbb config yaml file of a sample is missing
    """
    lane_keys = ("analysis", "description", "flowcell_id", "lane")
    if as_yaml:
        for info in runinfo:
            multiplex = info.get("multiplex", None)
            if not multiplex:
                # Previously the code warned and then still iterated over
                # info["multiplex"], which raises when the key is missing or
                # None. There is nothing to collect for such a lane, so skip it.
                self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
                continue
            for sample in multiplex:
                # Lane-level fields are copied onto the multiplex entry in place.
                sample.update({k: info.get(k, None) for k in lane_keys})
                sample_kw = dict(
                    flowcell=fc_name,
                    date=fc_date,
                    lane=sample["lane"],
                    barcode_name=sample["name"],
                    sample_prj=sample.get("sample_prj", None),
                    barcode_id=sample["barcode_id"],
                    sequence=sample.get("sequence", "NoIndex"),
                )
                parser = SampleRunMetricsParser(fcdir)
                obj = SampleRunMetricsDocument(**sample_kw)
                obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                obj["bc_count"] = parser.get_bc_count(**sample_kw)
                obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                qc_objects.append(obj)
    else:
        # The first row names the columns; every later row is one sample.
        header = runinfo[0] if runinfo else []
        for sample in runinfo[1:]:
            LOG.debug("Getting information for sample defined by {}".format(sample))
            d = dict(zip(header, sample))
            # Optional command line filters on project and sample.
            if self.app.pargs.project_name and self.app.pargs.project_name != d["SampleProject"]:
                continue
            if self.app.pargs.sample and self.app.pargs.sample != d["SampleID"]:
                continue
            # Project directories use "." where the samplesheet has "__".
            sample_prj = d["SampleProject"].replace("__", ".")
            sampledir = os.path.join(os.path.abspath(self._meta.production_root_path), sample_prj, d["SampleID"])
            if not os.path.exists(sampledir):
                self.app.log.warn("No such sample directory: {}".format(sampledir))
                continue
            sample_fcdir = os.path.join(sampledir, fc_fullname(self.pargs.flowcell))
            if not os.path.exists(sample_fcdir):
                self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                continue
            if not modified_within_days(sample_fcdir, self.pargs.mtime):
                continue
            runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d["SampleID"]))
            if not os.path.exists(runinfo_yaml_file):
                self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
            with open(runinfo_yaml_file) as fh:
                # safe_load instead of yaml.load: a bare yaml.load can construct
                # arbitrary objects and is rejected without a Loader by PyYAML >= 6.
                runinfo_yaml = yaml.safe_load(fh)
            details = runinfo_yaml["details"][0]
            if not details.get("multiplex", None):
                self.app.log.warn("No multiplex information for sample {}".format(d["SampleID"]))
                continue
            first_index = details["multiplex"][0]
            sample_kw = dict(
                flowcell=fc_name,
                date=fc_date,
                lane=d["Lane"],
                barcode_name=d["SampleID"],
                sample_prj=sample_prj,
                barcode_id=first_index["barcode_id"],
                sequence=first_index["sequence"],
            )
            parser = SampleRunMetricsParser(sample_fcdir)
            obj = SampleRunMetricsDocument(**sample_kw)
            obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
            obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
            obj["bc_count"] = parser.get_bc_count(demultiplex_stats=demultiplex_stats, **sample_kw)
            obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
            qc_objects.append(obj)
    return qc_objects
def test_get_bc_count(self):
    # Without demultiplex stats this fixture is expected to give a count of 0.
    path_parts = ("J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
    sample_parser = SampleRunMetricsParser(os.path.join(project_dir, *path_parts))
    observed = sample_parser.get_bc_count(**self.sample_kw)
    self.assertEqual(observed, 0)