Ejemplo n.º 1
0
 def _parse_samplesheet(self, runinfo, qc_objects, fc_date, fc_name, fcdir, as_yaml=False, demultiplex_stats=None, setup=None):
     """Parse samplesheet information and populate sample run metrics object"""
     if as_yaml:
         for info in runinfo:
             if not info.get("multiplex", None):
                 self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
                 sample = {}
                 sample.update({k: info.get(k, None) for k in ('analysis', 'description', 'flowcell_id', 'lane')})
                 sample_kw = dict(path=fcdir, flowcell=fc_name, date=fc_date, lane=sample.get('lane', None), barcode_name=sample.get('name', None), sample_prj=sample.get('sample_prj', None),
                                  barcode_id=sample.get('barcode_id', None), sequence=sample.get('sequence', "NoIndex"))
             for sample in info["multiplex"]:
                 sample.update({k: info.get(k, None) for k in ('analysis', 'description', 'flowcell_id', 'lane')})
                 sample_kw = dict(flowcell=fc_name, date=fc_date, lane=sample['lane'], barcode_name=sample['name'], sample_prj=sample.get('sample_prj', None),
                                  barcode_id=sample['barcode_id'], sequence=sample.get('sequence', "NoIndex"))
             
                 parser = SampleRunMetricsParser(fcdir)
                 obj = SampleRunMetricsDocument(**sample_kw)
                 obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                 obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                 obj["bc_count"] = parser.get_bc_count(run_setup=setup, **sample_kw)
                 obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                 obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
                 qc_objects.append(obj)
     else:
         for d in runinfo:
             LOG.debug("Getting information for sample defined by {}".format(d.values()))
             if self.app.pargs.project_name and self.app.pargs.project_name != d['SampleProject']:
                 continue
             if self.app.pargs.sample and self.app.pargs.sample != d['SampleID']:
                 continue
             
             sampledir = os.path.join(os.path.abspath(self._meta.production_root_path), d['SampleProject'].replace("__", "."), d['SampleID'])
             if not os.path.exists(sampledir):
                 self.app.log.warn("No such sample directory: {}".format(sampledir))
                 continue
             sample_fcdir = os.path.join(sampledir, fc_fullname(self.pargs.flowcell))
             if not os.path.exists(sample_fcdir):
                 self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                 continue
             if not modified_within_days(sample_fcdir, self.pargs.mtime):
                 continue
             runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d['SampleID']))
             if not os.path.exists(runinfo_yaml_file):
                 self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                 raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
             with open(runinfo_yaml_file) as fh:
                 runinfo_yaml = yaml.load(fh)
             if not runinfo_yaml['details'][0].get("multiplex", None):
                 self.app.log.warn("No multiplex information for sample {}".format(d['SampleID']))
                 runinfo_yaml['details'][0]['multiplex'] = [{'barcode_id': 0, 'sequence': 'NoIndex'}]
             sample_kw = dict(flowcell=fc_name, date=fc_date, lane=d['Lane'], barcode_name=d['SampleID'], sample_prj=d['SampleProject'].replace("__", "."), barcode_id=runinfo_yaml['details'][0]['multiplex'][0]['barcode_id'], sequence=runinfo_yaml['details'][0]['multiplex'][0]['sequence'])
             parser = SampleRunMetricsParser(sample_fcdir)
             obj = SampleRunMetricsDocument(**sample_kw)
             obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
             obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
             obj["bc_count"] = parser.get_bc_count(demultiplex_stats=demultiplex_stats, run_setup=setup, **sample_kw)
             obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
             obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
             qc_objects.append(obj)
     return qc_objects
Ejemplo n.º 2
0
 def _parse_samplesheet(self, runinfo, qc_objects, fc_date, fc_name, fcdir, as_yaml=False, demultiplex_stats=None, setup=None):
     """Parse samplesheet information and populate sample run metrics object"""
     if as_yaml:
         for info in runinfo:
             if not info.get("multiplex", None):
                 self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
                 sample = {}
                 sample.update({k: info.get(k, None) for k in ('analysis', 'description', 'flowcell_id', 'lane')})
                 sample_kw = dict(path=fcdir, flowcell=fc_name, date=fc_date, lane=sample.get('lane', None), barcode_name=sample.get('name', None), sample_prj=sample.get('sample_prj', None),
                                  barcode_id=sample.get('barcode_id', None), sequence=sample.get('sequence', "NoIndex"))
             for sample in info["multiplex"]:
                 sample.update({k: info.get(k, None) for k in ('analysis', 'description', 'flowcell_id', 'lane')})
                 sample_kw = dict(flowcell=fc_name, date=fc_date, lane=sample['lane'], barcode_name=sample['name'], sample_prj=sample.get('sample_prj', None),
                                  barcode_id=sample['barcode_id'], sequence=sample.get('sequence', "NoIndex"))
             
                 parser = SampleRunMetricsParser(fcdir)
                 obj = SampleRunMetricsDocument(**sample_kw)
                 obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                 obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                 obj["bc_count"] = parser.get_bc_count(run_setup=setup, **sample_kw)
                 obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                 obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
                 qc_objects.append(obj)
     else:
         for d in runinfo:
             LOG.debug("Getting information for sample defined by {}".format(d.values()))
             if self.app.pargs.project_name and self.app.pargs.project_name != d['SampleProject']:
                 continue
             if self.app.pargs.sample and self.app.pargs.sample != d['SampleID']:
                 continue
             
             sampledir = os.path.join(os.path.abspath(self._meta.production_root_path), d['SampleProject'].replace("__", "."), d['SampleID'])
             if not os.path.exists(sampledir):
                 self.app.log.warn("No such sample directory: {}".format(sampledir))
                 continue
             sample_fcdir = os.path.join(sampledir, fc_fullname(self.pargs.flowcell))
             if not os.path.exists(sample_fcdir):
                 self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                 continue
             if not modified_within_days(sample_fcdir, self.pargs.mtime):
                 continue
             runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d['SampleID']))
             if not os.path.exists(runinfo_yaml_file):
                 self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                 raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
             with open(runinfo_yaml_file) as fh:
                 runinfo_yaml = yaml.load(fh)
             if not runinfo_yaml['details'][0].get("multiplex", None):
                 self.app.log.warn("No multiplex information for sample {}".format(d['SampleID']))
                 runinfo_yaml['details'][0]['multiplex'] = [{'barcode_id': 0, 'sequence': 'NoIndex'}]
             sample_kw = dict(flowcell=fc_name, date=fc_date, lane=d['Lane'], barcode_name=d['SampleID'], sample_prj=d['SampleProject'].replace("__", "."), barcode_id=runinfo_yaml['details'][0]['multiplex'][0]['barcode_id'], sequence=runinfo_yaml['details'][0]['multiplex'][0]['sequence'])
             parser = SampleRunMetricsParser(sample_fcdir)
             obj = SampleRunMetricsDocument(**sample_kw)
             obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
             obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
             obj["bc_count"] = parser.get_bc_count(demultiplex_stats=demultiplex_stats, run_setup=setup, **sample_kw)
             obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
             obj["bcbb_checkpoints"] = parser.parse_bcbb_checkpoints(**sample_kw)
             qc_objects.append(obj)
     return qc_objects
Ejemplo n.º 3
0
 def test_get_bc_count_demux_stats(self):
     """Check the barcode count obtained when demultiplex statistics are supplied."""
     sample_run_dir = os.path.join(project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
     sample_parser = SampleRunMetricsParser(sample_run_dir)
     # First call goes without statistics; its result is not checked here.
     sample_parser.get_bc_count(**self.sample_kw)
     # Parse the demultiplex statistics with the flowcell parser and feed
     # them into a second barcode count.
     demux_stats = FlowcellRunMetricsParser(self.fcdir).parse_demultiplex_stats_htm(**self.fc_kw)
     count_with_stats = sample_parser.get_bc_count(demultiplex_stats=demux_stats, **self.sample_kw)
     # Both sides are converted to str before comparing.
     self.assertEqual(str(count_with_stats), str(19517198))
Ejemplo n.º 4
0
 def test_get_bc_count_demux_stats(self):
     """Barcode count with demultiplex statistics must equal 19517198."""
     path_parts = ("J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
     metrics_parser = SampleRunMetricsParser(os.path.join(project_dir, *path_parts))
     # Count once without statistics; the value is discarded.
     metrics_parser.get_bc_count(**self.sample_kw)
     # Then count again using the parsed demultiplex statistics.
     flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
     stats = flowcell_parser.parse_demultiplex_stats_htm(**self.fc_kw)
     observed = metrics_parser.get_bc_count(demultiplex_stats=stats, **self.sample_kw)
     # The comparison is made on the string forms of both values.
     self.assertEqual(str(observed), str(19517198))
Ejemplo n.º 5
0
 def test_get_bc_count(self):
     """Barcode count requested without demultiplex statistics must be 0."""
     sample_run_dir = os.path.join(project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
     observed = SampleRunMetricsParser(sample_run_dir).get_bc_count(**self.sample_kw)
     self.assertEqual(observed, 0)
Ejemplo n.º 6
0
    def _parse_samplesheet(self, runinfo, qc_objects, fc_date, fc_name, fcdir, as_yaml=False, demultiplex_stats=None):
        """Parse samplesheet information and populate sample run metrics object"""
        if as_yaml:
            for info in runinfo:
                if not info.get("multiplex", None):
                    self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
                    sample = {}
                    sample.update({k: info.get(k, None) for k in ("analysis", "description", "flowcell_id", "lane")})
                    sample_kw = dict(
                        path=fcdir,
                        flowcell=fc_name,
                        date=fc_date,
                        lane=sample.get("lane", None),
                        barcode_name=sample.get("name", None),
                        sample_prj=sample.get("sample_prj", None),
                        barcode_id=sample.get("barcode_id", None),
                        sequence=sample.get("sequence", "NoIndex"),
                    )
                for sample in info["multiplex"]:
                    sample.update({k: info.get(k, None) for k in ("analysis", "description", "flowcell_id", "lane")})
                    sample_kw = dict(
                        flowcell=fc_name,
                        date=fc_date,
                        lane=sample["lane"],
                        barcode_name=sample["name"],
                        sample_prj=sample.get("sample_prj", None),
                        barcode_id=sample["barcode_id"],
                        sequence=sample.get("sequence", "NoIndex"),
                    )

                    parser = SampleRunMetricsParser(fcdir)
                    obj = SampleRunMetricsDocument(**sample_kw)
                    obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                    obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                    obj["bc_count"] = parser.get_bc_count(**sample_kw)
                    obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                    qc_objects.append(obj)
        else:
            for sample in runinfo[1:]:
                LOG.debug("Getting information for sample defined by {}".format(sample))
                d = dict(zip(runinfo[0], sample))
                if self.app.pargs.project_name and self.app.pargs.project_name != d["SampleProject"]:
                    continue
                if self.app.pargs.sample and self.app.pargs.sample != d["SampleID"]:
                    continue

                sampledir = os.path.join(
                    os.path.abspath(self._meta.production_root_path),
                    d["SampleProject"].replace("__", "."),
                    d["SampleID"],
                )
                if not os.path.exists(sampledir):
                    self.app.log.warn("No such sample directory: {}".format(sampledir))
                    continue
                sample_fcdir = os.path.join(sampledir, fc_fullname(self.pargs.flowcell))
                if not os.path.exists(sample_fcdir):
                    self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                    continue
                if not modified_within_days(sample_fcdir, self.pargs.mtime):
                    continue
                runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d["SampleID"]))
                if not os.path.exists(runinfo_yaml_file):
                    self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                    raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
                with open(runinfo_yaml_file) as fh:
                    runinfo_yaml = yaml.load(fh)
                if not runinfo_yaml["details"][0].get("multiplex", None):
                    self.app.log.warn("No multiplex information for sample {}".format(d["SampleID"]))
                    continue
                sample_kw = dict(
                    flowcell=fc_name,
                    date=fc_date,
                    lane=d["Lane"],
                    barcode_name=d["SampleID"],
                    sample_prj=d["SampleProject"].replace("__", "."),
                    barcode_id=runinfo_yaml["details"][0]["multiplex"][0]["barcode_id"],
                    sequence=runinfo_yaml["details"][0]["multiplex"][0]["sequence"],
                )
                parser = SampleRunMetricsParser(sample_fcdir)
                obj = SampleRunMetricsDocument(**sample_kw)
                obj["picard_metrics"] = parser.read_picard_metrics(**sample_kw)
                obj["fastq_scr"] = parser.parse_fastq_screen(**sample_kw)
                obj["bc_count"] = parser.get_bc_count(demultiplex_stats=demultiplex_stats, **sample_kw)
                obj["fastqc"] = parser.read_fastqc_metrics(**sample_kw)
                qc_objects.append(obj)
        return qc_objects
Ejemplo n.º 7
0
 def test_get_bc_count(self):
     """Expect a barcode count of 0 when no demultiplex statistics are passed."""
     path_parts = ("J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
     metrics_parser = SampleRunMetricsParser(os.path.join(project_dir, *path_parts))
     count = metrics_parser.get_bc_count(**self.sample_kw)
     self.assertEqual(count, 0)