def _collect_casava_qc(self):
    """Collect QC metric objects for a CASAVA-style flowcell layout.

    The samplesheet ``<flowcell>/<fc_id>.csv`` is read first; its first row
    is used as the column header for all following rows.  One
    ``FlowcellRunMetrics`` object is created for the flowcell directory
    under the analysis directory (if it was modified within
    ``self.pargs.mtime`` days), followed by one ``SampleRunMetrics`` object
    per samplesheet row that passes the project/sample filters and whose
    sample flowcell directory exists and was recently modified.

    Returns:
        list: the flowcell metrics object (if any) followed by the sample
        metrics objects, in samplesheet order.

    Raises:
        IOError: if the samplesheet cannot be opened, or if the
            ``<SampleID>-bcbb-config.yaml`` file of a sample is missing.
    """
    qc_objects = []
    runinfo_csv = os.path.join(os.path.abspath(self.pargs.flowcell),
                               "{}.csv".format(self._fc_id()))
    try:
        with open(runinfo_csv) as fh:
            runinfo = list(csv.reader(fh))
    except IOError as e:
        self.app.log.warn(str(e))
        # Bare ``raise`` re-raises with the original traceback intact.
        raise

    # The analysis root does not change per sample, so resolve it once.
    analysis_dir = os.path.abspath(self.pargs.analysis)
    fcdir = os.path.join(analysis_dir, self.pargs.flowcell)
    (fc_date, fc_name) = self._fc_parts()

    ## Check modification time
    if modified_within_days(fcdir, self.pargs.mtime):
        fcobj = FlowcellRunMetrics(path=fcdir, fc_date=fc_date, fc_name=fc_name)
        fcobj.parse_illumina_metrics(fullRTA=False)
        fcobj.parse_bc_metrics()
        fcobj.parse_demultiplex_stats_htm()
        fcobj.parse_samplesheet_csv()
        qc_objects.append(fcobj)

    for sample in runinfo[1:]:
        # Map the header row onto this row's values.
        d = dict(zip(runinfo[0], sample))

        # Optional command-line filters on project and sample name.
        if self.app.pargs.project and self.app.pargs.project != d['SampleProject']:
            continue
        if self.app.pargs.sample and self.app.pargs.sample != d['SampleID']:
            continue

        # Samplesheet project names use "__" where the directory name uses ".".
        project_name = d['SampleProject'].replace("__", ".")
        sample_id = d['SampleID']

        sampledir = os.path.join(analysis_dir, project_name, sample_id)
        if not os.path.exists(sampledir):
            self.app.log.warn("No such sample directory: {}".format(sampledir))
            continue

        sample_fcdir = os.path.join(sampledir, self._fc_fullname())
        if not os.path.exists(sample_fcdir):
            self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
            continue
        if not modified_within_days(sample_fcdir, self.pargs.mtime):
            continue

        runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(sample_id))
        if not os.path.exists(runinfo_yaml_file):
            message = "No such yaml file for sample: {}".format(runinfo_yaml_file)
            self.app.log.warn(message)
            raise IOError(2, message, runinfo_yaml_file)
        with open(runinfo_yaml_file) as fh:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; prefer yaml.safe_load if these
            # config files never rely on custom tags.
            runinfo_yaml = yaml.load(fh)

        details = runinfo_yaml['details'][0]
        if not details.get("multiplex", None):
            self.app.log.warn("No multiplex information for sample {}".format(sample_id))
            continue

        # Only the first multiplex entry of the first detail record is used.
        barcode = details['multiplex'][0]
        sample_kw = dict(path=sample_fcdir,
                         flowcell=fc_name,
                         date=fc_date,
                         lane=d['Lane'],
                         barcode_name=sample_id,
                         sample_prj=project_name,
                         barcode_id=barcode['barcode_id'],
                         sequence=barcode['sequence'])
        obj = SampleRunMetrics(**sample_kw)
        obj.read_picard_metrics()
        obj.parse_fastq_screen()
        obj.parse_bc_metrics()
        obj.read_fastqc_metrics()
        qc_objects.append(obj)
    return qc_objects
def setUp(self):
    """Load database test settings from ``dbcon.ini`` in the home directory.

    When the file does not exist, every setting is given an empty value
    (``None`` or ``{}``) and no metrics objects are built, so that tests
    can detect the missing configuration instead of hitting an
    ``AttributeError``.  When it exists, the ``couchdb``, ``statusdb`` and
    ``examples`` sections are read and a ``FlowcellRunMetrics`` and a
    ``SampleRunMetrics`` object are created from the ``statusdb`` values.
    """
    # expanduser honours $HOME when set and still works when it is not,
    # whereas os.getenv("HOME") returns None and breaks os.path.join.
    config_file = os.path.join(os.path.expanduser("~"), "dbcon.ini")

    if not os.path.exists(config_file):
        self.url = None
        self.user = None
        self.pw = None
        self.demuxstats = None
        self.sample_kw = {}
        self.fc_kw = {}
        self.examples = {}
        self.fcrm = None
        self.srm = None
        return

    config = ConfigParser.ConfigParser()
    # Close the handle deterministically instead of leaking it.
    with open(config_file) as fh:
        config.readfp(fh)

    self.url = config.get("couchdb", "url")
    self.user = config.get("couchdb", "username")
    self.pw = config.get("couchdb", "password")
    self.demuxstats = config.get("statusdb", "demuxstats")
    self.sample_kw = {
        "path": config.get("statusdb", "fcdir"),
        "flowcell": config.get("statusdb", "fc_name"),
        "date": config.get("statusdb", "date"),
        "lane": config.get("statusdb", "lane"),
        "barcode_name": config.get("statusdb", "name"),
        "sample_prj": config.get("statusdb", "project"),
        "barcode_id": config.get("statusdb", "barcode_id"),
        "sequence": config.get("statusdb", "sequence"),
    }
    self.fc_kw = {
        "path": config.get("statusdb", "fcdir"),
        "fc_date": config.get("statusdb", "date"),
        "fc_name": config.get("statusdb", "fc_name"),
    }
    self.examples = {
        "sample": config.get("examples", "sample"),
        "flowcell": config.get("examples", "flowcell"),
        "project": config.get("examples", "project"),
    }
    self.fcrm = FlowcellRunMetrics(**self.fc_kw)
    self.srm = SampleRunMetrics(**self.sample_kw)
def _collect_pre_casava_qc(self):
    """Collect QC metric objects for a pre-CASAVA flowcell layout.

    Reads ``<flowcell>/run_info.yaml``.  If the flowcell directory was not
    modified within ``self.pargs.mtime`` days an empty list is returned.
    Otherwise one ``FlowcellRunMetrics`` object is created for the
    flowcell, followed by one ``SampleRunMetrics`` object per multiplexed
    sample of every lane entry in the run info.

    A lane without multiplex information is logged and recorded as a
    single sample that carries only the lane-level fields, with the
    sequence set to ``"NoIndex"``.  (Previously this path referenced an
    unbound ``sample`` variable and then failed on ``info["multiplex"]``.)

    Returns:
        list: the flowcell metrics object followed by the sample metrics
        objects, or an empty list if the flowcell is too old.

    Raises:
        IOError: if ``run_info.yaml`` cannot be opened.
    """
    qc_objects = []
    fcdir = os.path.abspath(self.pargs.flowcell)
    runinfo_yaml = os.path.join(fcdir, "run_info.yaml")
    try:
        with open(runinfo_yaml) as fh:
            # NOTE(review): yaml.load without an explicit Loader can build
            # arbitrary Python objects; prefer yaml.safe_load if run_info
            # files never rely on custom tags.
            runinfo = yaml.load(fh)
    except IOError as e:
        self.app.log.warn(str(e))
        # Bare ``raise`` re-raises with the original traceback intact.
        raise

    (fc_date, fc_name) = self._fc_parts()

    ## Check modification time
    if not modified_within_days(fcdir, self.pargs.mtime):
        return qc_objects

    fcobj = FlowcellRunMetrics(path=fcdir, fc_date=fc_date, fc_name=fc_name)
    fcobj.parse_illumina_metrics(fullRTA=False)
    fcobj.parse_bc_metrics()
    fcobj.parse_filter_metrics()
    # Fall back to the run info yaml when no samplesheet could be parsed.
    if not fcobj.parse_samplesheet_csv():
        fcobj.parse_run_info_yaml()
    qc_objects.append(fcobj)

    def _sample_metrics(sample_kw):
        # Build a sample metrics object and run all of its parsers.
        obj = SampleRunMetrics(**sample_kw)
        obj.read_picard_metrics()
        obj.parse_fastq_screen()
        obj.parse_bc_metrics()
        obj.read_fastqc_metrics()
        return obj

    lane_keys = ('analysis', 'description', 'flowcell_id', 'lane')
    # An empty yaml document loads as None; treat it as "no lanes".
    for info in runinfo or []:
        lane_info = {k: info.get(k, None) for k in lane_keys}
        multiplex = info.get("multiplex", None)

        if not multiplex:
            self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
            # NOTE(review): the lane is registered as one unindexed sample
            # built from lane-level fields only; confirm SampleRunMetrics
            # accepts a barcode_name/barcode_id of None.
            sample = dict(lane_info)
            sample_kw = dict(path=fcdir,
                             flowcell=fc_name,
                             date=fc_date,
                             lane=sample.get('lane', None),
                             barcode_name=sample.get('name', None),
                             sample_prj=sample.get('sample_prj', None),
                             barcode_id=sample.get('barcode_id', None),
                             sequence=sample.get('sequence', "NoIndex"))
            qc_objects.append(_sample_metrics(sample_kw))
            continue

        for sample in multiplex:
            # Copy the lane-level fields onto each multiplexed sample.
            sample.update(lane_info)
            sample_kw = dict(path=fcdir,
                             flowcell=fc_name,
                             date=fc_date,
                             lane=sample['lane'],
                             barcode_name=sample['name'],
                             sample_prj=sample.get('sample_prj', None),
                             barcode_id=sample['barcode_id'],
                             sequence=sample.get('sequence', "NoIndex"))
            qc_objects.append(_sample_metrics(sample_kw))
    return qc_objects