Example #1
0
 def setUp(self):
     """Load connection settings and build the metrics fixtures.

     Settings are read from ``dbcon.ini`` in the directory named by the
     ``HOME`` environment variable (falling back to
     ``os.path.expanduser("~")`` when ``HOME`` is unset).  If the file does
     not exist, every attribute this method defines is set to an empty
     placeholder (``None`` or ``{}``) so both branches leave the same set
     of attributes behind.
     """
     home = os.getenv("HOME")
     if home is None:
         # os.path.join cannot handle None; use the platform's notion of
         # the home directory instead of crashing.
         home = os.path.expanduser("~")
     ini_path = os.path.join(home, "dbcon.ini")

     if not os.path.exists(ini_path):
         self.url = None
         self.user = None
         self.pw = None
         self.examples = {}
         self.demuxstats = None
         self.sample_kw = {}
         self.fc_kw = {}
         self.fcrm = None
         self.srm = None
         return

     config = ConfigParser.ConfigParser()
     # Use a context manager so the ini file handle is closed right after
     # parsing instead of being left to the garbage collector.
     with open(ini_path) as fh:
         config.readfp(fh)

     self.url = config.get("couchdb", "url")
     self.user = config.get("couchdb", "username")
     self.pw = config.get("couchdb", "password")
     self.demuxstats = config.get("statusdb", "demuxstats")
     # Keyword arguments for SampleRunMetrics.
     self.sample_kw = {
         "path": config.get("statusdb", "fcdir"),
         "flowcell": config.get("statusdb", "fc_name"),
         "date": config.get("statusdb", "date"),
         "lane": config.get("statusdb", "lane"),
         "barcode_name": config.get("statusdb", "name"),
         "sample_prj": config.get("statusdb", "project"),
         "barcode_id": config.get("statusdb", "barcode_id"),
         "sequence": config.get("statusdb", "sequence")
     }
     # Keyword arguments for FlowcellRunMetrics.
     self.fc_kw = {
         "path": config.get("statusdb", "fcdir"),
         "fc_date": config.get("statusdb", "date"),
         "fc_name": config.get("statusdb", "fc_name")
     }
     self.examples = {
         "sample": config.get("examples", "sample"),
         "flowcell": config.get("examples", "flowcell"),
         "project": config.get("examples", "project")
     }
     self.fcrm = FlowcellRunMetrics(**self.fc_kw)
     self.srm = SampleRunMetrics(**self.sample_kw)
Example #2
0
    def _collect_casava_qc(self):
        """Collect QC metrics objects for a CASAVA-style flowcell layout.

        Reads the ``<fc_id>.csv`` sample sheet from the flowcell directory.
        If the flowcell directory under the analysis directory passes the
        ``modified_within_days`` check, a ``FlowcellRunMetrics`` object is
        parsed and collected.  Then one ``SampleRunMetrics`` object is
        collected for every sample sheet row that passes all filters.

        A row is skipped when it does not match the ``project``/``sample``
        options, when its sample directory or sample flowcell directory is
        missing, when that directory fails the modification time check, or
        when its bcbb config holds no multiplex information.

        :returns: list of the collected metrics objects
        :raises IOError: if the sample sheet cannot be opened, or if the
            bcbb config yaml file of a sample is missing
        """
        qc_objects = []
        runinfo_csv = os.path.join(os.path.abspath(self.pargs.flowcell), "{}.csv".format(self._fc_id()))
        try:
            with open(runinfo_csv) as fh:
                runinfo = list(csv.reader(fh))
        except IOError as e:
            self.app.log.warn(str(e))
            # Bare raise keeps the original traceback; ``raise e`` would
            # restart it from this line under Python 2.
            raise
        # Loop-invariant: resolve the analysis directory once.
        analysis_dir = os.path.abspath(self.pargs.analysis)
        fcdir = os.path.join(analysis_dir, self.pargs.flowcell)
        (fc_date, fc_name) = self._fc_parts()
        ## Check modification time
        if modified_within_days(fcdir, self.pargs.mtime):
            fcobj = FlowcellRunMetrics(path=fcdir, fc_date=fc_date, fc_name=fc_name)
            fcobj.parse_illumina_metrics(fullRTA=False)
            fcobj.parse_bc_metrics()
            fcobj.parse_demultiplex_stats_htm()
            fcobj.parse_samplesheet_csv()
            qc_objects.append(fcobj)

        # First row is the header; every following row describes one sample.
        for sample in runinfo[1:]:
            d = dict(zip(runinfo[0], sample))
            if self.app.pargs.project and self.app.pargs.project != d['SampleProject']:
                continue
            if self.app.pargs.sample and self.app.pargs.sample != d['SampleID']:
                continue

            # The sample sheet writes "__" where the project name has ".".
            sample_prj = d['SampleProject'].replace("__", ".")
            sampledir = os.path.join(analysis_dir, sample_prj, d['SampleID'])
            if not os.path.exists(sampledir):
                self.app.log.warn("No such sample directory: {}".format(sampledir))
                continue
            sample_fcdir = os.path.join(sampledir, self._fc_fullname())
            if not os.path.exists(sample_fcdir):
                self.app.log.warn("No such sample flowcell directory: {}".format(sample_fcdir))
                continue
            if not modified_within_days(sample_fcdir, self.pargs.mtime):
                continue
            runinfo_yaml_file = os.path.join(sample_fcdir, "{}-bcbb-config.yaml".format(d['SampleID']))
            if not os.path.exists(runinfo_yaml_file):
                self.app.log.warn("No such yaml file for sample: {}".format(runinfo_yaml_file))
                raise IOError(2, "No such yaml file for sample: {}".format(runinfo_yaml_file), runinfo_yaml_file)
            with open(runinfo_yaml_file) as fh:
                # NOTE(review): yaml.load without an explicit Loader can
                # construct arbitrary Python objects; consider
                # yaml.safe_load if these files may be untrusted.
                runinfo_yaml = yaml.load(fh)
            multiplex = runinfo_yaml['details'][0].get("multiplex", None)
            if not multiplex:
                self.app.log.warn("No multiplex information for sample {}".format(d['SampleID']))
                continue
            # Only the first multiplex entry is used for the barcode.
            barcode = multiplex[0]
            obj = SampleRunMetrics(
                path=sample_fcdir,
                flowcell=fc_name,
                date=fc_date,
                lane=d['Lane'],
                barcode_name=d['SampleID'],
                sample_prj=sample_prj,
                barcode_id=barcode['barcode_id'],
                sequence=barcode['sequence'])
            obj.read_picard_metrics()
            obj.parse_fastq_screen()
            obj.parse_bc_metrics()
            obj.read_fastqc_metrics()
            qc_objects.append(obj)
        return qc_objects
Example #3
0
 def setUp(self):
     """Read ``dbcon.ini`` and prepare the flowcell and sample fixtures.

     The ini file is looked up in the directory named by the ``HOME``
     environment variable, or in ``os.path.expanduser("~")`` when ``HOME``
     is unset.  Without the file, all attributes set by this method get
     empty placeholders (``None`` or ``{}``) and no metrics objects are
     created.
     """
     home = os.getenv("HOME")
     if home is None:
         # Avoid passing None to os.path.join when HOME is not exported.
         home = os.path.expanduser("~")
     ini_path = os.path.join(home, "dbcon.ini")

     if not os.path.exists(ini_path):
         self.url = None
         self.user = None
         self.pw = None
         self.examples = {}
         self.demuxstats = None
         self.sample_kw = {}
         self.fc_kw = {}
         self.fcrm = None
         self.srm = None
         return

     config = ConfigParser.ConfigParser()
     # Close the ini file as soon as it has been parsed.
     with open(ini_path) as fh:
         config.readfp(fh)
     get = config.get

     self.url = get("couchdb", "url")
     self.user = get("couchdb", "username")
     self.pw = get("couchdb", "password")
     self.demuxstats = get("statusdb", "demuxstats")
     # Keyword arguments for SampleRunMetrics.
     self.sample_kw = {"path": get("statusdb", "fcdir"),
                       "flowcell": get("statusdb", "fc_name"),
                       "date": get("statusdb", "date"),
                       "lane": get("statusdb", "lane"),
                       "barcode_name": get("statusdb", "name"),
                       "sample_prj": get("statusdb", "project"),
                       "barcode_id": get("statusdb", "barcode_id"),
                       "sequence": get("statusdb", "sequence")}
     # Keyword arguments for FlowcellRunMetrics.
     self.fc_kw = {"path": get("statusdb", "fcdir"),
                   "fc_date": get("statusdb", "date"),
                   "fc_name": get("statusdb", "fc_name")}
     self.examples = {"sample": get("examples", "sample"),
                      "flowcell": get("examples", "flowcell"),
                      "project": get("examples", "project")}
     self.fcrm = FlowcellRunMetrics(**self.fc_kw)
     self.srm = SampleRunMetrics(**self.sample_kw)
Example #4
0
class TestQCUpload(unittest.TestCase):
    """Tests around QC metrics parsing and the sample run metrics database.

    Connection details and fixture locations come from ``dbcon.ini``; see
    ``setUp`` for where the file is looked up and what happens without it.
    """

    def setUp(self):
        """Load connection settings and build the metrics fixtures.

        Settings are read from ``dbcon.ini`` in the directory named by the
        ``HOME`` environment variable (falling back to
        ``os.path.expanduser("~")`` when ``HOME`` is unset).  If the file
        does not exist, every attribute this method defines is set to an
        empty placeholder (``None`` or ``{}``).
        """
        home = os.getenv("HOME")
        if home is None:
            # os.path.join cannot handle None.
            home = os.path.expanduser("~")
        ini_path = os.path.join(home, "dbcon.ini")

        if not os.path.exists(ini_path):
            self.url = None
            self.user = None
            self.pw = None
            self.examples = {}
            self.demuxstats = None
            self.sample_kw = {}
            self.fc_kw = {}
            self.fcrm = None
            self.srm = None
            return

        config = ConfigParser.ConfigParser()
        # Context manager closes the ini file right after parsing.
        with open(ini_path) as fh:
            config.readfp(fh)

        self.url = config.get("couchdb", "url")
        self.user = config.get("couchdb", "username")
        self.pw = config.get("couchdb", "password")
        self.demuxstats = config.get("statusdb", "demuxstats")
        self.sample_kw = {
            "path": config.get("statusdb", "fcdir"),
            "flowcell": config.get("statusdb", "fc_name"),
            "date": config.get("statusdb", "date"),
            "lane": config.get("statusdb", "lane"),
            "barcode_name": config.get("statusdb", "name"),
            "sample_prj": config.get("statusdb", "project"),
            "barcode_id": config.get("statusdb", "barcode_id"),
            "sequence": config.get("statusdb", "sequence")
        }
        self.fc_kw = {
            "path": config.get("statusdb", "fcdir"),
            "fc_date": config.get("statusdb", "date"),
            "fc_name": config.get("statusdb", "fc_name")
        }
        self.examples = {
            "sample": config.get("examples", "sample"),
            "flowcell": config.get("examples", "flowcell"),
            "project": config.get("examples", "project")
        }
        self.fcrm = FlowcellRunMetrics(**self.fc_kw)
        self.srm = SampleRunMetrics(**self.sample_kw)

    def test_1_demuxstats(self):
        """Parse the demultiplex stats and print the first barcode lane entry."""
        if self.fcrm is None:
            # No dbcon.ini means no flowcell fixture; report a skip rather
            # than an AttributeError.
            self.skipTest("dbcon.ini not found; no flowcell fixture available")
        metrics = self.fcrm.parse_demultiplex_stats_htm()
        # A single parenthesised argument prints identically whether print
        # is a statement or a function.
        print(metrics["Barcode_lane_statistics"][0])

    def test_2_map_srmseqid_to_srmid(self):
        """Map srm seq id names to srm ids"""
        sample_con = SampleRunMetricsConnection(username=self.user,
                                                password=self.pw,
                                                url=self.url)
        sample_map = {}
        for k in sample_con.db:
            obj = sample_con.db.get(k)
            sample_seq_id = "{}_{}_{}_{}".format(
                obj.get("lane"), obj.get("date"), obj.get("flowcell"),
                obj.get("sequence", "NoIndex"))
            # Test membership on the dict itself; ``.keys()`` builds a new
            # list on every lookup under Python 2.
            if sample_seq_id in sample_map:
                print("WARNING: duplicate for {}".format(sample_seq_id))
            sample_map.setdefault(sample_seq_id, []).append(k)
        # Report every seq id that maps to more than one document id.
        for k, v in sample_map.items():
            if len(v) > 1:
                print("{} {}".format(k, v))
Example #5
0
class TestQCUpload(unittest.TestCase):
    """Tests for demultiplex stats parsing and sample run metrics ids.

    Fixtures are configured from ``dbcon.ini``; ``setUp`` describes the
    lookup and the placeholder values used when the file is missing.
    """

    def setUp(self):
        """Read ``dbcon.ini`` and prepare the flowcell and sample fixtures.

        The ini file is looked up in the directory named by the ``HOME``
        environment variable, or in ``os.path.expanduser("~")`` when
        ``HOME`` is unset.  Without the file, all attributes set by this
        method get empty placeholders (``None`` or ``{}``) and no metrics
        objects are created.
        """
        home = os.getenv("HOME")
        if home is None:
            # Avoid passing None to os.path.join when HOME is not exported.
            home = os.path.expanduser("~")
        ini_path = os.path.join(home, "dbcon.ini")

        if not os.path.exists(ini_path):
            self.url = None
            self.user = None
            self.pw = None
            self.examples = {}
            self.demuxstats = None
            self.sample_kw = {}
            self.fc_kw = {}
            self.fcrm = None
            self.srm = None
            return

        config = ConfigParser.ConfigParser()
        # Close the ini file as soon as it has been parsed.
        with open(ini_path) as fh:
            config.readfp(fh)
        get = config.get

        self.url = get("couchdb", "url")
        self.user = get("couchdb", "username")
        self.pw = get("couchdb", "password")
        self.demuxstats = get("statusdb", "demuxstats")
        self.sample_kw = {"path": get("statusdb", "fcdir"),
                          "flowcell": get("statusdb", "fc_name"),
                          "date": get("statusdb", "date"),
                          "lane": get("statusdb", "lane"),
                          "barcode_name": get("statusdb", "name"),
                          "sample_prj": get("statusdb", "project"),
                          "barcode_id": get("statusdb", "barcode_id"),
                          "sequence": get("statusdb", "sequence")}
        self.fc_kw = {"path": get("statusdb", "fcdir"),
                      "fc_date": get("statusdb", "date"),
                      "fc_name": get("statusdb", "fc_name")}
        self.examples = {"sample": get("examples", "sample"),
                         "flowcell": get("examples", "flowcell"),
                         "project": get("examples", "project")}
        self.fcrm = FlowcellRunMetrics(**self.fc_kw)
        self.srm = SampleRunMetrics(**self.sample_kw)

    def test_1_demuxstats(self):
        """Print the first barcode lane entry of the parsed demultiplex stats."""
        if self.fcrm is None:
            # Without dbcon.ini there is nothing to parse; skip instead of
            # erroring out on the missing fixture.
            self.skipTest("dbcon.ini not found; no flowcell fixture available")
        metrics = self.fcrm.parse_demultiplex_stats_htm()
        # Single-argument parenthesised print behaves the same as the
        # print statement.
        print(metrics["Barcode_lane_statistics"][0])

    def test_2_map_srmseqid_to_srmid(self):
        """Map srm seq id names to srm ids"""
        sample_con = SampleRunMetricsConnection(username=self.user, password=self.pw, url=self.url)
        sample_map = {}
        for k in sample_con.db:
            obj = sample_con.db.get(k)
            sample_seq_id = "{}_{}_{}_{}".format(obj.get("lane"), obj.get("date"), obj.get("flowcell"), obj.get("sequence", "NoIndex"))
            # Dict membership is a hash lookup; ``.keys()`` would build a
            # list for every document under Python 2.
            if sample_seq_id in sample_map:
                print("WARNING: duplicate for {}".format(sample_seq_id))
                sample_map[sample_seq_id].append(k)
            else:
                sample_map[sample_seq_id] = [k]
        # List the seq ids shared by several documents.
        for k, v in sample_map.items():
            if len(v) > 1:
                print("{} {}".format(k, v))
Example #6
0
 def _collect_pre_casava_qc(self):
     """Collect QC metrics objects for a pre-CASAVA flowcell layout.

     Reads ``run_info.yaml`` from the flowcell directory.  If the flowcell
     directory fails the ``modified_within_days`` check an empty list is
     returned.  Otherwise a ``FlowcellRunMetrics`` object is parsed and
     collected, followed by one ``SampleRunMetrics`` object per multiplex
     entry of every lane in the run info.

     Lanes without multiplex information are logged and skipped.

     :returns: list of the collected metrics objects
     :raises IOError: if ``run_info.yaml`` cannot be opened
     """
     qc_objects = []
     runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell), "run_info.yaml")
     try:
         with open(runinfo_yaml) as fh:
             # NOTE(review): yaml.load without an explicit Loader can
             # construct arbitrary Python objects; consider yaml.safe_load
             # if this file may be untrusted.
             runinfo = yaml.load(fh)
     except IOError as e:
         self.app.log.warn(str(e))
         # Bare raise keeps the original traceback; ``raise e`` would
         # restart it from this line under Python 2.
         raise
     fcdir = os.path.abspath(self.pargs.flowcell)
     (fc_date, fc_name) = self._fc_parts()
     ## Check modification time
     if not modified_within_days(fcdir, self.pargs.mtime):
         return qc_objects

     fcobj = FlowcellRunMetrics(path=fcdir, fc_date=fc_date, fc_name=fc_name)
     fcobj.parse_illumina_metrics(fullRTA=False)
     fcobj.parse_bc_metrics()
     fcobj.parse_filter_metrics()
     # Fall back to the run info yaml when the sample sheet parse reports
     # a falsy result.
     if not fcobj.parse_samplesheet_csv():
         fcobj.parse_run_info_yaml()
     qc_objects.append(fcobj)

     for info in runinfo:
         multiplex = info.get("multiplex", None)
         if not multiplex:
             self.app.log.warn("No multiplex information for lane {}".format(info.get("lane")))
             # The previous code went on to read an unbound (or stale)
             # ``sample`` here and then indexed info["multiplex"], which
             # raised.  There is no sample entry to build metrics from, so
             # skip the lane.
             # NOTE(review): confirm that non-multiplexed lanes should not
             # get a lane-level SampleRunMetrics object.
             continue
         # Lane-level fields copied onto every sample of the lane.
         lane_info = {k: info.get(k, None) for k in ('analysis', 'description', 'flowcell_id', 'lane')}
         for sample in multiplex:
             sample.update(lane_info)
             sample_kw = dict(path=fcdir, flowcell=fc_name, date=fc_date, lane=sample['lane'], barcode_name=sample['name'], sample_prj=sample.get('sample_prj', None),
                              barcode_id=sample['barcode_id'], sequence=sample.get('sequence', "NoIndex"))
             obj = SampleRunMetrics(**sample_kw)
             obj.read_picard_metrics()
             obj.parse_fastq_screen()
             obj.parse_bc_metrics()
             obj.read_fastqc_metrics()
             qc_objects.append(obj)
     return qc_objects