Example #1
0
 def test_get_bc_count_demux_stats(self):
     """Check the barcode count returned when demultiplex stats are supplied."""
     sample_run_dir = os.path.join(
         project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
     sample_parser = SampleRunMetricsParser(sample_run_dir)
     # First call without demultiplex stats; its result is not inspected.
     sample_parser.get_bc_count(**self.sample_kw)
     flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
     demux_stats = flowcell_parser.parse_demultiplex_stats_htm(**self.fc_kw)
     count = sample_parser.get_bc_count(demultiplex_stats=demux_stats,
                                        **self.sample_kw)
     self.assertEqual(str(count), str(19517198))
Example #2
0
    def _collect_casava_qc(self):
        """Collect QC documents for a CASAVA-demultiplexed flowcell.

        The flowcell name and date are read from RunInfo and the flowcell
        position from the run parameters. A flowcell-level document is only
        built when the flowcell directory passes the ``modified_within_days``
        check; the parsed samplesheet is handed to ``_parse_samplesheet``
        either way.

        Returns:
            The value returned by ``self._parse_samplesheet``.
        """
        flowcell_dir = os.path.join(os.path.abspath(self._meta.root_path),
                                    self.pargs.flowcell)

        # Flowcell name and date come from RunInfo, the position from the
        # run parameters.
        metrics_parser = FlowcellRunMetricsParser(flowcell_dir)
        run_info = metrics_parser.parseRunInfo()
        run_params = metrics_parser.parseRunParameters()
        fc_date = run_info.get('Date', None)
        fc_name = run_info.get('Flowcell', None)
        fc_pos = run_params.get('FCPosition', '')

        # Prefer <flowcell name>.csv, fall back to SampleSheet.csv.
        samplesheet_path = os.path.join(self._meta.root_path,
                                        self.pargs.flowcell,
                                        "{}.csv".format(fc_name))
        if not os.path.exists(samplesheet_path):
            LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(
                samplesheet_path))
            samplesheet_path = os.path.join(self._meta.root_path,
                                            self.pargs.flowcell,
                                            "SampleSheet.csv")
        samplesheet = metrics_parser.parse_samplesheet_csv(
            runinfo_csv=samplesheet_path)

        # Most of the code expects to have the flowcell position pre-pended
        # to the flowcell id.
        positioned_name = "{}{}".format(fc_pos, fc_name)

        collected = []
        reads = None
        demux = None
        if modified_within_days(flowcell_dir, self.pargs.mtime):
            fc_kw = dict(fc_date=fc_date, fc_name=positioned_name)
            fc_doc = FlowcellRunMetricsDocument(**fc_kw)
            fc_doc["RunInfo"] = run_info
            fc_doc["RunParameters"] = run_params
            fc_doc["DemultiplexConfig"] = metrics_parser.parseDemultiplexConfig(**fc_kw)
            fc_doc["illumina"] = metrics_parser.parse_illumina_metrics(
                fullRTA=False, **fc_kw)
            fc_doc["bc_metrics"] = metrics_parser.parse_bc_metrics(**fc_kw)
            fc_doc["undemultiplexed_barcodes"] = (
                metrics_parser.parse_undemultiplexed_barcode_metrics(**fc_kw))
            fc_doc["illumina"].update(
                {"Demultiplex_Stats": metrics_parser.parse_demultiplex_stats_htm(**fc_kw)})
            fc_doc["samplesheet_csv"] = samplesheet
            reads = fc_doc["RunInfo"].get('Reads', [])
            fc_doc["run_setup"] = self._run_setup(reads)
            demux = fc_doc["illumina"]["Demultiplex_Stats"]
            collected.append(fc_doc)

        return self._parse_samplesheet(samplesheet,
                                       collected,
                                       fc_date,
                                       positioned_name,
                                       flowcell_dir,
                                       demultiplex_stats=demux,
                                       setup=reads)
Example #3
0
 def test_parseRunParameters(self):
     """Verify selected values from the parsed run parameters."""
     run_params = FlowcellRunMetricsParser(self.fcdir).parseRunParameters(
         **self.fc_kw)
     focus_settings = run_params['Setup']['FPGADynamicFocusSettings']
     self.assertEqual(focus_settings['CVGainPosLocked'], '500')
     first_read = run_params['Setup']['Reads']['Read'][0]
     self.assertEqual(first_read['NumCycles'], '101')
     second_section = run_params['Setup']['SelectedSections']['Section'][1]
     self.assertEqual(second_section['Name'], 'B_1')
Example #4
0
 def __init__(self, run_dir, samplesheet=None):
     """Initialise the run from its directory.

     Args:
         run_dir: Path to the run directory; it is normalised and must exist.
         samplesheet: Optional samplesheet path. When falsy, the path is
             obtained from ``IlluminaRun.get_samplesheet``.

     Raises:
         AssertionError: If the normalised run directory does not exist.
     """
     self._run_dir = os.path.normpath(run_dir)
     assert os.path.exists(self._run_dir), \
         "The path %s is invalid" % self._run_dir

     # Run parameters and run info are both read through the metrics parser.
     metrics_parser = FlowcellRunMetricsParser(self._run_dir)
     self.run_config = metrics_parser.parseRunParameters()
     self.run_info = metrics_parser.parseRunInfo()

     if samplesheet:
         self.samplesheet_file = samplesheet
     else:
         self.samplesheet_file = IlluminaRun.get_samplesheet(self._run_dir)
Example #5
0
 def test_get_bc_count_demux_stats(self):
     """Barcode count computed with demultiplex stats matches the expected value."""
     run_path = os.path.join(project_dir, "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX")
     sample_metrics = SampleRunMetricsParser(run_path)
     # The count is first requested without demultiplex stats; only the
     # second result is checked.
     sample_metrics.get_bc_count(**self.sample_kw)
     stats = FlowcellRunMetricsParser(self.fcdir).parse_demultiplex_stats_htm(**self.fc_kw)
     observed = sample_metrics.get_bc_count(demultiplex_stats=stats, **self.sample_kw)
     self.assertEqual(str(observed), str(19517198))
Example #6
0
 def _collect_casava_qc(self):
     """Collect QC documents for a CASAVA-demultiplexed flowcell.

     The samplesheet rows are read from ``<flowcell id>.csv`` in the flowcell
     directory, or from ``SampleSheet.csv`` when that file is missing. A
     flowcell-level document is built only when the flowcell directory
     passes the ``modified_within_days`` check.

     Returns:
         The value returned by ``self._parse_samplesheet``.

     Raises:
         IOError: If the samplesheet cannot be opened; the error is logged
             before being re-raised.
     """
     samplesheet_path = os.path.join(self._meta.root_path,
                                     self.pargs.flowcell,
                                     "{}.csv".format(fc_id(self.pargs.flowcell)))
     if not os.path.exists(samplesheet_path):
         LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(samplesheet_path))
         samplesheet_path = os.path.join(self._meta.root_path,
                                         self.pargs.flowcell,
                                         "SampleSheet.csv")
     try:
         with open(samplesheet_path) as handle:
             rows = list(csv.reader(handle))
     except IOError as err:
         self.app.log.warn(str(err))
         raise err

     flowcell_dir = os.path.join(os.path.abspath(self._meta.root_path),
                                 self.pargs.flowcell)
     fc_date, fc_name = fc_parts(self.pargs.flowcell)

     collected = []
     # Stays None when the modification-time check below fails.
     demux = None
     if modified_within_days(flowcell_dir, self.pargs.mtime):
         fc_kw = dict(fc_date=fc_date, fc_name=fc_name)
         metrics_parser = FlowcellRunMetricsParser(flowcell_dir)
         fc_doc = FlowcellRunMetricsDocument(fc_date, fc_name)
         fc_doc["RunInfo"] = metrics_parser.parseRunInfo(**fc_kw)
         fc_doc["RunParameters"] = metrics_parser.parseRunParameters(**fc_kw)
         fc_doc["illumina"] = metrics_parser.parse_illumina_metrics(
             fullRTA=False, **fc_kw)
         fc_doc["bc_metrics"] = metrics_parser.parse_bc_metrics(**fc_kw)
         fc_doc["undemultiplexed_barcodes"] = (
             metrics_parser.parse_undemultiplexed_barcode_metrics(**fc_kw))
         fc_doc["illumina"].update(
             {"Demultiplex_Stats": metrics_parser.parse_demultiplex_stats_htm(**fc_kw)})
         fc_doc["samplesheet_csv"] = metrics_parser.parse_samplesheet_csv(
             runinfo_csv=samplesheet_path, **fc_kw)
         demux = fc_doc["illumina"]["Demultiplex_Stats"]
         collected.append(fc_doc)

     return self._parse_samplesheet(rows, collected, fc_date, fc_name,
                                    flowcell_dir, demultiplex_stats=demux)
Example #7
0
 def __init__(self, run_dir):
     """Initialise the run from its directory.

     Args:
         run_dir: Path to the run directory; it is normalised and must exist.

     Raises:
         AssertionError: If the normalised run directory does not exist.
     """
     self._run_dir = os.path.normpath(run_dir)
     assert os.path.exists(self._run_dir), \
         "The path %s is invalid" % self._run_dir

     # NOTE(review): self.samplesheet is only assigned when a samplesheet
     # file is found; otherwise the attribute is left unset here.
     sheet_path = self._find_samplesheet()
     if sheet_path is not None:
         self.samplesheet = MiSeqSampleSheet(sheet_path)

     self.run_config = FlowcellRunMetricsParser(self._run_dir).parseRunParameters()
     self._fastq = self._fastq_files()
Example #8
0
    def __init__(self, run_dir, samplesheet=None):
        """Set up the run object for the given run directory.

        Args:
            run_dir: Path to the run directory; normalised before use and
                required to exist.
            samplesheet: Optional path to a samplesheet. A falsy value means
                ``IlluminaRun.get_samplesheet`` is asked for the path.

        Raises:
            AssertionError: If the normalised run directory does not exist.
        """
        self._run_dir = os.path.normpath(run_dir)
        directory_exists = os.path.exists(self._run_dir)
        assert directory_exists, "The path %s is invalid" % self._run_dir

        # Both the run parameters and the run info come from the same parser.
        run_parser = FlowcellRunMetricsParser(self._run_dir)
        self.run_config = run_parser.parseRunParameters()
        self.run_info = run_parser.parseRunInfo()

        if not samplesheet:
            samplesheet = IlluminaRun.get_samplesheet(self._run_dir)
        self.samplesheet_file = samplesheet
Example #9
0
    def __init__(self, run_dir):
        """Set up the run object for the given run directory.

        Args:
            run_dir: Path to the run directory; normalised before use and
                required to exist.

        Raises:
            AssertionError: If the normalised run directory does not exist.
        """
        self._run_dir = os.path.normpath(run_dir)
        directory_exists = os.path.exists(self._run_dir)
        assert directory_exists, "The path %s is invalid" % self._run_dir

        # NOTE(review): when no samplesheet file is found, self.samplesheet
        # is not assigned at all by this constructor.
        located_sheet = self._find_samplesheet()
        if located_sheet is not None:
            self.samplesheet = MiSeqSampleSheet(located_sheet)

        run_parser = FlowcellRunMetricsParser(self._run_dir)
        self.run_config = run_parser.parseRunParameters()
        self._fastq = self._fastq_files()
Example #10
0
 def _collect_casava_qc(self):
     """Collect QC documents for a CASAVA-demultiplexed flowcell.

     The samplesheet rows are read from ``<flowcell id>.csv`` in the flowcell
     directory, or from ``SampleSheet.csv`` when that file is missing. A
     flowcell-level document is built only when the flowcell directory
     passes the ``modified_within_days`` check; the samplesheet rows are
     passed to ``_parse_samplesheet`` in either case.

     Returns:
         The value returned by ``self._parse_samplesheet``.

     Raises:
         IOError: If the samplesheet cannot be opened; the error is logged
             before being re-raised.
     """
     qc_objects = []
     samplesheet_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
     runinfo_csv = os.path.join(samplesheet_dir, "{}.csv".format(fc_id(self.pargs.flowcell)))
     if not os.path.exists(runinfo_csv):
         LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
         runinfo_csv = os.path.join(samplesheet_dir, "SampleSheet.csv")
     try:
         with open(runinfo_csv) as fh:
             runinfo = list(csv.reader(fh))
     except IOError as e:
         self.app.log.warn(str(e))
         raise
     fcdir = os.path.join(os.path.abspath(self._meta.root_path), self.pargs.flowcell)
     (fc_date, fc_name) = fc_parts(self.pargs.flowcell)
     # Default to None so the _parse_samplesheet call below does not touch an
     # unbound flowcell document when the modification-time check fails.
     demux_stats = None
     ## Check modification time
     if modified_within_days(fcdir, self.pargs.mtime):
         fc_kw = dict(fc_date=fc_date, fc_name=fc_name)
         parser = FlowcellRunMetricsParser(fcdir)
         fcobj = FlowcellRunMetricsDocument(fc_date, fc_name)
         fcobj["RunInfo"] = parser.parseRunInfo(**fc_kw)
         fcobj["RunParameters"] = parser.parseRunParameters(**fc_kw)
         fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
         fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
         fcobj["undemultiplexed_barcodes"] = parser.parse_undemultiplexed_barcode_metrics(**fc_kw)
         fcobj["illumina"].update({"Demultiplex_Stats": parser.parse_demultiplex_stats_htm(**fc_kw)})
         fcobj["samplesheet_csv"] = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv, **fc_kw)
         demux_stats = fcobj["illumina"]["Demultiplex_Stats"]
         qc_objects.append(fcobj)
     qc_objects = self._parse_samplesheet(runinfo, qc_objects, fc_date, fc_name, fcdir, demultiplex_stats=demux_stats)
     return qc_objects
Example #11
0
 def _collect_pre_casava_qc(self):
     """Collect QC documents for a pre-CASAVA flowcell.

     The run description is read from ``<flowcell id>.csv`` (or the fallback
     ``SampleSheet.csv``) in the flowcell directory below
     ``self._meta.root_path``. When neither file exists, ``run_info.yaml``
     from the absolute path of ``self.pargs.flowcell`` is loaded instead and
     ``as_yaml=True`` is forwarded to ``_parse_samplesheet``.

     Returns:
         An empty list when the flowcell directory fails the
         ``modified_within_days`` check, otherwise the value returned by
         ``self._parse_samplesheet``.

     Raises:
         IOError: If the run description file cannot be opened; the error is
             logged before being re-raised.
     """
     qc_objects = []
     as_yaml = False
     samplesheet_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
     runinfo_csv = os.path.join(samplesheet_dir,
                                "{}.csv".format(fc_id(self.pargs.flowcell)))
     if not os.path.exists(runinfo_csv):
         LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(
             runinfo_csv))
         runinfo_csv = os.path.join(samplesheet_dir, "SampleSheet.csv")
     runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell),
                                 "run_info.yaml")
     try:
         if os.path.exists(runinfo_csv):
             with open(runinfo_csv) as fh:
                 runinfo = list(csv.reader(fh))
         else:
             as_yaml = True
             with open(runinfo_yaml) as fh:
                 # safe_load instead of yaml.load(fh): calling load without
                 # a Loader is deprecated since PyYAML 5.1, rejected by
                 # PyYAML 6, and can construct arbitrary Python objects.
                 runinfo = yaml.safe_load(fh)
     except IOError as e:
         self.app.log.warn(str(e))
         raise
     fcdir = os.path.abspath(self.pargs.flowcell)
     (fc_date, fc_name) = fc_parts(self.pargs.flowcell)
     ## Check modification time
     if not modified_within_days(fcdir, self.pargs.mtime):
         return qc_objects
     fc_kw = dict(fc_date=fc_date, fc_name=fc_name)
     parser = FlowcellRunMetricsParser(fcdir)
     fcobj = FlowcellRunMetricsDocument(**fc_kw)
     fcobj["RunInfo"] = parser.parseRunInfo(**fc_kw)
     fcobj["RunParameters"] = parser.parseRunParameters(**fc_kw)
     fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False,
                                                       **fc_kw)
     fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
     fcobj["filter_metrics"] = parser.parse_filter_metrics(**fc_kw)
     fcobj["samplesheet_csv"] = parser.parse_samplesheet_csv(
         runinfo_csv=runinfo_csv, **fc_kw)
     fcobj["run_info_yaml"] = parser.parse_run_info_yaml(**fc_kw)
     qc_objects.append(fcobj)
     qc_objects = self._parse_samplesheet(runinfo,
                                          qc_objects,
                                          fc_date,
                                          fc_name,
                                          fcdir,
                                          as_yaml=as_yaml)
     return qc_objects
Example #12
0
def collect_metrics(path, log):
    """Gather demultiplexing metrics for the flowcell run at *path*.

    Args:
        path: Run directory handed to ``FlowcellRunMetricsParser``.
        log: Logger used to report a missing flowcell id.

    Returns:
        The parsed demultiplex stats with ``'RunInfo'`` and
        ``'Undemultiplexed'`` entries added, or an empty dict when RunInfo
        holds no flowcell id.
    """
    metrics_parser = FlowcellRunMetricsParser(path)
    run_info = metrics_parser.parseRunInfo()
    flowcell_id = run_info.get('Flowcell', None)
    if flowcell_id is None:
        log.error("Could not parse flowcell id from RunInfo.xml")
        return {}

    # NOTE(review): an earlier comment said a dummy character is inserted
    # because the parse method expects a flowcell position, but the flowcell
    # id is passed through unchanged -- confirm which is intended.
    metrics = metrics_parser.parse_demultiplex_stats_htm(flowcell_id)
    metrics['RunInfo'] = run_info

    # Attach the undemultiplexed indexes.
    metrics['Undemultiplexed'] = (
        metrics_parser.parse_undemultiplexed_barcode_metrics(flowcell_id))

    return metrics
Example #13
0
def collect_metrics(path):
    """Gather demultiplexing metrics for the flowcell run at *path*.

    Args:
        path: Run directory handed to ``FlowcellRunMetricsParser``.

    Returns:
        The parsed demultiplex stats with ``'RunInfo'`` and
        ``'Undemultiplexed'`` entries added, or an empty dict (after logging
        an error) when RunInfo holds no flowcell id.
    """
    flowcell_parser = FlowcellRunMetricsParser(path)
    parsed_run_info = flowcell_parser.parseRunInfo()
    flowcell_id = parsed_run_info.get('Flowcell', None)
    if flowcell_id is None:
        LOG.error("Could not parse flowcell id from RunInfo.xml")
        return {}

    # NOTE(review): an earlier comment said a dummy character is inserted
    # because the parse method expects a flowcell position, but the flowcell
    # id is passed through unchanged -- confirm which is intended.
    collected = flowcell_parser.parse_demultiplex_stats_htm(flowcell_id)
    collected['RunInfo'] = parsed_run_info

    # Attach the undemultiplexed indexes.
    collected['Undemultiplexed'] = (
        flowcell_parser.parse_undemultiplexed_barcode_metrics(flowcell_id))

    return collected
Example #14
0
def parse_casava_directory(fc_dir):
    """Traverse a bcl2fastq v2.17 generated directory structure and return a dictionary.

    Project directories are matched with ``*__*_*_*`` below the
    ``CASAVA_OUTPUT_DIR`` folder and sample directories with ``Sample_*``.
    A ``SampleSheet.csv`` is written into each sample directory that lacks
    one.

    Args:
        fc_dir: Path to the flowcell directory; made absolute before use.

    Returns:
        A dict with the keys ``fc_dir``, ``fc_name`` (flowcell position
        followed by flowcell name), ``fc_date``, ``basecall_stats_dir`` and
        ``projects``.

    Raises:
        AssertionError: If the flowcell name or date is missing from RunInfo.
    """
    fc_dir = os.path.abspath(fc_dir)
    metrics_parser = FlowcellRunMetricsParser(fc_dir)
    run_info = metrics_parser.parseRunInfo()
    run_params = metrics_parser.parseRunParameters()

    fc_name = run_info.get('Flowcell', None)
    fc_date = run_info.get('Date', None)
    fc_pos = run_params.get('FCPosition', '')
    assert fc_name is not None and fc_date is not None, "Could not parse flowcell name and flowcell date"

    output_dir = os.path.join(fc_dir, "{}".format(CASAVA_OUTPUT_DIR))
    stats_matches = glob.glob(os.path.join(output_dir, "Basecall_Stats_*"))
    basecall_stats_dir = [os.path.relpath(stats_path, fc_dir)
                          for stats_path in stats_matches]

    ssheet_data = read_ssheet_csv(fc_dir)

    projects = []
    for project_dir in glob.glob(os.path.join(output_dir, "*__*_*_*")):
        samples = []
        for sample_dir in glob.glob(os.path.join(project_dir, "Sample_*")):
            sample_dir_name = os.path.basename(sample_dir)
            fastq_matches = glob.glob(os.path.join(sample_dir, "*.fastq.gz"))
            fastq_names = [os.path.basename(fastq) for fastq in fastq_matches]
            sample_name = sample_dir_name.replace("Sample_", "").replace('__', '.')

            # Create the per-sample samplesheet when it is missing.
            samplesheet_path = os.path.join(sample_dir, "SampleSheet.csv")
            if not os.path.exists(samplesheet_path):
                write_samplesheet(samplesheet_path, ssheet_data, sample_name, fc_name, fc_dir)
            samplesheet_matches = glob.glob(samplesheet_path)

            samples.append({
                'sample_dir': sample_dir_name,
                'sample_name': sample_name,
                'files': fastq_names,
                'samplesheet': os.path.basename(samplesheet_matches[0]),
            })

        project_dir_name = os.path.basename(project_dir)
        projects.append({
            'data_dir': os.path.relpath(os.path.dirname(project_dir), fc_dir),
            'project_dir': project_dir_name,
            'project_name': project_dir_name.replace('__', '.'),
            'samples': samples,
        })

    return {
        'fc_dir': fc_dir,
        'fc_name': '{}{}'.format(fc_pos, fc_name),
        'fc_date': fc_date,
        'basecall_stats_dir': basecall_stats_dir,
        'projects': projects,
    }
Example #15
0
 def _collect_pre_casava_qc(self):
     """Collect QC documents for a pre-CASAVA flowcell.

     Flowcell name and date are read from RunInfo and the flowcell position
     from the run parameters. The samplesheet is parsed from
     ``<flowcell name>.csv`` (or the fallback ``SampleSheet.csv``); when the
     parsed samplesheet is empty, ``run_info.yaml`` is loaded instead and
     ``as_yaml=True`` is forwarded to ``_parse_samplesheet``.

     Returns:
         An empty list when the flowcell directory fails the
         ``modified_within_days`` check, otherwise the value returned by
         ``self._parse_samplesheet``.

     Raises:
         IOError: If ``run_info.yaml`` is needed but cannot be opened; the
             error is logged before being re-raised.
     """
     qc_objects = []
     as_yaml = False

     fcdir = os.path.abspath(self.pargs.flowcell)

     ## Check modification time
     if not modified_within_days(fcdir, self.pargs.mtime):
         return qc_objects

     # Get the fc_name, fc_date from RunInfo
     parser = FlowcellRunMetricsParser(fcdir)
     runinfo_xml = parser.parseRunInfo()
     runparams = parser.parseRunParameters()
     fc_date = runinfo_xml.get('Date', None)
     fc_name = runinfo_xml.get('Flowcell', None)
     fc_pos = runparams.get('FCPosition', '')

     samplesheet_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
     runinfo_csv = os.path.join(samplesheet_dir, "{}.csv".format(fc_name))
     if not os.path.exists(runinfo_csv):
         LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
         runinfo_csv = os.path.join(samplesheet_dir, "SampleSheet.csv")
     runinfo = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv)
     if len(runinfo) == 0:
         runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell), "run_info.yaml")
         as_yaml = True
         try:
             with open(runinfo_yaml) as fh:
                 # safe_load instead of yaml.load(fh): calling load without
                 # a Loader is deprecated since PyYAML 5.1, rejected by
                 # PyYAML 6, and can construct arbitrary Python objects.
                 runinfo = yaml.safe_load(fh)
         except IOError as e:
             self.app.log.warn(str(e))
             raise

     # Most of the code expects to have the flowcell position pre-pended to the flowcell id
     full_fc_name = "{}{}".format(fc_pos, fc_name)
     fc_kw = dict(fc_date=fc_date, fc_name=full_fc_name)
     fcobj = FlowcellRunMetricsDocument(**fc_kw)
     fcobj["RunInfo"] = runinfo_xml
     fcobj["RunParameters"] = runparams
     fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
     fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
     fcobj["filter_metrics"] = parser.parse_filter_metrics(**fc_kw)
     fcobj["samplesheet_csv"] = runinfo
     fcobj["run_info_yaml"] = parser.parse_run_info_yaml(**fc_kw)
     read_setup = fcobj["RunInfo"].get('Reads', [])
     fcobj["run_setup"] = self._run_setup(read_setup)
     qc_objects.append(fcobj)
     qc_objects = self._parse_samplesheet(runinfo, qc_objects, fc_date, full_fc_name, fcdir, as_yaml=as_yaml, setup=read_setup)
     return qc_objects
Example #16
0
def parse_casava_directory(fc_dir):
    """Traverse a CASAVA 1.8+ generated directory structure and return a dictionary.

    Project directories are matched with ``Project_*`` below every folder
    whose name starts with ``CASAVA_OUTPUT_DIR``; sample directories are
    matched with ``Sample_*``. Each sample directory must contain exactly
    one ``*.csv`` samplesheet.

    Args:
        fc_dir: Path to the flowcell directory; made absolute before use.

    Returns:
        A dict with the keys ``fc_dir``, ``fc_name`` (flowcell position
        followed by flowcell name), ``fc_date``, ``basecall_stats_dir`` and
        ``projects``.

    Raises:
        AssertionError: If the flowcell name or date is missing from RunInfo,
            or a sample directory does not hold exactly one ``*.csv`` file.
    """
    fc_dir = os.path.abspath(fc_dir)
    metrics_parser = FlowcellRunMetricsParser(fc_dir)
    run_info = metrics_parser.parseRunInfo()
    run_params = metrics_parser.parseRunParameters()

    fc_name = run_info.get('Flowcell', None)
    fc_date = run_info.get('Date', None)
    fc_pos = run_params.get('FCPosition', '')
    assert fc_name is not None and fc_date is not None, "Could not parse flowcell name and flowcell date"

    output_dir_glob = os.path.join(fc_dir, "{}*".format(CASAVA_OUTPUT_DIR))
    stats_matches = glob.glob(os.path.join(output_dir_glob, "Basecall_Stats_*"))
    basecall_stats_dir = [os.path.relpath(stats_path, fc_dir)
                          for stats_path in stats_matches]

    projects = []
    for project_dir in glob.glob(os.path.join(output_dir_glob, "Project_*")):
        samples = []
        for sample_dir in glob.glob(os.path.join(project_dir, "Sample_*")):
            sample_dir_name = os.path.basename(sample_dir)
            fastq_matches = glob.glob(os.path.join(sample_dir, "*.fastq.gz"))
            fastq_names = [os.path.basename(fastq) for fastq in fastq_matches]
            csv_matches = glob.glob(os.path.join(sample_dir, "*.csv"))
            assert len(csv_matches) == 1, "ERROR: Could not unambiguously locate samplesheet in %s" % sample_dir
            samples.append({
                'sample_dir': sample_dir_name,
                'sample_name': sample_dir_name.replace("Sample_", "").replace('__', '.'),
                'files': fastq_names,
                'samplesheet': os.path.basename(csv_matches[0]),
            })

        project_dir_name = os.path.basename(project_dir)
        projects.append({
            'data_dir': os.path.relpath(os.path.dirname(project_dir), fc_dir),
            'project_dir': project_dir_name,
            'project_name': project_dir_name.replace("Project_", "").replace('__', '.'),
            'samples': samples,
        })

    return {
        'fc_dir': fc_dir,
        'fc_name': '{}{}'.format(fc_pos, fc_name),
        'fc_date': fc_date,
        'basecall_stats_dir': basecall_stats_dir,
        'projects': projects,
    }
Example #17
0
 def _collect_pre_casava_qc(self):
     """Collect QC documents for a pre-CASAVA flowcell.

     Flowcell name and date are read from RunInfo and the flowcell position
     from the run parameters. The samplesheet is parsed from
     ``<flowcell name>.csv`` (or the fallback ``SampleSheet.csv``); when the
     parsed samplesheet is empty, ``run_info.yaml`` is loaded instead and
     ``as_yaml=True`` is forwarded to ``_parse_samplesheet``.

     Returns:
         An empty list when the flowcell directory fails the
         ``modified_within_days`` check, otherwise the value returned by
         ``self._parse_samplesheet``.

     Raises:
         IOError: If ``run_info.yaml`` is needed but cannot be opened; the
             error is logged before being re-raised.
     """
     qc_objects = []
     as_yaml = False

     fcdir = os.path.abspath(self.pargs.flowcell)

     ## Check modification time
     if not modified_within_days(fcdir, self.pargs.mtime):
         return qc_objects

     # Get the fc_name, fc_date from RunInfo
     parser = FlowcellRunMetricsParser(fcdir)
     runinfo_xml = parser.parseRunInfo()
     runparams = parser.parseRunParameters()
     fc_date = runinfo_xml.get('Date', None)
     fc_name = runinfo_xml.get('Flowcell', None)
     fc_pos = runparams.get('FCPosition', '')

     samplesheet_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
     runinfo_csv = os.path.join(samplesheet_dir, "{}.csv".format(fc_name))
     if not os.path.exists(runinfo_csv):
         LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
         runinfo_csv = os.path.join(samplesheet_dir, "SampleSheet.csv")
     runinfo = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv)
     if len(runinfo) == 0:
         runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell), "run_info.yaml")
         as_yaml = True
         try:
             with open(runinfo_yaml) as fh:
                 # safe_load instead of yaml.load(fh): calling load without
                 # a Loader is deprecated since PyYAML 5.1, rejected by
                 # PyYAML 6, and can construct arbitrary Python objects.
                 runinfo = yaml.safe_load(fh)
         except IOError as e:
             self.app.log.warn(str(e))
             raise

     # Most of the code expects to have the flowcell position pre-pended to the flowcell id
     full_fc_name = "{}{}".format(fc_pos, fc_name)
     fc_kw = dict(fc_date=fc_date, fc_name=full_fc_name)
     fcobj = FlowcellRunMetricsDocument(**fc_kw)
     fcobj["RunInfo"] = runinfo_xml
     fcobj["RunParameters"] = runparams
     fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
     fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
     fcobj["filter_metrics"] = parser.parse_filter_metrics(**fc_kw)
     fcobj["samplesheet_csv"] = runinfo
     fcobj["run_info_yaml"] = parser.parse_run_info_yaml(**fc_kw)
     read_setup = fcobj["RunInfo"].get('Reads', [])
     fcobj["run_setup"] = self._run_setup(read_setup)
     qc_objects.append(fcobj)
     qc_objects = self._parse_samplesheet(runinfo, qc_objects, fc_date, full_fc_name, fcdir, as_yaml=as_yaml, setup=read_setup)
     return qc_objects
Example #18
0
    def _collect_casava_qc(self):
        """Build the QC documents for a CASAVA-demultiplexed flowcell.

        RunInfo supplies the flowcell name and date, the run parameters
        supply the flowcell position. The flowcell-level document is created
        only if the flowcell directory passes the ``modified_within_days``
        check; the parsed samplesheet goes to ``_parse_samplesheet``
        regardless.

        Returns:
            The value returned by ``self._parse_samplesheet``.
        """
        root = self._meta.root_path
        run_dir = os.path.join(os.path.abspath(root), self.pargs.flowcell)

        # Get the fc_name, fc_date from RunInfo
        fc_parser = FlowcellRunMetricsParser(run_dir)
        info = fc_parser.parseRunInfo()
        params = fc_parser.parseRunParameters()
        fc_date = info.get('Date', None)
        fc_name = info.get('Flowcell', None)
        fc_pos = params.get('FCPosition', '')

        # Prefer <flowcell name>.csv, fall back to SampleSheet.csv.
        sheet_csv = os.path.join(root, self.pargs.flowcell, "{}.csv".format(fc_name))
        if not os.path.exists(sheet_csv):
            LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(sheet_csv))
            sheet_csv = os.path.join(root, self.pargs.flowcell, "SampleSheet.csv")
        sheet = fc_parser.parse_samplesheet_csv(runinfo_csv=sheet_csv)

        # Most of the code expects to have the flowcell position pre-pended to the flowcell id
        fc_label = "{}{}".format(fc_pos, fc_name)

        documents = []
        setup = None
        stats = None
        if modified_within_days(run_dir, self.pargs.mtime):
            doc_kw = dict(fc_date=fc_date, fc_name=fc_label)
            document = FlowcellRunMetricsDocument(**doc_kw)
            document["RunInfo"] = info
            document["RunParameters"] = params
            document["DemultiplexConfig"] = fc_parser.parseDemultiplexConfig(**doc_kw)
            document["illumina"] = fc_parser.parse_illumina_metrics(fullRTA=False, **doc_kw)
            document["bc_metrics"] = fc_parser.parse_bc_metrics(**doc_kw)
            document["undemultiplexed_barcodes"] = fc_parser.parse_undemultiplexed_barcode_metrics(**doc_kw)
            document["illumina"].update(
                {"Demultiplex_Stats": fc_parser.parse_demultiplex_stats_htm(**doc_kw)})
            document["samplesheet_csv"] = sheet
            setup = document["RunInfo"].get('Reads', [])
            document["run_setup"] = self._run_setup(setup)
            stats = document["illumina"]["Demultiplex_Stats"]
            documents.append(document)

        return self._parse_samplesheet(sheet, documents, fc_date, fc_label, run_dir,
                                       demultiplex_stats=stats, setup=setup)
Example #19
0
 def _collect_pre_casava_qc(self):
     """Collect QC documents for a pre-CASAVA flowcell.

     The run description is read from ``<flowcell id>.csv`` (or the fallback
     ``SampleSheet.csv``) in the flowcell directory below
     ``self._meta.root_path``. When neither file exists, ``run_info.yaml``
     from the absolute path of ``self.pargs.flowcell`` is loaded instead and
     ``as_yaml=True`` is forwarded to ``_parse_samplesheet``.

     Returns:
         An empty list when the flowcell directory fails the
         ``modified_within_days`` check, otherwise the value returned by
         ``self._parse_samplesheet``.

     Raises:
         IOError: If the run description file cannot be opened; the error is
             logged before being re-raised.
     """
     qc_objects = []
     as_yaml = False
     samplesheet_dir = os.path.join(self._meta.root_path, self.pargs.flowcell)
     runinfo_csv = os.path.join(samplesheet_dir, "{}.csv".format(fc_id(self.pargs.flowcell)))
     if not os.path.exists(runinfo_csv):
         LOG.warn("No such file {}: trying fallback SampleSheet.csv".format(runinfo_csv))
         runinfo_csv = os.path.join(samplesheet_dir, "SampleSheet.csv")
     runinfo_yaml = os.path.join(os.path.abspath(self.pargs.flowcell), "run_info.yaml")
     try:
         if os.path.exists(runinfo_csv):
             with open(runinfo_csv) as fh:
                 runinfo = list(csv.reader(fh))
         else:
             as_yaml = True
             with open(runinfo_yaml) as fh:
                 # safe_load instead of yaml.load(fh): calling load without
                 # a Loader is deprecated since PyYAML 5.1, rejected by
                 # PyYAML 6, and can construct arbitrary Python objects.
                 runinfo = yaml.safe_load(fh)
     except IOError as e:
         self.app.log.warn(str(e))
         raise
     fcdir = os.path.abspath(self.pargs.flowcell)
     (fc_date, fc_name) = fc_parts(self.pargs.flowcell)
     ## Check modification time
     if not modified_within_days(fcdir, self.pargs.mtime):
         return qc_objects
     fc_kw = dict(fc_date=fc_date, fc_name=fc_name)
     parser = FlowcellRunMetricsParser(fcdir)
     fcobj = FlowcellRunMetricsDocument(**fc_kw)
     fcobj["RunInfo"] = parser.parseRunInfo(**fc_kw)
     fcobj["RunParameters"] = parser.parseRunParameters(**fc_kw)
     fcobj["illumina"] = parser.parse_illumina_metrics(fullRTA=False, **fc_kw)
     fcobj["bc_metrics"] = parser.parse_bc_metrics(**fc_kw)
     fcobj["filter_metrics"] = parser.parse_filter_metrics(**fc_kw)
     fcobj["samplesheet_csv"] = parser.parse_samplesheet_csv(runinfo_csv=runinfo_csv, **fc_kw)
     fcobj["run_info_yaml"] = parser.parse_run_info_yaml(**fc_kw)
     qc_objects.append(fcobj)
     qc_objects = self._parse_samplesheet(runinfo, qc_objects, fc_date, fc_name, fcdir, as_yaml=as_yaml)
     return qc_objects
Example #20
0
 def test_parse_demux_stats(self):
     """Test parsing of a Demultiplex_Stats.htm file"""
     flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
     demux_stats = flowcell_parser.parse_demultiplex_stats_htm(**self.fc_kw)
     # Only the read count of the first barcode/lane entry is checked.
     first_entry = demux_stats['Barcode_lane_statistics'][0]
     self.assertEqual(first_entry['# Reads'], '39,034,396')
Example #21
0
 def test_parseRunParameters(self):
     """Check a few values from the parsed run parameters."""
     flowcell_parser = FlowcellRunMetricsParser(self.fcdir)
     params = flowcell_parser.parseRunParameters(**self.fc_kw)
     gain_locked = params['Setup']['FPGADynamicFocusSettings']['CVGainPosLocked']
     self.assertEqual(gain_locked, '500')
     cycles_first_read = params['Setup']['Reads']['Read'][0]['NumCycles']
     self.assertEqual(cycles_first_read, '101')
     second_section_name = params['Setup']['SelectedSections']['Section'][1]['Name']
     self.assertEqual(second_section_name, 'B_1')
Example #22
0
 def test_parse_demux_stats(self):
     """Test parsing of a Demultiplex_Stats.htm file"""
     stats = FlowcellRunMetricsParser(self.fcdir).parse_demultiplex_stats_htm(**self.fc_kw)
     # Only the read count of the first barcode/lane entry is checked.
     lane_rows = stats["Barcode_lane_statistics"]
     reads_in_first_row = lane_rows[0]["# Reads"]
     self.assertEqual(reads_in_first_row, "39,034,396")
Example #23
0
 def test_parseRunParameters(self):
     """Spot-check values from the parsed run parameters."""
     parsed = FlowcellRunMetricsParser(self.fcdir).parseRunParameters(**self.fc_kw)
     focus = parsed["Setup"]["FPGADynamicFocusSettings"]
     self.assertEqual(focus["CVGainPosLocked"], "500")
     reads = parsed["Setup"]["Reads"]["Read"]
     self.assertEqual(reads[0]["NumCycles"], "101")
     sections = parsed["Setup"]["SelectedSections"]["Section"]
     self.assertEqual(sections[1]["Name"], "B_1")