def test_dbcon(self):
    """Test database connection and that we get expected values."""
    # Sample run metrics: index every document by its name and spot-check fields.
    sample_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
    samples_by_name = {doc["name"]: doc
                       for doc in (sample_con.get_entry(key) for key in sample_con.name_view)}
    self.assertEqual(samples_by_name["1_120924_AC003CCCXX_TGACCA"]["date"], "120924")
    self.assertEqual(samples_by_name["1_121015_BB002BBBXX_TGACCA"]["flowcell"], "BB002BBBXX")
    self.assertEqual(samples_by_name["2_120924_AC003CCCXX_ACAGTG"]["entity_type"], "sample_run_metrics")
    self.assertEqual(samples_by_name["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
    self.assertEqual(samples_by_name["4_120924_AC003CCCXX_CGTTAA"]["sequence"], "CGTTAA")
    self.assertEqual(samples_by_name["2_121015_BB002BBBXX_TGACCA"]["project_id"], "P002")
    # Flowcell run metrics: same indexing scheme, keyed on flowcell name.
    flowcell_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
    flowcells_by_name = {doc["name"]: doc
                         for doc in (flowcell_con.get_entry(key) for key in flowcell_con.name_view)}
    self.assertEqual(flowcells_by_name["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
    self.assertEqual(flowcells_by_name["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
    self.assertEqual(flowcells_by_name["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")
    # Project summaries: keyed on the project name field.
    project_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
    projects_by_name = {doc["project_name"]: doc
                        for doc in (project_con.get_entry(key) for key in project_con.name_view)}
    self.assertEqual(projects_by_name["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
    self.assertEqual(projects_by_name["J.Doe_00_01"]["no_of_samples"], 2)
    self.assertEqual(set(projects_by_name["J.Doe_00_01"]["samples"].keys()),
                     set(["P001_101_index3", "P001_102", "P001_103"]))
    self.assertEqual(projects_by_name["J.Doe_00_01"]["customer_reference"], "GnuGenome")
    self.assertEqual(projects_by_name["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
    self.assertEqual(projects_by_name["J.Doe_00_03"]["samples"].keys(), ["3_index6"])
    self.assertIn("A", projects_by_name["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
def list_projects(self):
    """List the projects run on a flowcell together with their application.

    Requires the 'flowcell' parg. Writes a tab-separated table to the
    controller's stdout output buffer: a header row with the flowcell
    name, instrument type and run mode, followed by one
    (project, application) row per project found in the samplesheet.
    Returns nothing; aborts early (with a warning) on missing/invalid
    input or missing database documents.
    """
    if not self._check_pargs(["flowcell"]):
        return
    # Fall back on the configured db url when none was passed on the command line.
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    if not validate_fc_directory_format(self.pargs.flowcell):
        self.app.log.warn(
            "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
        )
        return
    # Short flowcell id: first (date) and last (position+barcode) underscore fields.
    s = self.pargs.flowcell.split("_")
    fcid = "_".join([s[0], s[-1]])
    self.log.debug("Establishing FlowcellRunMetricsConnection")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Establishing ProjectSummaryConnection")
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
    self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
    fc = fc_con.get_entry(fcid)
    if fc is None:
        self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
        return
    self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
    ssheet_data = self._get_samplesheet_sample_data(fc)
    if len(ssheet_data) == 0:
        self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
        return
    self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
    run_data = self._get_run_parameter_data(fc)
    if len(run_data) == 0:
        # Non-fatal: the header row below falls back on default instrument/run mode.
        self.log.warn("No runParameter data for flowcell {}".format(fcid))
    # NOTE(fix): removed a dead 'out_data = [[self.pargs.flowcell]]' assignment that
    # was unconditionally overwritten here on every path that reached it.
    out_data = [
        [self.pargs.flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]
    ]
    # Extract the project names; samplesheet encodes '.' as '__' in project names.
    projects = set([proj[0].replace("__", ".") for data in ssheet_data.values() for proj in data.values()])
    # Extract application for each project
    for project in projects:
        self.log.debug("Fetching project data document for project {}".format(project))
        pdoc = p_con.get_entry(project)
        if pdoc is None:
            self.log.warn("No project data document for project {}".format(project))
            pdoc = {}
        application = pdoc.get("application", "N/A")
        out_data.append([project, application])
    self.app._output_data["stdout"].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
def list_projects(self):
    """List the projects run on a flowcell with their application and type.

    Requires the 'flowcell' parg. Writes a tab-separated table to the
    controller's stdout output buffer: a header row with the flowcell
    name, instrument type and run mode, followed by one
    (project, application, type) row per project found in the
    samplesheet. Returns nothing; aborts early (with a warning) on
    missing/invalid input or missing database documents.
    """
    if not self._check_pargs(["flowcell"]):
        return
    # Fall back on the configured db url when none was passed on the command line.
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    if not validate_fc_directory_format(self.pargs.flowcell):
        self.app.log.warn("Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell))
        return
    # Short flowcell id: first (date) and last (position+barcode) underscore fields.
    s = self.pargs.flowcell.split("_")
    fcid = "_".join([s[0], s[-1]])
    self.log.debug("Establishing FlowcellRunMetricsConnection")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Establishing ProjectSummaryConnection")
    p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))
    self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
    fc = fc_con.get_entry(fcid)
    if fc is None:
        self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
        return
    self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
    ssheet_data = self._get_samplesheet_sample_data(fc)
    if len(ssheet_data) == 0:
        self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
        return
    self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
    run_data = self._get_run_parameter_data(fc)
    if len(run_data) == 0:
        # Non-fatal: the header row below falls back on default instrument/run mode.
        self.log.warn("No runParameter data for flowcell {}".format(fcid))
    # NOTE(fix): removed a dead 'out_data = [[self.pargs.flowcell]]' assignment that
    # was unconditionally overwritten here on every path that reached it.
    out_data = [[self.pargs.flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]]
    # Extract the project names; samplesheet encodes '.' as '__' in project names.
    projects = set([proj[0].replace("__", ".") for data in ssheet_data.values() for proj in data.values()])
    # Extract application for each project
    for project in projects:
        self.log.debug("Fetching project data document for project {}".format(project))
        pdoc = p_con.get_entry(project)
        if pdoc is None:
            self.log.warn("No project data document for project {}".format(project))
            pdoc = {}
        application = pdoc.get("application", "N/A")
        # NOTE(fix): renamed local 'type' -> 'project_type'; it shadowed the builtin.
        project_type = pdoc.get("type", "Check GPL")
        out_data.append([project, application, project_type])
    self.app._output_data['stdout'].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
def test_dbcon(self):
    """Test database connection and that we get expected values."""
    # --- sample run metrics ---
    srm_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
    sample_docs = {}
    for view_key in srm_con.name_view:
        entry = srm_con.get_entry(view_key)
        sample_docs[entry["name"]] = entry
    # (document name, field, expected value) triples, checked in order.
    expected_sample_fields = [
        ("1_120924_AC003CCCXX_TGACCA", "date", "120924"),
        ("1_121015_BB002BBBXX_TGACCA", "flowcell", "BB002BBBXX"),
        ("2_120924_AC003CCCXX_ACAGTG", "entity_type", "sample_run_metrics"),
        ("3_120924_AC003CCCXX_ACAGTG", "lane", "3"),
        ("4_120924_AC003CCCXX_CGTTAA", "sequence", "CGTTAA"),
        ("2_121015_BB002BBBXX_TGACCA", "project_id", "P002"),
    ]
    for doc_name, field, expected in expected_sample_fields:
        self.assertEqual(sample_docs[doc_name][field], expected)
    # --- flowcell run metrics ---
    frm_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
    flowcell_docs = {}
    for view_key in frm_con.name_view:
        entry = frm_con.get_entry(view_key)
        flowcell_docs[entry["name"]] = entry
    self.assertEqual(flowcell_docs["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
    self.assertEqual(flowcell_docs["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
    self.assertEqual(flowcell_docs["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")
    # --- project summaries ---
    ps_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
    project_docs = {}
    for view_key in ps_con.name_view:
        entry = ps_con.get_entry(view_key)
        project_docs[entry["project_name"]] = entry
    doe_01 = project_docs["J.Doe_00_01"]
    self.assertEqual(doe_01["min_m_reads_per_sample_ordered"], 0.1)
    self.assertEqual(doe_01["no_of_samples"], 2)
    self.assertEqual(set(doe_01["samples"].keys()),
                     set(["P001_101_index3", "P001_102", "P001_103"]))
    self.assertEqual(doe_01["customer_reference"], "GnuGenome")
    self.assertEqual(project_docs["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
    self.assertEqual(project_docs["J.Doe_00_03"]["samples"].keys(), ["3_index6"])
    self.assertIn("A", project_docs["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
                       ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                       bc_count=None, project_alias=[], projectdb="projects", samplesdb="samples",
                       flowcelldb="flowcells", phix=None, is_paired=True, **kw):
    """Make a sample status note. Used keywords:

    :param project_name: project name
    :param flowcell: flowcell id
    :param username: db username
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param bc_count: barcode count override
    :param project_alias: project alias name
    :param phix: phix error rate
    :param is_paired: True if run is paired-end, False for single-end

    :returns: dict of StringIO buffers keyed 'stdout', 'stderr', 'debug'
    """
    # Cutoffs for flagging phix error rate and average quality in the report.
    cutoffs = {
        "phix_err_cutoff": 2.0,
        "qv_cutoff": 30,
    }
    instrument = _parse_instrument_config(
        os.path.expanduser(kw.get("instrument_config", "")))
    instrument_dict = {i['instrument_id']: i for i in instrument}
    # Per-sample parameter template; every sample starts from these defaults.
    parameters = {
        "project_name": None,
        "start_date": None,
        "FC_id": None,
        "scilifelab_name": None,
        "rounded_read_count": None,
        "phix_error_rate": None,
        "avg_quality_score": None,
        "pct_q30_bases": None,
        "success": None,
        "run_mode": None,
        "is_paired": True
    }
    # key mapping from sample_run_metrics to parameter keys
    srm_to_parameter = {
        "project_name": "sample_prj",
        "FC_id": "flowcell",
        "scilifelab_name": "barcode_name",
        "start_date": "date",
        "rounded_read_count": "bc_count",
        "lane": "lane"
    }
    LOG.debug("got parameters {}".format(parameters))
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    if not _assert_flowcell_format(flowcell):
        LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9\-]+\")".format(flowcell))
        return output_data
    output_data = _update_sample_output_data(output_data, cutoffs)
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    # Set up paragraphs
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()
    # Get project
    project = p_con.get_entry(project_name)
    source = p_con.get_info_source(project_name)
    if not project:
        LOG.warn("No such project '{}'".format(project_name))
        return output_data
    # Set samples list
    sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
    if len(sample_run_list) == 0:
        LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
        return output_data
    # Set options; command-line overrides arrive as literals and are eval'd here.
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    bc_count = _literal_eval_option(bc_count)
    phix = _literal_eval_option(phix)
    # Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
    sample_count = Counter([x.get("barcode_name") for x in sample_run_list])
    # Loop samples and collect information
    s_param_out = []
    fcdoc = None
    for s in sample_run_list:
        s_param = {}
        LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(
            s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
        s_param.update(parameters)
        s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
        fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
        # Get instrument
        try:
            s_param.update(instrument_dict[fc_con.get_instrument(str(fc))])
        # NOTE(fix): was a bare 'except:'; narrowed to Exception so that
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        except Exception:
            LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
            s_param.update(instrument_dict['default'])
        # Get run mode; cache the flowcell document across consecutive samples on the same flowcell.
        if not fcdoc or fcdoc.get("name") != fc:
            fcdoc = fc_con.get_entry(fc)
        runp = fcdoc.get("RunParameters", {})
        s_param["sequencing_platform"] = "MiSeq" if "MCSVersion" in runp else "HiSeq2500"
        s_param["clustering_method"] = "onboard clustering" if runp.get("ClusteringChoice", "") == "OnBoardClustering" or s_param["sequencing_platform"] == "MiSeq" else "cBot"
        s_param["sequencing_setup"] = fcdoc.get("run_setup")
        s_param["sequencing_mode"] = runp.get("RunMode", "High Output")
        s_param["sequencing_software"] = "RTA {}".format(runp.get("RTAVersion"))
        if s_param["sequencing_platform"] == "MiSeq":
            s_param["sequencing_software"] = "MCS {}/{}".format(runp.get("MCSVersion"), s_param["sequencing_software"])
        else:
            s_param["sequencing_software"] = "{} {}/{}".format(runp.get("ApplicationName"), runp.get("ApplicationVersion"), s_param["sequencing_software"])
        s_param["is_paired"] = fc_con.is_paired_end(str(fc))
        if s_param["is_paired"] is None:
            LOG.warn("Could not determine run setup for flowcell {}. Will assume paired-end.".format(fc))
            s_param["is_paired"] = True
        s_param.update(software_versions)
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        if phix:
            s_param["phix_error_rate"] = _get_phix_error_rate(s["lane"], phix)
        # Get quality score from demultiplex stats, if that fails
        # (which it shouldn't), fall back on fastqc data.
        (avg_quality_score, pct_q30_bases) = fc_con.get_barcode_lane_statistics(project_name, s.get("barcode_name"), fc, s["lane"])
        s_param['avg_quality_score'] = avg_quality_score if avg_quality_score else calc_avg_qv(s)
        if not s_param['avg_quality_score']:
            LOG.warn("Setting average quality failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        s_param['pct_q30_bases'] = pct_q30_bases
        if not s_param['pct_q30_bases']:
            LOG.warn("Setting % of >= Q30 Bases (PF) failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        # Compare phix error and qv to cutoffs
        err_stat = "OK"
        qv_stat = "OK"
        if s_param["phix_error_rate"] > cutoffs["phix_err_cutoff"]:
            err_stat = "HIGH"
        elif s_param["phix_error_rate"] == -1:
            # -1 is the sentinel for "no phix error rate available".
            err_stat = "N/A"
        if s_param["avg_quality_score"] < cutoffs["qv_cutoff"]:
            qv_stat = "LOW"
        output_data["stdout"].write("{:>18}\t{:>6}\t{:>12}\t{:>12}\t{:>12}\t{:>12}\n".format(
            s["barcode_name"], s["lane"], s_param["phix_error_rate"], err_stat,
            s_param["avg_quality_score"], qv_stat))
        # Update/set remaning sample run parameters, falling back on project defaults if *key* is missing
        s_param['ordered_amount'] = s_param.get('ordered_amount', p_con.get_ordered_amount(project_name, samples=p_con.get_entry(project_name, 'samples')))
        s_param['customer_reference'] = s_param.get('customer_reference', project.get('customer_reference'))
        s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project.get('uppnex_id'))
        # Override database settings if options passed at command line
        if ordered_million_reads:
            s_param["ordered_amount"] = _get_ordered_million_reads(s["barcode_name"], ordered_million_reads)
        if bc_count:
            s_param["rounded_read_count"] = _round_read_count_in_millions(_get_bc_count(s["barcode_name"], bc_count, s))
        else:
            s_param["rounded_read_count"] = _round_read_count_in_millions(s_param["rounded_read_count"])
        if uppnex_id:
            s_param["uppnex_project_id"] = uppnex_id
        if customer_reference:
            s_param["customer_reference"] = customer_reference
        # Get the project sample name corresponding to the sample run
        project_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if project_sample:
            LOG.debug("project sample run metrics mapping found: '{}' : '{}'".format(s["name"], project_sample["sample_name"]))
            project_sample_item = project_sample['project_sample']
            # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
            project_sample_d = _set_project_sample_dict(project_sample_item, source)
            if not project_sample_d:
                LOG.warn("No sample_run_metrics information for sample '{}', barcode name '{}', id '{}'\n\tProject summary information {}".format(s["name"], s["barcode_name"], s["_id"], project_sample))
            # Check if sample run metrics name present in project database: if so, verify that database ids are consistent
            if s["name"] not in project_sample_d.keys():
                LOG.warn("no such sample run metrics '{}' in project sample run metrics dictionary".format(s["name"]))
            else:
                if s["_id"] == project_sample_d[s["name"]]:
                    LOG.debug("project sample run metrics mapping found: '{}' : '{}'".format(s["name"], project_sample_d[s["name"]]))
                else:
                    LOG.warn("inconsistent mapping for '{}': '{}' != '{}' (project summary id)".format(s["name"], s["_id"], project_sample_d[s["name"]]))
            s_param['customer_name'] = project_sample_item.get("customer_name", None)
            # Always normalize submitted id, since module textttable does not support unicode
            if type(s_param['customer_name']) is unicode:
                s_param['customer_name'] = unicodedata.normalize('NFKD', s_param['customer_name']).encode('ascii', 'ignore')
        # No project sample found. Manual upload to database necessary.
        else:
            s_param['customer_name'] = None
            LOG.warn("No project sample name found for sample run name '{}'".format(s["barcode_name"]))
            LOG.info("Please run 'pm qc upload-qc FLOWCELL_ID --extensive-matching' to update project sample names ")
            LOG.info("or 'pm qc update --sample_prj PROJECT_NAME --names BARCODE_TO_SAMPLE_MAP to update project sample names.")
            LOG.info("Please refer to the pm documentation for examples.")
            query_ok(force=kw.get("force", False))
        # Finally assess sequencing success, update parameters and set outputs
        s_param['success'] = sequencing_success(s_param, cutoffs)
        s_param.update({k: "N/A" for k in s_param.keys() if s_param[k] is None or s_param[k] == "" or s_param[k] == -1.0})
        # Lane-specific outfile when the same barcode appears more than once on the flowcell.
        if sample_count[s.get("barcode_name")] > 1:
            outfile = "{}_{}_{}_{}.pdf".format(s["barcode_name"], s["date"], s["flowcell"], s["lane"])
        else:
            outfile = "{}_{}_{}.pdf".format(s["barcode_name"], s["date"], s["flowcell"])
        s_param["outfile"] = outfile
        s_param_out.append(s_param)
    # Write final output to reportlab and rst files.
    # 's' below intentionally refers to the last sample from the loop above
    # (sample_run_list is guaranteed non-empty at this point).
    output_data["debug"].write(json.dumps({'s_param': s_param_out, 'sample_runs': {s["name"]: s["barcode_name"] for s in sample_run_list}}))
    notes = [make_note(headers=headers, paragraphs=paragraphs, **sp) for sp in s_param_out]
    # NOTE(fix): dropped the unused 'rest_notes' binding; the call's side effect
    # (writing the rst file) is what matters.
    make_sample_rest_notes("{}_{}_{}_sample_summary.rst".format(project_name, s.get("date", None), s.get("flowcell", None)), s_param_out)
    concatenate_notes(notes, "{}_{}_{}_sample_summary.pdf".format(project_name, s.get("date", None), s.get("flowcell", None)))
    return output_data
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
                       ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                       bc_count=None, project_alias=[], projectdb="projects", samplesdb="samples",
                       flowcelldb="flowcells", phix=None, is_paired=True, **kw):
    """Make a sample status note. Used keywords:

    :param project_name: project name
    :param flowcell: flowcell id
    :param username: db username
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param project_alias: project alias name
    :param phix: phix error rate
    :param is_paired: True if run is paired-end, False for single-end

    :returns: dict of StringIO buffers keyed 'stdout', 'stderr', 'debug'
    """
    # Cutoffs for flagging phix error rate and average quality in the report.
    cutoffs = {
        "phix_err_cutoff": 2.0,
        "qv_cutoff": 30,
    }
    instrument = _parse_instrument_config(os.path.expanduser(kw.get("instrument_config", "")))
    instrument_dict = {i['instrument_id']: i for i in instrument}
    # parameters: per-sample template dict; every sample starts from these defaults.
    parameters = {
        "project_name": None,
        "start_date": None,
        "FC_id": None,
        "scilifelab_name": None,
        "rounded_read_count": None,
        "phix_error_rate": None,
        "avg_quality_score": None,
        "pct_q30_bases": None,
        "success": None,
        "run_mode": None,
        "is_paired": True
    }
    # key mapping from sample_run_metrics to parameter keys
    srm_to_parameter = {
        "project_name": "sample_prj",
        "FC_id": "flowcell",
        "scilifelab_name": "barcode_name",
        "start_date": "date",
        "rounded_read_count": "bc_count",
        "lane": "lane"
    }
    LOG.debug("got parameters {}".format(parameters))
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    if not _assert_flowcell_format(flowcell):
        LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9\-]+\")".format(flowcell))
        return output_data
    output_data = _update_sample_output_data(output_data, cutoffs)
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)
    # Set up paragraphs
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()
    # Get project
    project = p_con.get_entry(project_name)
    source = p_con.get_info_source(project_name)
    if not project:
        LOG.warn("No such project '{}'".format(project_name))
        return output_data
    # Set samples list
    sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
    if len(sample_run_list) == 0:
        LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
        return output_data
    # Set options; command-line overrides arrive as literal strings and are eval'd here.
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    bc_count = _literal_eval_option(bc_count)
    phix = _literal_eval_option(phix)
    # Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
    sample_count = Counter([x.get("barcode_name") for x in sample_run_list])
    # Loop samples and collect information
    s_param_out = []
    fcdoc = None
    for s in sample_run_list:
        s_param = {}
        LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(
            s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
        s_param.update(parameters)
        s_param.update({key: s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
        fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
        # Get instrument; fall back on the 'default' instrument entry on any failure.
        try:
            s_param.update(instrument_dict[fc_con.get_instrument(str(fc))])
        except:
            LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
            s_param.update(instrument_dict['default'])
        # Get run mode; the flowcell document is cached across consecutive
        # samples on the same flowcell to avoid re-fetching it.
        if not fcdoc or fcdoc.get("name") != fc:
            fcdoc = fc_con.get_entry(fc)
        runp = fcdoc.get("RunParameters", {})
        s_param["sequencing_platform"] = "MiSeq" if "MCSVersion" in runp else "HiSeq2500"
        s_param["clustering_method"] = "onboard clustering" if runp.get("ClusteringChoice", "") == "OnBoardClustering" or s_param["sequencing_platform"] == "MiSeq" else "cBot"
        s_param["sequencing_setup"] = fcdoc.get("run_setup")
        s_param["sequencing_mode"] = runp.get("RunMode", "High Output")
        s_param["sequencing_software"] = "RTA {}".format(runp.get("RTAVersion"))
        if s_param["sequencing_platform"] == "MiSeq":
            s_param["sequencing_software"] = "MCS {}/{}".format(runp.get("MCSVersion"), s_param["sequencing_software"])
        else:
            s_param["sequencing_software"] = "{} {}/{}".format(runp.get("ApplicationName"), runp.get("ApplicationVersion"), s_param["sequencing_software"])
        s_param["is_paired"] = fc_con.is_paired_end(str(fc))
        if s_param["is_paired"] is None:
            LOG.warn("Could not determine run setup for flowcell {}. Will assume paired-end.".format(fc))
            s_param["is_paired"] = True
        s_param.update(software_versions)
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        if phix:
            # Command-line phix option overrides the database value.
            s_param["phix_error_rate"] = _get_phix_error_rate(s["lane"], phix)
        # Get quality score from demultiplex stats, if that fails
        # (which it shouldn't), fall back on fastqc data.
        (avg_quality_score, pct_q30_bases) = fc_con.get_barcode_lane_statistics(project_name, s.get("barcode_name"), fc, s["lane"])
        s_param['avg_quality_score'] = avg_quality_score if avg_quality_score else calc_avg_qv(s)
        if not s_param['avg_quality_score']:
            LOG.warn("Setting average quality failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        s_param['pct_q30_bases'] = pct_q30_bases
        if not s_param['pct_q30_bases']:
            LOG.warn("Setting % of >= Q30 Bases (PF) failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        # Compare phix error and qv to cutoffs
        err_stat = "OK"
        qv_stat = "OK"
        if s_param["phix_error_rate"] > cutoffs["phix_err_cutoff"]:
            err_stat = "HIGH"
        elif s_param["phix_error_rate"] == -1:
            # -1 is the sentinel for "no phix error rate available".
            err_stat = "N/A"
        if s_param["avg_quality_score"] < cutoffs["qv_cutoff"]:
            qv_stat = "LOW"
        output_data["stdout"].write("{:>18}\t{:>6}\t{:>12}\t{:>12}\t{:>12}\t{:>12}\n".format(s["barcode_name"], s["lane"], s_param["phix_error_rate"], err_stat, s_param["avg_quality_score"], qv_stat))
        # Update/set remaning sample run parameters, falling back on project defaults if *key* is missing
        s_param['ordered_amount'] = s_param.get('ordered_amount', p_con.get_ordered_amount(project_name, samples=p_con.get_entry(project_name, 'samples')))
        s_param['customer_reference'] = s_param.get('customer_reference', project.get('customer_reference'))
        s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project.get('uppnex_id'))
        # Override database settings if options passed at command line
        if ordered_million_reads:
            s_param["ordered_amount"] = _get_ordered_million_reads(s["barcode_name"], ordered_million_reads)
        if bc_count:
            s_param["rounded_read_count"] = _round_read_count_in_millions(_get_bc_count(s["barcode_name"], bc_count, s))
        else:
            s_param["rounded_read_count"] = _round_read_count_in_millions(s_param["rounded_read_count"])
        if uppnex_id:
            s_param["uppnex_project_id"] = uppnex_id
        if customer_reference:
            s_param["customer_reference"] = customer_reference
        # Get the project sample name corresponding to the sample run
        project_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if project_sample:
            LOG.debug("project sample run metrics mapping found: '{}' : '{}'".format(s["name"], project_sample["sample_name"]))
            project_sample_item = project_sample['project_sample']
            # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
            project_sample_d = _set_project_sample_dict(project_sample_item, source)
            if not project_sample_d:
                LOG.warn("No sample_run_metrics information for sample '{}', barcode name '{}', id '{}'\n\tProject summary information {}".format(s["name"], s["barcode_name"], s["_id"], project_sample))
            # Check if sample run metrics name present in project database: if so, verify that database ids are consistent
            if s["name"] not in project_sample_d.keys():
                LOG.warn("no such sample run metrics '{}' in project sample run metrics dictionary".format(s["name"]))
            else:
                if s["_id"] == project_sample_d[s["name"]]:
                    LOG.debug("project sample run metrics mapping found: '{}' : '{}'".format(s["name"], project_sample_d[s["name"]]))
                else:
                    LOG.warn("inconsistent mapping for '{}': '{}' != '{}' (project summary id)".format(s["name"], s["_id"], project_sample_d[s["name"]]))
            s_param['customer_name'] = project_sample_item.get("customer_name", None)
            # Always normalize submitted id, since module textttable does not support unicode
            if type(s_param['customer_name']) is unicode:
                s_param['customer_name'] = unicodedata.normalize('NFKD', s_param['customer_name']).encode('ascii', 'ignore')
        # No project sample found. Manual upload to database necessary.
        else:
            s_param['customer_name'] = None
            LOG.warn("No project sample name found for sample run name '{}'".format(s["barcode_name"]))
            LOG.info("Please run 'pm qc upload-qc FLOWCELL_ID --extensive-matching' to update project sample names ")
            LOG.info("or 'pm qc update --sample_prj PROJECT_NAME --names BARCODE_TO_SAMPLE_MAP to update project sample names.")
            LOG.info("Please refer to the pm documentation for examples.")
            query_ok(force=kw.get("force", False))
        # Finally assess sequencing success, update parameters and set outputs
        s_param['success'] = sequencing_success(s_param, cutoffs)
        s_param.update({k: "N/A" for k in s_param.keys() if s_param[k] is None or s_param[k] == "" or s_param[k] == -1.0})
        # Lane-specific outfile name when the same barcode occurs more than once on the flowcell.
        if sample_count[s.get("barcode_name")] > 1:
            outfile = "{}_{}_{}_{}.pdf".format(s["barcode_name"], s["date"], s["flowcell"], s["lane"])
        else:
            outfile = "{}_{}_{}.pdf".format(s["barcode_name"], s["date"], s["flowcell"])
        s_param["outfile"] = outfile
        s_param_out.append(s_param)
    # Write final output to reportlab and rst files.
    # NOTE(review): 's' below refers to the last sample from the loop above;
    # sample_run_list is guaranteed non-empty at this point.
    output_data["debug"].write(json.dumps({'s_param': s_param_out, 'sample_runs': {s["name"]: s["barcode_name"] for s in sample_run_list}}))
    notes = [make_note(headers=headers, paragraphs=paragraphs, **sp) for sp in s_param_out]
    rest_notes = make_sample_rest_notes("{}_{}_{}_sample_summary.rst".format(project_name, s.get("date", None), s.get("flowcell", None)), s_param_out)
    concatenate_notes(notes, "{}_{}_{}_sample_summary.pdf".format(project_name, s.get("date", None), s.get("flowcell", None)))
    return output_data
def multiplex_qc(self):
    """Run multiplexing QC checks for a flowcell against statusdb metrics.

    Requires the 'flowcell' parg. Checks, per lane and per sample:
    PhiX error rate, %>=Q30, lane yield, per-sample yield versus pool
    size, fraction of undetermined reads, and overrepresented
    undetermined index sequences. Also cross-checks the samplesheet
    against Demultiplex_Stats in both directions. Writes a
    tab-separated PASS/FAIL table to the stdout output buffer; returns
    nothing.
    """
    MAX_UNDEMULTIPLEXED_INDEX_COUNT = 1000000
    EXPECTED_LANE_YIELD = 143000000
    MAX_PHIX_ERROR_RATE = 2.0
    MIN_PHIX_ERROR_RATE = 0.0
    MIN_GTQ30 = 80.0
    read_pairs = True
    out_data = []
    if not self._check_pargs(['flowcell']):
        return
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    # Construct the short form of the fcid
    sp = os.path.basename(self.pargs.flowcell).split("_")
    fcid = "_".join([sp[0], sp[-1]])
    # Get a connection to the flowcell database and fetch the corresponding document
    # NOTE(fix): dropped a no-op '.format(fcid)' on a placeholder-less message.
    self.log.debug("Connecting to flowcell database")
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Fetching run metrics entry for flowcell {}".format(fcid))
    fc_doc = fc_con.get_entry(fcid)
    if not fc_doc:
        self.log.warn("Could not fetch run metrics entry for flowcell {}".format(fcid))
        return
    # Adjust the read pairs variable according to the run setup
    read_pairs = fc_con.is_paired_end(fcid)
    # Get the yield per sample from the Demultiplex_Stats
    self.log.debug("Getting yield for flowcell {}".format(fcid))
    sample_yield = self._get_yield_per_sample(fc_doc, read_pairs)
    # Get the yield per lane from the Demultiplex_Stats
    self.log.debug("Getting lane yield for flowcell {}".format(fcid))
    lane_yield = self._get_yield_per_lane(fc_doc, read_pairs)
    lanes = lane_yield.keys()
    # Get the number of samples in the pools from the Demultiplex_Stats
    self.log.debug("Getting lane pool sizes for flowcell {}".format(fcid))
    pool_size = self._get_pool_size(fc_doc)
    # Get the sample information from the csv samplesheet
    self.log.debug("Getting csv samplesheet data for flowcell {}".format(fcid))
    ssheet_samples = self._get_samplesheet_sample_data(fc_doc)
    if len(ssheet_samples) == 0:
        self.log.warn("No samplesheet data available for flowcell {}".format(fcid))
    # Verify that all samples in samplesheet have reported metrics.
    # NOTE(fix): loop variables renamed from 'id' (shadowed the builtin) to 'sample_id'.
    for sample_id in ssheet_samples.keys():
        for key in ssheet_samples[sample_id].keys():
            lane, index = key.split("_")
            project = ssheet_samples[sample_id][key][0]
            if sample_id not in sample_yield or \
                    key not in sample_yield[sample_id]:
                self.log.warn("Sample {} from project {} is in samplesheet but no yield was reported in "
                              "Demultiplex_Stats.htm for lane {} and index {}".format(sample_id, project, lane, index))
                continue
            # Tag the entry so the reverse check below can spot unmatched metrics.
            sample_yield[sample_id][key].append('verified')
    # Check that all samples in Demultiplex_Stats have entries in Samplesheet
    for sample_id in sample_yield.keys():
        for key in sample_yield[sample_id].keys():
            lane, index = key.split("_")
            if "verified" not in sample_yield[sample_id][key] and \
                    index != "Undetermined":
                self.log.warn("Sample {} from project {}, with index {} on lane {} is in Demultiplex_Stats "
                              "but no corresponding entry is present in SampleSheet".format(sample_id, sample_yield[sample_id][key][1], index, lane))
    # Check the PhiX error rate for each lane
    self.log.debug("Getting PhiX error rates for flowcell {}".format(fcid))
    for lane in lanes:
        status = "N/A"
        err_rate = fc_con.get_phix_error_rate(fcid, lane)
        if err_rate < 0:
            # Negative value signals that no error rate could be fetched; stays "N/A".
            self.log.warn("Could not get PhiX error rate for lane {} on flowcell {}".format(lane, fcid))
        elif err_rate <= MIN_PHIX_ERROR_RATE or err_rate > MAX_PHIX_ERROR_RATE:
            status = "FAIL"
        else:
            status = "PASS"
        out_data.append([status, "PhiX error rate", lane, err_rate, "{} < PhiX e (%) <= {}".format(MIN_PHIX_ERROR_RATE, MAX_PHIX_ERROR_RATE)])
    # Check the %>=Q30 value for each sample
    sample_quality = self._get_quality_per_sample(fc_doc)
    for sample_id in sample_quality.keys():
        for key in sample_quality[sample_id].keys():
            lane, index = key.split("_")
            status = "FAIL"
            if float(sample_quality[sample_id][key][0]) >= MIN_GTQ30:
                status = "PASS"
            out_data.append([status, "Sample quality", lane, sample_quality[sample_id][key][2], sample_id, sample_quality[sample_id][key][0], "[%>=Q30 >= {}%]".format(MIN_GTQ30)])
    # Check that each lane received the minimum amount of reads
    for lane, reads in lane_yield.items():
        status = "FAIL"
        if reads >= EXPECTED_LANE_YIELD:
            status = "PASS"
        out_data.append([status, "Lane yield", lane, reads, "[Yield >= {}]".format(EXPECTED_LANE_YIELD)])
    # Check that all samples in the pool have received a minimum number of reads
    for sample_id in sample_yield.keys():
        for key in sample_yield[sample_id].keys():
            lane, index = key.split("_")
            if index == "Undetermined":
                continue
            status = "FAIL"
            # Per-sample minimum: half the expected lane yield split evenly over the pool.
            mplx_min = int(0.5 * EXPECTED_LANE_YIELD / pool_size[lane])
            if sample_yield[sample_id][key][0] >= mplx_min:
                status = "PASS"
            out_data.append([status, "Sample yield", lane, sample_yield[sample_id][key][1], sample_id, sample_yield[sample_id][key][0], "[Yield >= {}]".format(mplx_min)])
    # Check that the number of undetermined reads in each lane is below 10% of the total yield for the lane
    for lane, reads in lane_yield.items():
        status = "FAIL"
        key = "_".join([lane, "Undetermined"])
        undetermined = sum([counts.get(key, [0])[0] for counts in sample_yield.values()])
        cutoff = 0.1 * reads
        if undetermined < cutoff:
            status = "PASS"
        out_data.append([status, "Index read", lane, undetermined, "[Undetermined < {}]".format(cutoff)])
    # Check that no overrepresented index sequence exists in undemultiplexed output
    self.log.debug("Fetching undemultiplexed barcode data for flowcell {}".format(fcid))
    undemux_data = self._get_undetermined_index_counts(fc_doc)
    if len(undemux_data) == 0:
        self.log.warn("No undemultiplexed barcode data available for flowcell {}".format(fcid))
    for lane, counts in undemux_data.items():
        # Cap the threshold; guard pool_size with max(1, ...) against empty pools.
        mplx_min = int(min(MAX_UNDEMULTIPLEXED_INDEX_COUNT, 0.5 * EXPECTED_LANE_YIELD / max(1, pool_size[lane])))
        status = "N/A"
        if len(counts) > 0:
            for i in range(len(counts)):
                status = "FAIL"
                if int(counts[i][0]) < mplx_min:
                    status = "PASS"
                out_data.append([status, "Index", lane, counts[i][1], counts[i][2], counts[i][0], "[Undetermined index < {}]".format(mplx_min)])
        else:
            out_data.append([status, "Index", lane, "", "", mplx_min, "-"])
    self.app._output_data['stdout'].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
def multiplex_qc(self):
    """Run multiplex QC checks for a flowcell and write a tab-separated
    report to the application's stdout buffer.

    Checks performed (each producing one or more rows in the report):
      - samplesheet entries vs. Demultiplex_Stats yields (both directions)
      - PhiX error rate per lane
      - %>=Q30 quality per sample
      - total yield per lane and per sample
      - undetermined reads per lane (must be < 10% of lane yield)
      - overrepresented index sequences in the undemultiplexed output

    Requires the 'flowcell' program argument; returns early (without
    output) if it is missing, if no db url can be determined, or if no
    run metrics document exists for the flowcell.
    """
    # QC thresholds used by the checks below.
    MAX_UNDEMULTIPLEXED_INDEX_COUNT = 1000000
    EXPECTED_LANE_YIELD = 143000000
    MAX_PHIX_ERROR_RATE = 2.0
    MIN_PHIX_ERROR_RATE = 0.0
    MIN_GTQ30 = 80.0
    # Yields are interpreted as read pairs rather than single reads.
    read_pairs = True
    out_data = []
    if not self._check_pargs(['flowcell']):
        return
    url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
    if not url:
        self.app.log.warn("Please provide a valid url: got {}".format(url))
        return
    # Construct the short form of the fcid
    # e.g. "<date>_..._<flowcell-id>" -> "<date>_<flowcell-id>"
    sp = os.path.basename(self.pargs.flowcell).split("_")
    fcid = "_".join([sp[0],sp[-1]])
    # Get a connection to the flowcell database and fetch the corresponding document
    # NOTE(review): the message below has no placeholder, so the
    # .format(fcid) argument is silently dropped.
    self.log.debug("Connecting to flowcell database".format(fcid))
    fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
    self.log.debug("Fetching run metrics entry for flowcell {}".format(fcid))
    fc_doc = fc_con.get_entry(fcid)
    if not fc_doc:
        self.log.warn("Could not fetch run metrics entry for flowcell {}".format(fcid))
        return
    # Get the yield per sample from the Demultiplex_Stats
    self.log.debug("Getting yield for flowcell {}".format(fcid))
    sample_yield = self._get_yield_per_sample(fc_doc, read_pairs)
    # Get the yield per lane from the Demultiplex_Stats
    self.log.debug("Getting lane yield for flowcell {}".format(fcid))
    lane_yield = self._get_yield_per_lane(fc_doc, read_pairs)
    lanes = lane_yield.keys()
    # Get the number of samples in the pools from the Demultiplex_Stats
    self.log.debug("Getting lane pool sizes for flowcell {}".format(fcid))
    pool_size = self._get_pool_size(fc_doc)
    # Get the sample information from the csv samplesheet
    self.log.debug("Getting csv samplesheet data for flowcell {}".format(fcid))
    ssheet_samples = self._get_samplesheet_sample_data(fc_doc)
    if len(ssheet_samples) == 0:
        self.log.warn("No samplesheet data available for flowcell {}".format(fcid))
    # Verify that all samples in samplesheet have reported metrics
    # NOTE(review): 'id' shadows the builtin here and below; keys appear to
    # be sample ids mapping to per-"<lane>_<index>" entries — confirm
    # against the _get_samplesheet_sample_data helper.
    for id in ssheet_samples.keys():
        for key in ssheet_samples[id].keys():
            lane, index = key.split("_")
            project = ssheet_samples[id][key][0]
            if id not in sample_yield or \
               key not in sample_yield[id]:
                self.log.warn("Sample {} from project {} is in samplesheet but no yield was reported in " \
                              "Demultiplex_Stats.htm for lane {} and index {}".format(id, project, lane, index))
                continue
            # Tag the entry so the reverse check below can spot orphans.
            sample_yield[id][key].append('verified')
    # Check that all samples in Demultiplex_Stats have entries in Samplesheet
    for id in sample_yield.keys():
        for key in sample_yield[id].keys():
            lane, index = key.split("_")
            if "verified" not in sample_yield[id][key] and \
               index != "Undetermined":
                self.log.warn("Sample {} from project {}, with index {} on lane {} is in Demultiplex_Stats " \
                              "but no corresponding entry is present in SampleSheet".format(id, sample_yield[id][key][1], index, lane))
    # Check the PhiX error rate for each lane
    self.log.debug("Getting PhiX error rates for flowcell {}".format(fcid))
    for lane in lanes:
        status = "N/A"
        err_rate = fc_con.get_phix_error_rate(fcid,lane)
        # Negative error rate signals "not available" from the connection.
        if err_rate < 0:
            self.log.warn("Could not get PhiX error rate for lane {} on flowcell {}".format(lane,fcid))
        elif err_rate <= MIN_PHIX_ERROR_RATE or err_rate > MAX_PHIX_ERROR_RATE:
            status = "FAIL"
        else:
            status = "PASS"
        out_data.append([status, "PhiX error rate", lane, err_rate, "{} < PhiX e (%) <= {}".format(MIN_PHIX_ERROR_RATE, MAX_PHIX_ERROR_RATE)])
    # Check the %>=Q30 value for each sample
    sample_quality = self._get_quality_per_sample(fc_doc)
    for id in sample_quality.keys():
        for key in sample_quality[id].keys():
            lane, index = key.split("_")
            status = "FAIL"
            if float(sample_quality[id][key][0]) >= MIN_GTQ30:
                status = "PASS"
            out_data.append([status,"Sample quality",lane,sample_quality[id][key][2],id,sample_quality[id][key][0],"[%>=Q30 >= {}%]".format(MIN_GTQ30)])
    # Check that each lane received the minimum amount of reads
    for lane, reads in lane_yield.items():
        status = "FAIL"
        if reads >= EXPECTED_LANE_YIELD:
            status = "PASS"
        out_data.append([status,"Lane yield",lane,reads,"[Yield >= {}]".format(EXPECTED_LANE_YIELD)])
    # Check that all samples in the pool have received a minimum number of reads
    for id in sample_yield.keys():
        for key in sample_yield[id].keys():
            lane, index = key.split("_")
            if index == "Undetermined":
                continue
            status = "FAIL"
            # Per-sample minimum: half the expected lane yield shared
            # equally across the pool.
            mplx_min = int(0.5*EXPECTED_LANE_YIELD/pool_size[lane])
            if sample_yield[id][key][0] >= mplx_min:
                status = "PASS"
            out_data.append([status,"Sample yield",lane,sample_yield[id][key][1],id,sample_yield[id][key][0],"[Yield >= {}]".format(mplx_min)])
    # Check that the number of undetermined reads in each lane is below 10% of the total yield for the lane
    for lane, reads in lane_yield.items():
        status = "FAIL"
        key = "_".join([lane,"Undetermined"])
        undetermined = sum([counts.get(key,[0])[0] for counts in sample_yield.values()])
        cutoff = 0.1*reads
        if undetermined < cutoff:
            status = "PASS"
        out_data.append([status,"Index read",lane,undetermined,"[Undetermined < {}]".format(cutoff)])
    # Check that no overrepresented index sequence exists in undemultiplexed output
    self.log.debug("Fetching undemultiplexed barcode data for flowcell {}".format(fcid))
    undemux_data = self._get_undetermined_index_counts(fc_doc)
    if len(undemux_data) == 0:
        self.log.warn("No undemultiplexed barcode data available for flowcell {}".format(fcid))
    for lane, counts in undemux_data.items():
        # Cap the per-index threshold; max(1, ...) guards against a
        # zero pool size for the lane.
        mplx_min = int(min(MAX_UNDEMULTIPLEXED_INDEX_COUNT, 0.5*EXPECTED_LANE_YIELD/max(1,pool_size[lane])))
        status = "N/A"
        if len(counts) > 0:
            for i in range(len(counts)):
                status = "FAIL"
                if int(counts[i][0]) < mplx_min:
                    status = "PASS"
                out_data.append([status,"Index",lane,counts[i][1],counts[i][2],counts[i][0],"[Undetermined index < {}]".format(mplx_min)])
        else:
            out_data.append([status,"Index",lane,"","",mplx_min,"-"])
    # Emit the report as tab-separated rows.
    self.app._output_data['stdout'].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
class TestQCUpload(PmFullTest):
    def setUp(self):
        """FIXME: All other tests depend on data being uploaded, so these
        are not real unit tests. The setup to TestQCUpload has to be run
        prior to other tests, else unexpected failures will occur."""
        # Upload QC data for both test flowcells before opening connections.
        for flowcell in (flowcells[0], flowcells[1]):
            self.app = self.make_app(
                argv=['qc', 'upload-qc', flowcell, '--mtime', '10000'],
                extensions=['scilifelab.pm.ext.ext_qc',
                            'scilifelab.pm.ext.ext_couchdb'])
            self._run_app()
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """The uploaded samplesheet should be stored on the flowcell document."""
        doc = self.fc_con.get_entry("120924_AC003CCCXX")
        sheet = doc["samplesheet_csv"]
        self.assertEqual(sheet[0]["Index"], "TGACCA")
        self.assertEqual(sheet[0]["Description"], "J__Doe_00_01")
        self.assertEqual(sheet[0]["FCID"], "C003CCCXX")
        self.assertEqual(sheet[1]["SampleRef"], "hg19")
        self.assertEqual(sheet[2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Run qc upload against the server.

        Slightly circular testing here - the module is set up with qc
        update, so by definition the test must 'work'."""
        self.app = self.make_app(
            argv=['qc', 'upload-qc', flowcells[1], '--mtime', '100'],
            extensions=['scilifelab.pm.ext.ext_qc',
                        'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(entry["project_sample_name"])
        self.assertEqual(entry["project_id"], "P003")

    def test_qc_update(self):
        """Clearing a sample's project_id and running qc update should restore it."""
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        entry["project_id"] = None
        self.assertIsNone(entry["project_id"])
        self.s_con.save(entry)
        self.app = self.make_app(
            argv=['qc', 'update', '--sample_prj', projects[2],
                  '--project_id', 'P003', '--debug', '--force'],
            extensions=['scilifelab.pm.ext.ext_qc',
                        'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(entry["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Clearing project sample names and running qc update with a name
        map should restore the mapped names."""
        first = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        second = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        first["project_sample_name"] = None
        second["project_sample_name"] = None
        self.assertIsNone(first["project_sample_name"])
        self.assertIsNone(second["project_sample_name"])
        self.s_con.save(first)
        self.s_con.save(second)
        sample_map = {'P001_101_index3': 'P001_101_index3',
                      'P001_102_index6': 'P001_102'}
        self.app = self.make_app(
            argv=['qc', 'update', '--sample_prj', projects[0],
                  '--names', "{}".format(sample_map), '--debug', '--force'],
            extensions=['scilifelab.pm.ext.ext_qc',
                        'scilifelab.pm.ext.ext_couchdb'])
        self._run_app()
        first = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        second = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(first["project_sample_name"], "P001_101_index3")
        self.assertEqual(second["project_sample_name"], "P001_102")
class TestQCUpload(PmFullTest):
    def setUp(self):
        """Upload QC metrics for the first test flowcell and open the
        database connections used by the assertions below."""
        qc_exts = ["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"]
        self.app = self.make_app(
            argv=["qc", "upload-qc", flowcells[0], "--mtime", "10000"],
            extensions=qc_exts,
        )
        self._run_app()
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        doc = self.fc_con.get_entry("120924_AC003CCCXX")
        rows = doc["samplesheet_csv"]
        self.assertEqual(rows[0]["Index"], "TGACCA")
        self.assertEqual(rows[0]["Description"], "J__Doe_00_01")
        self.assertEqual(rows[0]["FCID"], "C003CCCXX")
        self.assertEqual(rows[1]["SampleRef"], "hg19")
        self.assertEqual(rows[2]["SampleID"], "P001_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server"""
        self.app = self.make_app(
            argv=["qc", "upload-qc", flowcells[1], "--mtime", "100"],
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(sample["project_sample_name"])
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        sample["project_id"] = None
        self.assertIsNone(sample["project_id"])
        self.s_con.save(sample)
        update_argv = ["qc", "update", "--sample_prj", projects[2],
                       "--project_id", "P003", "--debug", "--force"]
        self.app = self.make_app(
            argv=update_argv,
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        lane1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        lane2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        lane1["project_sample_name"] = None
        lane2["project_sample_name"] = None
        self.assertIsNone(lane1["project_sample_name"])
        self.assertIsNone(lane2["project_sample_name"])
        self.s_con.save(lane1)
        self.s_con.save(lane2)
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        update_argv = ["qc", "update", "--sample_prj", projects[0],
                       "--names", "{}".format(sample_map), "--debug", "--force"]
        self.app = self.make_app(
            argv=update_argv,
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()
        lane1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        lane2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(lane1["project_sample_name"], "P001_101_index3")
        self.assertEqual(lane2["project_sample_name"], "P001_102")
class TestQCUpload(PmFullTest):
    def _qc_app(self, argv):
        # Build a pm app with the qc and couchdb extensions loaded.
        return self.make_app(argv=argv,
                             extensions=['scilifelab.pm.ext.ext_qc',
                                         'scilifelab.pm.ext.ext_couchdb'])

    def setUp(self):
        """FIXME: All other tests depend on data being uploaded, so these
        are not real unit tests. The setup to TestQCUpload has to be run
        prior to other tests, else unexpected failures will occur."""
        for fc in (flowcells[0], flowcells[1]):
            self.app = self._qc_app(['qc', 'upload-qc', fc, '--mtime', '10000'])
            self._run_app()
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """The uploaded samplesheet should be readable from the flowcell document."""
        fc_doc = self.fc_con.get_entry("120924_AC003CCCXX")
        csv_rows = fc_doc["samplesheet_csv"]
        self.assertEqual(csv_rows[0]["Index"], "TGACCA")
        self.assertEqual(csv_rows[0]["Description"], "J__Doe_00_01")
        self.assertEqual(csv_rows[0]["FCID"], "C003CCCXX")
        self.assertEqual(csv_rows[1]["SampleRef"], "hg19")
        self.assertEqual(csv_rows[2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server.

        Slightly circular testing here - I setup the module with qc update
        so by definition the test must 'work'"""
        self.app = self._qc_app(['qc', 'upload-qc', flowcells[1], '--mtime', '100'])
        self._run_app()
        srm = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(srm["project_sample_name"])
        self.assertEqual(srm["project_id"], "P003")

    def test_qc_update(self):
        """Running qc update should restore a cleared project id."""
        srm = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        srm["project_id"] = None
        self.assertIsNone(srm["project_id"])
        self.s_con.save(srm)
        self.app = self._qc_app(['qc', 'update', '--sample_prj', projects[2],
                                 '--project_id', 'P003', '--debug', '--force'])
        self._run_app()
        srm = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(srm["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Running qc update with a name map should restore cleared
        project sample names to their mapped values."""
        srm1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        srm2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        srm1["project_sample_name"] = None
        srm2["project_sample_name"] = None
        self.assertIsNone(srm1["project_sample_name"])
        self.assertIsNone(srm2["project_sample_name"])
        self.s_con.save(srm1)
        self.s_con.save(srm2)
        sample_map = {'P001_101_index3': 'P001_101_index3',
                      'P001_102_index6': 'P001_102'}
        self.app = self._qc_app(['qc', 'update', '--sample_prj', projects[0],
                                 '--names', "{}".format(sample_map),
                                 '--debug', '--force'])
        self._run_app()
        srm1 = self.s_con.get_entry("1_120924_AC003CCCXX_TGACCA")
        srm2 = self.s_con.get_entry("2_120924_AC003CCCXX_ACAGTG")
        self.assertEqual(srm1["project_sample_name"], "P001_101_index3")
        self.assertEqual(srm2["project_sample_name"], "P001_102")