Ejemplo n.º 1
0
 def test_dbcon(self):
     """Test database connection and that we get expected values."""
     s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
     samples = [s_con.get_entry(x) for x in s_con.name_view]
     samples_d = {x["name"]: x for x in samples}
     self.assertEqual(samples_d["1_120924_AC003CCCXX_TGACCA"]["date"], "120924")
     self.assertEqual(samples_d["1_121015_BB002BBBXX_TGACCA"]["flowcell"], "BB002BBBXX")
     self.assertEqual(samples_d["2_120924_AC003CCCXX_ACAGTG"]["entity_type"], "sample_run_metrics")
     self.assertEqual(samples_d["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
     self.assertEqual(samples_d["4_120924_AC003CCCXX_CGTTAA"]["sequence"], "CGTTAA")
     self.assertEqual(samples_d["2_121015_BB002BBBXX_TGACCA"]["project_id"], "P002")
     fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
     flowcells = [fc_con.get_entry(x) for x in fc_con.name_view]
     flowcells_d = {x["name"]: x for x in flowcells}
     self.assertEqual(flowcells_d["120924_AC003CCCXX"]["name"], "120924_AC003CCCXX")
     self.assertEqual(flowcells_d["121015_BB002BBBXX"]["name"], "121015_BB002BBBXX")
     self.assertEqual(flowcells_d["120924_AC003CCCXX"]["entity_type"], "flowcell_run_metrics")
     p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
     projects = [p_con.get_entry(x) for x in p_con.name_view]
     projects_d = {x["project_name"]: x for x in projects}
     self.assertEqual(projects_d["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
     self.assertEqual(projects_d["J.Doe_00_01"]["no_of_samples"], 2)
     self.assertEqual(
         set(projects_d["J.Doe_00_01"]["samples"].keys()), {"P001_101_index3", "P001_102", "P001_103"}
     )
     self.assertEqual(projects_d["J.Doe_00_01"]["customer_reference"], "GnuGenome")
     self.assertEqual(projects_d["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
     # dict.keys() is a view object on Python 3 and never compares equal to
     # a list, so materialize it before comparing against the expected list.
     self.assertEqual(list(projects_d["J.Doe_00_03"]["samples"].keys()), ["3_index6"])
     self.assertIn("A", projects_d["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
Ejemplo n.º 2
0
    def list_projects(self):
        """List the projects run on a flowcell together with their application.

        Writes tab-separated output to the application's stdout buffer: a
        header row with flowcell name, instrument type and run mode, then one
        row per project with the project name and its application.
        """
        if not self._check_pargs(["flowcell"]):
            return

        url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
        if not url:
            self.app.log.warn("Please provide a valid url: got {}".format(url))
            return
        if not validate_fc_directory_format(self.pargs.flowcell):
            self.app.log.warn(
                "Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell)
            )
            return

        # Derive the flowcell id (date + position/barcode) from the run
        # directory name.  NOTE: the former "out_data = [[self.pargs.flowcell]]"
        # here was dead code (unconditionally overwritten below) and removed.
        s = self.pargs.flowcell.split("_")
        fcid = "_".join([s[0], s[-1]])

        self.log.debug("Establishing FlowcellRunMetricsConnection")
        fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
        self.log.debug("Establishing ProjectSummaryConnection")
        p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

        self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
        fc = fc_con.get_entry(fcid)
        if fc is None:
            self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
            return

        self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
        ssheet_data = self._get_samplesheet_sample_data(fc)
        if len(ssheet_data) == 0:
            self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
            return

        # Run parameters are optional: warn but keep going with defaults.
        self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
        run_data = self._get_run_parameter_data(fc)
        if len(run_data) == 0:
            self.log.warn("No runParameter data for flowcell {}".format(fcid))

        out_data = [
            [self.pargs.flowcell, run_data.get("InstrumentType", "HiSeq2000"), run_data.get("RunMode", "High Output")]
        ]

        # Extract the project names; samplesheets encode '.' as '__'
        projects = set([proj[0].replace("__", ".") for data in ssheet_data.values() for proj in data.values()])

        # Extract application for each project
        for project in projects:
            self.log.debug("Fetching project data document for project {}".format(project))
            pdoc = p_con.get_entry(project)
            if pdoc is None:
                self.log.warn("No project data document for project {}".format(project))
                pdoc = {}

            application = pdoc.get("application", "N/A")
            out_data.append([project, application])

        self.app._output_data["stdout"].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
Ejemplo n.º 3
0
 def list_projects(self):
     """List the projects run on a flowcell together with application and type.

     Writes tab-separated output to the application's stdout buffer: a header
     row with flowcell name, instrument type and run mode, then one row per
     project with the project name, application and project type.
     """
     if not self._check_pargs(["flowcell"]):
         return

     url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
     if not url:
         self.app.log.warn("Please provide a valid url: got {}".format(url))
         return
     if not validate_fc_directory_format(self.pargs.flowcell):
         self.app.log.warn("Path '{}' does not conform to bcbio flowcell directory format; aborting".format(self.pargs.flowcell))
         return

     # Derive the flowcell id (date + position/barcode) from the run
     # directory name.  NOTE: the former "out_data = [[self.pargs.flowcell]]"
     # here was dead code (unconditionally overwritten below) and removed.
     s = self.pargs.flowcell.split("_")
     fcid = "_".join([s[0], s[-1]])

     self.log.debug("Establishing FlowcellRunMetricsConnection")
     fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
     self.log.debug("Establishing ProjectSummaryConnection")
     p_con = ProjectSummaryConnection(dbname=self.app.config.get("db", "projects"), **vars(self.app.pargs))

     self.log.debug("Fetching flowcell metric document for flowcell {}".format(fcid))
     fc = fc_con.get_entry(fcid)
     if fc is None:
         self.log.warn("No flowcell metric document for flowcell {}".format(fcid))
         return

     self.log.debug("Fetching csv samplesheet data for flowcell {}".format(fcid))
     ssheet_data = self._get_samplesheet_sample_data(fc)
     if len(ssheet_data) == 0:
         self.log.warn("No csv samplesheet data for flowcell {}".format(fcid))
         return

     # Run parameters are optional: warn but keep going with defaults.
     self.log.debug("Fetch runParameter data for flowcell {}".format(fcid))
     run_data = self._get_run_parameter_data(fc)
     if len(run_data) == 0:
         self.log.warn("No runParameter data for flowcell {}".format(fcid))

     out_data = [[self.pargs.flowcell,
                  run_data.get("InstrumentType", "HiSeq2000"),
                  run_data.get("RunMode", "High Output")]]

     # Extract the project names; samplesheets encode '.' as '__'
     projects = set([proj[0].replace("__", ".") for data in ssheet_data.values() for proj in data.values()])

     # Extract application and type for each project
     for project in projects:
         self.log.debug("Fetching project data document for project {}".format(project))
         pdoc = p_con.get_entry(project)
         if pdoc is None:
             self.log.warn("No project data document for project {}".format(project))
             pdoc = {}

         application = pdoc.get("application", "N/A")
         # Renamed from "type" to avoid shadowing the builtin.
         project_type = pdoc.get("type", "Check GPL")
         out_data.append([project, application, project_type])

     self.app._output_data['stdout'].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
Ejemplo n.º 4
0
 def test_dbcon(self):
     """Test database connection and that we get expected values."""
     s_con = SampleRunMetricsConnection(dbname="samples-test",
                                        username="******",
                                        password="******")
     samples = [s_con.get_entry(x) for x in s_con.name_view]
     samples_d = {x["name"]: x for x in samples}
     self.assertEqual(samples_d["1_120924_AC003CCCXX_TGACCA"]["date"],
                      "120924")
     self.assertEqual(samples_d["1_121015_BB002BBBXX_TGACCA"]["flowcell"],
                      "BB002BBBXX")
     self.assertEqual(
         samples_d["2_120924_AC003CCCXX_ACAGTG"]["entity_type"],
         "sample_run_metrics")
     self.assertEqual(samples_d["3_120924_AC003CCCXX_ACAGTG"]["lane"], "3")
     self.assertEqual(samples_d["4_120924_AC003CCCXX_CGTTAA"]["sequence"],
                      "CGTTAA")
     self.assertEqual(samples_d["2_121015_BB002BBBXX_TGACCA"]["project_id"],
                      "P002")
     fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test",
                                           username="******",
                                           password="******")
     flowcells = [fc_con.get_entry(x) for x in fc_con.name_view]
     flowcells_d = {x["name"]: x for x in flowcells}
     self.assertEqual(flowcells_d["120924_AC003CCCXX"]["name"],
                      "120924_AC003CCCXX")
     self.assertEqual(flowcells_d["121015_BB002BBBXX"]["name"],
                      "121015_BB002BBBXX")
     self.assertEqual(flowcells_d["120924_AC003CCCXX"]["entity_type"],
                      "flowcell_run_metrics")
     p_con = ProjectSummaryConnection(dbname="projects-test",
                                      username="******",
                                      password="******")
     projects = [p_con.get_entry(x) for x in p_con.name_view]
     projects_d = {x["project_name"]: x for x in projects}
     self.assertEqual(
         projects_d["J.Doe_00_01"]["min_m_reads_per_sample_ordered"], 0.1)
     self.assertEqual(projects_d["J.Doe_00_01"]["no_of_samples"], 2)
     self.assertEqual(set(projects_d["J.Doe_00_01"]["samples"].keys()),
                      set(["P001_101_index3", "P001_102", "P001_103"]))
     self.assertEqual(projects_d["J.Doe_00_01"]["customer_reference"],
                      "GnuGenome")
     self.assertEqual(
         projects_d["J.Doe_00_02"]["min_m_reads_per_sample_ordered"], 0.2)
     # dict.keys() is a view object on Python 3 and never compares equal to
     # a list, so materialize it before comparing against the expected list.
     self.assertEqual(list(projects_d["J.Doe_00_03"]["samples"].keys()),
                      ["3_index6"])
     self.assertIn(
         "A",
         projects_d["J.Doe_00_03"]["samples"]["3_index6"]["library_prep"])
Ejemplo n.º 5
0
def sample_status_note(project_name=None,
                       flowcell=None,
                       username=None,
                       password=None,
                       url=None,
                       ordered_million_reads=None,
                       uppnex_id=None,
                       customer_reference=None,
                       bc_count=None,
                       project_alias=None,
                       projectdb="projects",
                       samplesdb="samples",
                       flowcelldb="flowcells",
                       phix=None,
                       is_paired=True,
                       **kw):
    """Make a sample status note. Used keywords:

    :param project_name: project name
    :param flowcell: flowcell id
    :param username: db username
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param bc_count: barcode count override (string; literal-evaluated)
    :param project_alias: project alias name
    :param projectdb: name of the projects database
    :param samplesdb: name of the samples database
    :param flowcelldb: name of the flowcells database
    :param phix: phix error rate
    :param is_paired: True if run is paired-end, False for single-end
    """
    # Guard against the shared-mutable-default pitfall: the old default was
    # a module-level list object reused across calls.
    if project_alias is None:
        project_alias = []
    # Cutoffs
    cutoffs = {
        "phix_err_cutoff": 2.0,
        "qv_cutoff": 30,
    }

    instrument = _parse_instrument_config(
        os.path.expanduser(kw.get("instrument_config", "")))
    instrument_dict = {i['instrument_id']: i for i in instrument}

    # Per-sample parameter template; each sample run starts from a copy.
    # NOTE(review): the is_paired argument is never read here — the value is
    # always recomputed per flowcell below; confirm whether that is intended.
    parameters = {
        "project_name": None,
        "start_date": None,
        "FC_id": None,
        "scilifelab_name": None,
        "rounded_read_count": None,
        "phix_error_rate": None,
        "avg_quality_score": None,
        "pct_q30_bases": None,
        "success": None,
        "run_mode": None,
        "is_paired": True
    }
    # key mapping from sample_run_metrics to parameter keys
    srm_to_parameter = {
        "project_name": "sample_prj",
        "FC_id": "flowcell",
        "scilifelab_name": "barcode_name",
        "start_date": "date",
        "rounded_read_count": "bc_count",
        "lane": "lane"
    }

    LOG.debug("got parameters {}".format(parameters))
    output_data = {
        'stdout': StringIO(),
        'stderr': StringIO(),
        'debug': StringIO()
    }
    if not _assert_flowcell_format(flowcell):
        LOG.warn(
            "Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9\-]+\")"
            .format(flowcell))
        return output_data
    output_data = _update_sample_output_data(output_data, cutoffs)

    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb,
                                       username=username,
                                       password=password,
                                       url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password,
                                          url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb,
                                     username=username,
                                     password=password,
                                     url=url)

    # Set up paragraphs
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()

    # Get project
    project = p_con.get_entry(project_name)
    source = p_con.get_info_source(project_name)
    if not project:
        LOG.warn("No such project '{}'".format(project_name))
        return output_data

    # Set samples list
    sample_run_list = _set_sample_run_list(project_name, flowcell,
                                           project_alias, s_con)
    if len(sample_run_list) == 0:
        LOG.warn(
            "No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?"
            .format(project_name, flowcell))
        return output_data

    # Set options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    bc_count = _literal_eval_option(bc_count)
    phix = _literal_eval_option(phix)

    # Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
    sample_count = Counter([x.get("barcode_name") for x in sample_run_list])

    # Loop samples and collect information
    s_param_out = []
    fcdoc = None
    for s in sample_run_list:
        s_param = {}
        LOG.debug(
            "working on sample '{}', sample run metrics name '{}', id '{}'".
            format(s.get("barcode_name", None), s.get("name", None),
                   s.get("_id", None)))
        s_param.update(parameters)
        s_param.update(
            {key: s[srm_to_parameter[key]]
             for key in srm_to_parameter.keys()})
        fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
        # Get instrument
        try:
            s_param.update(instrument_dict[fc_con.get_instrument(str(fc))])
        # Narrowed from a bare "except:", which also swallowed SystemExit
        # and KeyboardInterrupt.
        except Exception:
            LOG.warn(
                "Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report."
                .format(fc))
            s_param.update(instrument_dict['default'])
        # Get run mode; cache the flowcell document across consecutive
        # samples from the same flowcell.
        if not fcdoc or fcdoc.get("name") != fc:
            fcdoc = fc_con.get_entry(fc)
        runp = fcdoc.get("RunParameters", {})
        s_param[
            "sequencing_platform"] = "MiSeq" if "MCSVersion" in runp else "HiSeq2500"
        s_param["clustering_method"] = "onboard clustering" if runp.get(
            "ClusteringChoice", "") == "OnBoardClustering" or s_param[
                "sequencing_platform"] == "MiSeq" else "cBot"
        s_param["sequencing_setup"] = fcdoc.get("run_setup")
        s_param["sequencing_mode"] = runp.get("RunMode", "High Output")
        s_param["sequencing_software"] = "RTA {}".format(
            runp.get("RTAVersion"))
        if s_param["sequencing_platform"] == "MiSeq":
            s_param["sequencing_software"] = "MCS {}/{}".format(
                runp.get("MCSVersion"), s_param["sequencing_software"])
        else:
            s_param["sequencing_software"] = "{} {}/{}".format(
                runp.get("ApplicationName"), runp.get("ApplicationVersion"),
                s_param["sequencing_software"])
        s_param["is_paired"] = fc_con.is_paired_end(str(fc))
        if s_param["is_paired"] is None:
            LOG.warn(
                "Could not determine run setup for flowcell {}. Will assume paired-end."
                .format(fc))
            s_param["is_paired"] = True
        s_param.update(software_versions)
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(
            str(fc), s["lane"])
        if phix:
            s_param["phix_error_rate"] = _get_phix_error_rate(s["lane"], phix)
        # Get quality score from demultiplex stats, if that fails
        # (which it shouldn't), fall back on fastqc data.
        (avg_quality_score,
         pct_q30_bases) = fc_con.get_barcode_lane_statistics(
             project_name, s.get("barcode_name"), fc, s["lane"])
        s_param[
            'avg_quality_score'] = avg_quality_score if avg_quality_score else calc_avg_qv(
                s)
        if not s_param['avg_quality_score']:
            LOG.warn(
                "Setting average quality failed for sample {}, id {}".format(
                    s.get("name"), s.get("_id")))
        s_param['pct_q30_bases'] = pct_q30_bases
        if not s_param['pct_q30_bases']:
            LOG.warn(
                "Setting % of >= Q30 Bases (PF) failed for sample {}, id {}".
                format(s.get("name"), s.get("_id")))
        # Compare phix error and qv to cutoffs
        err_stat = "OK"
        qv_stat = "OK"
        if s_param["phix_error_rate"] > cutoffs["phix_err_cutoff"]:
            err_stat = "HIGH"
        elif s_param["phix_error_rate"] == -1:
            err_stat = "N/A"
        if s_param["avg_quality_score"] < cutoffs["qv_cutoff"]:
            qv_stat = "LOW"
        output_data["stdout"].write(
            "{:>18}\t{:>6}\t{:>12}\t{:>12}\t{:>12}\t{:>12}\n".format(
                s["barcode_name"], s["lane"], s_param["phix_error_rate"],
                err_stat, s_param["avg_quality_score"], qv_stat))

        # Update/set remaning sample run parameters, falling back on project defaults if *key* is missing
        s_param['ordered_amount'] = s_param.get(
            'ordered_amount',
            p_con.get_ordered_amount(project_name,
                                     samples=p_con.get_entry(
                                         project_name, 'samples')))
        s_param['customer_reference'] = s_param.get(
            'customer_reference', project.get('customer_reference'))
        s_param['uppnex_project_id'] = s_param.get('uppnex_project_id',
                                                   project.get('uppnex_id'))

        # Override database settings if options passed at command line
        if ordered_million_reads:
            s_param["ordered_amount"] = _get_ordered_million_reads(
                s["barcode_name"], ordered_million_reads)
        if bc_count:
            s_param["rounded_read_count"] = _round_read_count_in_millions(
                _get_bc_count(s["barcode_name"], bc_count, s))
        else:
            s_param["rounded_read_count"] = _round_read_count_in_millions(
                s_param["rounded_read_count"])
        if uppnex_id:
            s_param["uppnex_project_id"] = uppnex_id
        if customer_reference:
            s_param["customer_reference"] = customer_reference

        # Get the project sample name corresponding to the sample run
        project_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if project_sample:
            LOG.debug(
                "project sample run metrics mapping found: '{}' : '{}'".format(
                    s["name"], project_sample["sample_name"]))
            project_sample_item = project_sample['project_sample']
            # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
            project_sample_d = _set_project_sample_dict(
                project_sample_item, source)
            if not project_sample_d:
                LOG.warn(
                    "No sample_run_metrics information for sample '{}', barcode name '{}', id '{}'\n\tProject summary information {}"
                    .format(s["name"], s["barcode_name"], s["_id"],
                            project_sample))
            # Check if sample run metrics name present in project database: if so, verify that database ids are consistent
            if s["name"] not in project_sample_d.keys():
                LOG.warn(
                    "no such sample run metrics '{}' in project sample run metrics dictionary"
                    .format(s["name"]))
            else:
                if s["_id"] == project_sample_d[s["name"]]:
                    LOG.debug(
                        "project sample run metrics mapping found: '{}' : '{}'"
                        .format(s["name"], project_sample_d[s["name"]]))
                else:
                    LOG.warn(
                        "inconsistent mapping for '{}': '{}' != '{}' (project summary id)"
                        .format(s["name"], s["_id"],
                                project_sample_d[s["name"]]))
            s_param['customer_name'] = project_sample_item.get(
                "customer_name", None)

            # Always normalize submitted id, since module textttable does not support unicode
            if isinstance(s_param['customer_name'], unicode):
                s_param['customer_name'] = unicodedata.normalize(
                    'NFKD',
                    s_param['customer_name']).encode('ascii', 'ignore')
        # No project sample found. Manual upload to database necessary.
        else:
            s_param['customer_name'] = None
            LOG.warn(
                "No project sample name found for sample run name '{}'".format(
                    s["barcode_name"]))
            LOG.info(
                "Please run 'pm qc upload-qc FLOWCELL_ID --extensive-matching' to update project sample names "
            )
            LOG.info(
                "or 'pm qc update --sample_prj PROJECT_NAME --names BARCODE_TO_SAMPLE_MAP to update project sample names."
            )
            LOG.info("Please refer to the pm documentation for examples.")
            query_ok(force=kw.get("force", False))

        # Finally assess sequencing success, update parameters and set outputs
        s_param['success'] = sequencing_success(s_param, cutoffs)
        s_param.update({
            k: "N/A"
            for k in s_param.keys()
            if s_param[k] is None or s_param[k] == "" or s_param[k] == -1.0
        })
        # Lane-specific report file name when a sample ran on several lanes.
        if sample_count[s.get("barcode_name")] > 1:
            outfile = "{}_{}_{}_{}.pdf".format(s["barcode_name"], s["date"],
                                               s["flowcell"], s["lane"])
        else:
            outfile = "{}_{}_{}.pdf".format(s["barcode_name"], s["date"],
                                            s["flowcell"])
        s_param["outfile"] = outfile
        s_param_out.append(s_param)

    # Write final output to reportlab and rst files
    output_data["debug"].write(
        json.dumps({
            's_param': s_param_out,
            'sample_runs':
            {s["name"]: s["barcode_name"]
             for s in sample_run_list}
        }))
    notes = [
        make_note(headers=headers, paragraphs=paragraphs, **sp)
        for sp in s_param_out
    ]
    rest_notes = make_sample_rest_notes(
        "{}_{}_{}_sample_summary.rst".format(project_name, s.get("date", None),
                                             s.get("flowcell", None)),
        s_param_out)
    concatenate_notes(
        notes, "{}_{}_{}_sample_summary.pdf".format(project_name,
                                                    s.get("date", None),
                                                    s.get("flowcell", None)))
    return output_data
def sample_status_note(project_name=None, flowcell=None, username=None, password=None, url=None,
                       ordered_million_reads=None, uppnex_id=None, customer_reference=None, bc_count=None,
                       project_alias=[], projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                       phix=None, is_paired=True, **kw):
    """Make a sample status note. Used keywords:

    :param project_name: project name
    :param flowcell: flowcell id
    :param username: db username
    :param password: db password
    :param url: db url
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param project_alias: project alias name
    :param phix: phix error rate
    :param is_paired: True if run is paired-end, False for single-end
    """
    # Cutoffs
    cutoffs = {
        "phix_err_cutoff" : 2.0,
        "qv_cutoff" : 30,
        }

    instrument = _parse_instrument_config(os.path.expanduser(kw.get("instrument_config","")))
    instrument_dict = {i['instrument_id']: i for i in instrument}

    # parameters
    parameters = {
        "project_name" : None,
        "start_date" : None,
        "FC_id" : None,
        "scilifelab_name" : None,
        "rounded_read_count" : None,
        "phix_error_rate" : None,
        "avg_quality_score" : None,
        "pct_q30_bases" : None,
        "success" : None,
        "run_mode":None,
        "is_paired":True
        }
    # key mapping from sample_run_metrics to parameter keys
    srm_to_parameter = {"project_name":"sample_prj", "FC_id":"flowcell",
                        "scilifelab_name":"barcode_name", "start_date":"date",
                        "rounded_read_count":"bc_count", "lane": "lane"}

    LOG.debug("got parameters {}".format(parameters))
    output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
    if not _assert_flowcell_format(flowcell):
        LOG.warn("Wrong flowcell format {}; skipping. Please use the flowcell id (format \"[A-Z0-9\-]+\")".format(flowcell) )
        return output_data
    output_data = _update_sample_output_data(output_data, cutoffs)

    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Set up paragraphs
    paragraphs = sample_note_paragraphs()
    headers = sample_note_headers()

    # Get project
    project = p_con.get_entry(project_name)
    source = p_con.get_info_source(project_name)
    if not project:
        LOG.warn("No such project '{}'".format(project_name))
        return output_data

    # Set samples list
    sample_run_list = _set_sample_run_list(project_name, flowcell, project_alias, s_con)
    if len(sample_run_list) == 0:
        LOG.warn("No samples for project '{}', flowcell '{}'. Maybe there are no sample run metrics in statusdb?".format(project_name, flowcell))
        return output_data

    # Set options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    bc_count = _literal_eval_option(bc_count)
    phix = _literal_eval_option(phix)

    # Count number of times a sample has been run on a flowcell; if several, make lane-specific reports
    sample_count = Counter([x.get("barcode_name") for x in sample_run_list])

    # Loop samples and collect information
    s_param_out = []
    fcdoc = None
    for s in sample_run_list:
        s_param = {}
        LOG.debug("working on sample '{}', sample run metrics name '{}', id '{}'".format(s.get("barcode_name", None), s.get("name", None), s.get("_id", None)))
        s_param.update(parameters)
        s_param.update({key:s[srm_to_parameter[key]] for key in srm_to_parameter.keys()})
        fc = "{}_{}".format(s.get("date"), s.get("flowcell"))
        # Get instrument
        try:
            s_param.update(instrument_dict[fc_con.get_instrument(str(fc))])
        except:
            LOG.warn("Failed to set instrument and software versions for flowcell {} in report due to missing RunInfo -> Instrument field in statusdb. Either rerun 'pm qc update-qc' or search-and-replace 'NN' in the sample report.".format(fc))
            s_param.update(instrument_dict['default'])
        # Get run mode
        if not fcdoc or fcdoc.get("name") != fc:
            fcdoc = fc_con.get_entry(fc)
        runp = fcdoc.get("RunParameters",{})
        s_param["sequencing_platform"] = "MiSeq" if "MCSVersion" in runp else "HiSeq2500"
        s_param["clustering_method"] = "onboard clustering" if runp.get("ClusteringChoice","") == "OnBoardClustering" or s_param["sequencing_platform"] == "MiSeq" else "cBot"
        s_param["sequencing_setup"] = fcdoc.get("run_setup")
        s_param["sequencing_mode"] = runp.get("RunMode","High Output")
        s_param["sequencing_software"] = "RTA {}".format(runp.get("RTAVersion"))
        if s_param["sequencing_platform"] == "MiSeq":
            s_param["sequencing_software"] = "MCS {}/{}".format(runp.get("MCSVersion"),s_param["sequencing_software"])
        else:
            s_param["sequencing_software"] = "{} {}/{}".format(runp.get("ApplicationName"),runp.get("ApplicationVersion"),s_param["sequencing_software"])
        s_param["is_paired"] = fc_con.is_paired_end(str(fc))
        if s_param["is_paired"] is None:
            LOG.warn("Could not determine run setup for flowcell {}. Will assume paired-end.".format(fc))
            s_param["is_paired"] = True
        s_param.update(software_versions)
        s_param["phix_error_rate"] = fc_con.get_phix_error_rate(str(fc), s["lane"])
        if phix:
            s_param["phix_error_rate"] = _get_phix_error_rate(s["lane"], phix)
        # Get quality score from demultiplex stats, if that fails
        # (which it shouldn't), fall back on fastqc data.
        (avg_quality_score, pct_q30_bases) = fc_con.get_barcode_lane_statistics(project_name, s.get("barcode_name"), fc, s["lane"])
        s_param['avg_quality_score'] = avg_quality_score if avg_quality_score else calc_avg_qv(s)
        if not s_param['avg_quality_score']:
            LOG.warn("Setting average quality failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        s_param['pct_q30_bases'] = pct_q30_bases
        if not s_param['pct_q30_bases']:
            LOG.warn("Setting % of >= Q30 Bases (PF) failed for sample {}, id {}".format(s.get("name"), s.get("_id")))
        # Compare phix error and qv to cutoffs
        err_stat = "OK"
        qv_stat = "OK"
        if s_param["phix_error_rate"] > cutoffs["phix_err_cutoff"]:
            err_stat = "HIGH"
        elif s_param["phix_error_rate"] == -1:
            err_stat = "N/A"
        if s_param["avg_quality_score"] < cutoffs["qv_cutoff"]:
            qv_stat = "LOW"
        output_data["stdout"].write("{:>18}\t{:>6}\t{:>12}\t{:>12}\t{:>12}\t{:>12}\n".format(s["barcode_name"], s["lane"], s_param["phix_error_rate"], err_stat, s_param["avg_quality_score"], qv_stat))

        # Update/set remaning sample run parameters, falling back on project defaults if *key* is missing
        s_param['ordered_amount'] = s_param.get('ordered_amount',
                                                p_con.get_ordered_amount(project_name,
                                                                         samples=p_con.get_entry(project_name,'samples')))
        s_param['customer_reference'] = s_param.get('customer_reference', project.get('customer_reference'))
        s_param['uppnex_project_id'] = s_param.get('uppnex_project_id', project.get('uppnex_id'))

        # Override database settings if options passed at command line
        if ordered_million_reads:
            s_param["ordered_amount"] = _get_ordered_million_reads(s["barcode_name"], ordered_million_reads)
        if bc_count:
            s_param["rounded_read_count"] = _round_read_count_in_millions(_get_bc_count(s["barcode_name"], bc_count, s))
        else:
            s_param["rounded_read_count"] = _round_read_count_in_millions(s_param["rounded_read_count"])
        if uppnex_id:
            s_param["uppnex_project_id"] = uppnex_id
        if customer_reference:
            s_param["customer_reference"] = customer_reference

        # Get the project sample name corresponding to the sample run
        project_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if project_sample:
            LOG.debug("project sample run metrics mapping found: '{}' : '{}'".format(s["name"], project_sample["sample_name"]))
            project_sample_item = project_sample['project_sample']
            # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
            project_sample_d = _set_project_sample_dict(project_sample_item, source)
            if not project_sample_d:
                LOG.warn("No sample_run_metrics information for sample '{}', barcode name '{}', id '{}'\n\tProject summary information {}".format(s["name"], s["barcode_name"], s["_id"], project_sample))
            # Check if sample run metrics name present in project database: if so, verify that database ids are consistent
            if s["name"] not in project_sample_d.keys():
                LOG.warn("no such sample run metrics '{}' in project sample run metrics dictionary".format(s["name"]) )
            else:
                if s["_id"] == project_sample_d[s["name"]]:
                    LOG.debug("project sample run metrics mapping found: '{}' : '{}'".format(s["name"], project_sample_d[s["name"]]))
                else:
                    LOG.warn("inconsistent mapping for '{}': '{}' != '{}' (project summary id)".format(s["name"], s["_id"], project_sample_d[s["name"]]))
            s_param['customer_name'] = project_sample_item.get("customer_name", None)

            # Always normalize submitted id, since module textttable does not support unicode
            if type(s_param['customer_name']) is unicode:
                s_param['customer_name'] = unicodedata.normalize('NFKD', s_param['customer_name']).encode('ascii', 'ignore')
        # No project sample found. Manual upload to database necessary.
        else:
            s_param['customer_name'] = None
            LOG.warn("No project sample name found for sample run name '{}'".format(s["barcode_name"]))
            LOG.info("Please run 'pm qc upload-qc FLOWCELL_ID --extensive-matching' to update project sample names ")
            LOG.info("or 'pm qc update --sample_prj PROJECT_NAME --names BARCODE_TO_SAMPLE_MAP to update project sample names.")
            LOG.info("Please refer to the pm documentation for examples.")
            query_ok(force=kw.get("force", False))

        # Finally assess sequencing success, update parameters and set outputs
        s_param['success'] = sequencing_success(s_param, cutoffs)
        s_param.update({k:"N/A" for k in s_param.keys() if s_param[k] is None or s_param[k] ==  "" or s_param[k] == -1.0})
        if sample_count[s.get("barcode_name")] > 1:
            outfile = "{}_{}_{}_{}.pdf".format(s["barcode_name"], s["date"], s["flowcell"], s["lane"])
        else:
            outfile = "{}_{}_{}.pdf".format(s["barcode_name"], s["date"], s["flowcell"])
        s_param["outfile"] = outfile
        s_param_out.append(s_param)

    # Write final output to reportlab and rst files
    output_data["debug"].write(json.dumps({'s_param': s_param_out, 'sample_runs':{s["name"]:s["barcode_name"] for s in sample_run_list}}))
    notes = [make_note(headers=headers, paragraphs=paragraphs, **sp) for sp in s_param_out]
    rest_notes = make_sample_rest_notes("{}_{}_{}_sample_summary.rst".format(project_name, s.get("date", None), s.get("flowcell", None)), s_param_out)
    concatenate_notes(notes, "{}_{}_{}_sample_summary.pdf".format(project_name, s.get("date", None), s.get("flowcell", None)))
    return output_data
Ejemplo n.º 7
0
  def multiplex_qc(self):
      """Run demultiplexing QC checks for the flowcell given on the command line.

      Fetches the flowcell's run metrics document from the database and checks:

      * PhiX error rate per lane
      * %>=Q30 quality per sample
      * yield per lane and per sample in a pool
      * fraction of undetermined reads per lane
      * overrepresented index sequences among undemultiplexed reads

      Each check appends a PASS/FAIL/"N/A" row to a result table which is
      finally written, tab-separated, to the application's stdout buffer.
      """
      # QC thresholds. EXPECTED_LANE_YIELD is presumably expressed in read
      # pairs (cf. the read_pairs flag below) -- TODO confirm units.
      MAX_UNDEMULTIPLEXED_INDEX_COUNT = 1000000
      EXPECTED_LANE_YIELD = 143000000
      MAX_PHIX_ERROR_RATE = 2.0
      MIN_PHIX_ERROR_RATE = 0.0
      MIN_GTQ30 = 80.0
      # Default; adjusted below from the run setup stored in the database.
      read_pairs = True

      out_data = []

      # A flowcell argument is mandatory.
      if not self._check_pargs(['flowcell']):
          return
      url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
      if not url:
          self.app.log.warn("Please provide a valid url: got {}".format(url))
          return

      # Construct the short form of the fcid: first and last underscore-separated
      # fields of the run folder name.
      sp = os.path.basename(self.pargs.flowcell).split("_")
      fcid = "_".join([sp[0],sp[-1]])

      # Get a connection to the flowcell database and fetch the corresponding document
      # NOTE(review): the .format(fcid) below has no placeholder in the string,
      # so fcid is never interpolated into this debug message.
      self.log.debug("Connecting to flowcell database".format(fcid))
      fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
      self.log.debug("Fetching run metrics entry for flowcell {}".format(fcid))
      fc_doc = fc_con.get_entry(fcid)
      if not fc_doc:
          self.log.warn("Could not fetch run metrics entry for flowcell {}".format(fcid))
          return

      # Adjust the read pairs variable according to the run setup
      read_pairs = fc_con.is_paired_end(fcid)

      # Get the yield per sample from the Demultiplex_Stats
      self.log.debug("Getting yield for flowcell {}".format(fcid))
      sample_yield = self._get_yield_per_sample(fc_doc, read_pairs)

      # Get the yield per lane from the Demultiplex_Stats
      self.log.debug("Getting lane yield for flowcell {}".format(fcid))
      lane_yield = self._get_yield_per_lane(fc_doc, read_pairs)
      lanes = lane_yield.keys()

      # Get the number of samples in the pools from the Demultiplex_Stats
      self.log.debug("Getting lane pool sizes for flowcell {}".format(fcid))
      pool_size = self._get_pool_size(fc_doc)

      # Get the sample information from the csv samplesheet
      self.log.debug("Getting csv samplesheet data for flowcell {}".format(fcid))
      ssheet_samples = self._get_samplesheet_sample_data(fc_doc)
      if len(ssheet_samples) == 0: 
          self.log.warn("No samplesheet data available for flowcell {}".format(fcid))

      # Verify that all samples in samplesheet have reported metrics.
      # NOTE(review): 'id' shadows the builtin of the same name (here and in
      # the loops below). Inner-dict keys have the form "<lane>_<index>".
      for id in ssheet_samples.keys():
          for key in ssheet_samples[id].keys():
              lane, index = key.split("_")
              project = ssheet_samples[id][key][0]
              if id not in sample_yield or \
              key not in sample_yield[id]: 
                  self.log.warn("Sample {} from project {} is in samplesheet but no yield was reported in " \
                                "Demultiplex_Stats.htm for lane {} and index {}".format(id,
                                                                                        project,
                                                                                        lane,
                                                                                        index))
                  continue
              # Tag the entry so the reverse check below can skip it.
              sample_yield[id][key].append('verified')

      # Check that all samples in Demultiplex_Stats have entries in Samplesheet
      for id in sample_yield.keys():
          for key in sample_yield[id].keys():
              lane, index = key.split("_")
              if "verified" not in sample_yield[id][key] and \
              index != "Undetermined":
                  self.log.warn("Sample {} from project {}, with index {} on lane {} is in Demultiplex_Stats " \
                                "but no corresponding entry is present in SampleSheet".format(id,
                                                                                              sample_yield[id][key][1],
                                                                                              index,
                                                                                              lane))

      # Check the PhiX error rate for each lane
      self.log.debug("Getting PhiX error rates for flowcell {}".format(fcid))
      for lane in lanes:
          status = "N/A"
          err_rate = fc_con.get_phix_error_rate(fcid,lane)
          # A negative rate signals a failed lookup; status then stays "N/A"
          # but a row is still emitted below carrying the negative value.
          if err_rate < 0:
              self.log.warn("Could not get PhiX error rate for lane {} on flowcell {}".format(lane,fcid))
          elif err_rate <= MIN_PHIX_ERROR_RATE or err_rate > MAX_PHIX_ERROR_RATE:
              # Note: a rate of exactly MIN_PHIX_ERROR_RATE (0.0) counts as FAIL.
              status = "FAIL"
          else:
              status = "PASS"
          out_data.append([status,
                           "PhiX error rate",
                           lane,
                           err_rate,
                           "{} < PhiX e (%) <= {}".format(MIN_PHIX_ERROR_RATE,
                                                          MAX_PHIX_ERROR_RATE)])

      # Check the %>=Q30 value for each sample
      sample_quality = self._get_quality_per_sample(fc_doc)
      for id in sample_quality.keys():
          for key in sample_quality[id].keys():
              lane, index = key.split("_")
              status = "FAIL"
              if float(sample_quality[id][key][0]) >= MIN_GTQ30:
                  status = "PASS"
              out_data.append([status,"Sample quality",lane,sample_quality[id][key][2],id,sample_quality[id][key][0],"[%>=Q30 >= {}%]".format(MIN_GTQ30)])

      # Check that each lane received the minimum amount of reads
      for lane, reads in lane_yield.items():
          status = "FAIL"
          if reads >= EXPECTED_LANE_YIELD:
              status = "PASS"
          out_data.append([status,"Lane yield",lane,reads,"[Yield >= {}]".format(EXPECTED_LANE_YIELD)])

      # Check that all samples in the pool have received a minimum number of reads
      for id in sample_yield.keys():
          for key in sample_yield[id].keys():
              lane, index = key.split("_")
              if index == "Undetermined":
                  continue

              status = "FAIL"
              # Per-sample minimum: half the expected lane yield shared evenly
              # over the pool members of that lane.
              mplx_min = int(0.5*EXPECTED_LANE_YIELD/pool_size[lane])
              if sample_yield[id][key][0] >= mplx_min:
                  status = "PASS"
              out_data.append([status,"Sample yield",lane,sample_yield[id][key][1],id,sample_yield[id][key][0],"[Yield >= {}]".format(mplx_min)])

      # Check that the number of undetermined reads in each lane is below 10% of the total yield for the lane
      for lane, reads in lane_yield.items():
          status = "FAIL"
          key = "_".join([lane,"Undetermined"])
          undetermined = sum([counts.get(key,[0])[0] for counts in sample_yield.values()])
          cutoff = 0.1*reads
          if undetermined < cutoff:
              status = "PASS"
          out_data.append([status,"Index read",lane,undetermined,"[Undetermined < {}]".format(cutoff)])

      # Check that no overrepresented index sequence exists in undemultiplexed output
      self.log.debug("Fetching undemultiplexed barcode data for flowcell {}".format(fcid))
      undemux_data = self._get_undetermined_index_counts(fc_doc)
      if len(undemux_data) == 0:
          self.log.warn("No undemultiplexed barcode data available for flowcell {}".format(fcid))

      for lane, counts in undemux_data.items():
          # Threshold for a single undetermined index sequence: capped by the
          # absolute maximum, otherwise half the expected per-pool-member yield.
          mplx_min = int(min(MAX_UNDEMULTIPLEXED_INDEX_COUNT,
                             0.5*EXPECTED_LANE_YIELD/max(1,pool_size[lane])))
          status = "N/A"
          if len(counts) > 0:
              for i in range(len(counts)):
                  status = "FAIL"
                  if int(counts[i][0]) < mplx_min:
                      status = "PASS"
                  out_data.append([status,"Index",lane,counts[i][1],counts[i][2],counts[i][0],"[Undetermined index < {}]".format(mplx_min)])
          else:
              # No undetermined counts for this lane; emit a placeholder row.
              out_data.append([status,"Index",lane,"","",mplx_min,"-"])

      self.app._output_data['stdout'].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
Ejemplo n.º 8
0
  def multiplex_qc(self):
      """Run demultiplexing QC checks for the flowcell given on the command line.

      Variant that always treats the run as paired-end (``read_pairs`` is
      hardcoded ``True`` and never adjusted from the run setup). Fetches the
      flowcell's run metrics document from the database and checks:

      * PhiX error rate per lane
      * %>=Q30 quality per sample
      * yield per lane and per sample in a pool
      * fraction of undetermined reads per lane
      * overrepresented index sequences among undemultiplexed reads

      Each check appends a PASS/FAIL/"N/A" row to a result table which is
      finally written, tab-separated, to the application's stdout buffer.
      """
      # QC thresholds. EXPECTED_LANE_YIELD is presumably expressed in read
      # pairs (cf. the read_pairs flag below) -- TODO confirm units.
      MAX_UNDEMULTIPLEXED_INDEX_COUNT = 1000000
      EXPECTED_LANE_YIELD = 143000000
      MAX_PHIX_ERROR_RATE = 2.0
      MIN_PHIX_ERROR_RATE = 0.0
      MIN_GTQ30 = 80.0
      # NOTE(review): never updated from the database in this variant, so all
      # yields are computed under a paired-end assumption.
      read_pairs = True

      out_data = []

      # A flowcell argument is mandatory.
      if not self._check_pargs(['flowcell']):
          return
      url = self.pargs.url if self.pargs.url else self.app.config.get("db", "url")
      if not url:
          self.app.log.warn("Please provide a valid url: got {}".format(url))
          return

      # Construct the short form of the fcid: first and last underscore-separated
      # fields of the run folder name.
      sp = os.path.basename(self.pargs.flowcell).split("_")
      fcid = "_".join([sp[0],sp[-1]])

      # Get a connection to the flowcell database and fetch the corresponding document
      # NOTE(review): the .format(fcid) below has no placeholder in the string,
      # so fcid is never interpolated into this debug message.
      self.log.debug("Connecting to flowcell database".format(fcid))
      fc_con = FlowcellRunMetricsConnection(dbname=self.app.config.get("db", "flowcells"), **vars(self.app.pargs))
      self.log.debug("Fetching run metrics entry for flowcell {}".format(fcid))
      fc_doc = fc_con.get_entry(fcid)
      if not fc_doc:
          self.log.warn("Could not fetch run metrics entry for flowcell {}".format(fcid))
          return

      # Get the yield per sample from the Demultiplex_Stats
      self.log.debug("Getting yield for flowcell {}".format(fcid))
      sample_yield = self._get_yield_per_sample(fc_doc, read_pairs)

      # Get the yield per lane from the Demultiplex_Stats
      self.log.debug("Getting lane yield for flowcell {}".format(fcid))
      lane_yield = self._get_yield_per_lane(fc_doc, read_pairs)
      lanes = lane_yield.keys()

      # Get the number of samples in the pools from the Demultiplex_Stats
      self.log.debug("Getting lane pool sizes for flowcell {}".format(fcid))
      pool_size = self._get_pool_size(fc_doc)

      # Get the sample information from the csv samplesheet
      self.log.debug("Getting csv samplesheet data for flowcell {}".format(fcid))
      ssheet_samples = self._get_samplesheet_sample_data(fc_doc)
      if len(ssheet_samples) == 0: 
          self.log.warn("No samplesheet data available for flowcell {}".format(fcid))

      # Verify that all samples in samplesheet have reported metrics.
      # NOTE(review): 'id' shadows the builtin of the same name (here and in
      # the loops below). Inner-dict keys have the form "<lane>_<index>".
      for id in ssheet_samples.keys():
          for key in ssheet_samples[id].keys():
              lane, index = key.split("_")
              project = ssheet_samples[id][key][0]
              if id not in sample_yield or \
              key not in sample_yield[id]: 
                  self.log.warn("Sample {} from project {} is in samplesheet but no yield was reported in " \
                                "Demultiplex_Stats.htm for lane {} and index {}".format(id,
                                                                                        project,
                                                                                        lane,
                                                                                        index))
                  continue
              # Tag the entry so the reverse check below can skip it.
              sample_yield[id][key].append('verified')

      # Check that all samples in Demultiplex_Stats have entries in Samplesheet
      for id in sample_yield.keys():
          for key in sample_yield[id].keys():
              lane, index = key.split("_")
              if "verified" not in sample_yield[id][key] and \
              index != "Undetermined":
                  self.log.warn("Sample {} from project {}, with index {} on lane {} is in Demultiplex_Stats " \
                                "but no corresponding entry is present in SampleSheet".format(id,
                                                                                              sample_yield[id][key][1],
                                                                                              index,
                                                                                              lane))

      # Check the PhiX error rate for each lane
      self.log.debug("Getting PhiX error rates for flowcell {}".format(fcid))
      for lane in lanes:
          status = "N/A"
          err_rate = fc_con.get_phix_error_rate(fcid,lane)
          # A negative rate signals a failed lookup; status then stays "N/A"
          # but a row is still emitted below carrying the negative value.
          if err_rate < 0:
              self.log.warn("Could not get PhiX error rate for lane {} on flowcell {}".format(lane,fcid))
          elif err_rate <= MIN_PHIX_ERROR_RATE or err_rate > MAX_PHIX_ERROR_RATE:
              # Note: a rate of exactly MIN_PHIX_ERROR_RATE (0.0) counts as FAIL.
              status = "FAIL"
          else:
              status = "PASS"
          out_data.append([status,
                           "PhiX error rate",
                           lane,
                           err_rate,
                           "{} < PhiX e (%) <= {}".format(MIN_PHIX_ERROR_RATE,
                                                          MAX_PHIX_ERROR_RATE)])

      # Check the %>=Q30 value for each sample
      sample_quality = self._get_quality_per_sample(fc_doc)
      for id in sample_quality.keys():
          for key in sample_quality[id].keys():
              lane, index = key.split("_")
              status = "FAIL"
              if float(sample_quality[id][key][0]) >= MIN_GTQ30:
                  status = "PASS"
              out_data.append([status,"Sample quality",lane,sample_quality[id][key][2],id,sample_quality[id][key][0],"[%>=Q30 >= {}%]".format(MIN_GTQ30)])

      # Check that each lane received the minimum amount of reads
      for lane, reads in lane_yield.items():
          status = "FAIL"
          if reads >= EXPECTED_LANE_YIELD:
              status = "PASS"
          out_data.append([status,"Lane yield",lane,reads,"[Yield >= {}]".format(EXPECTED_LANE_YIELD)])

      # Check that all samples in the pool have received a minimum number of reads
      for id in sample_yield.keys():
          for key in sample_yield[id].keys():
              lane, index = key.split("_")
              if index == "Undetermined":
                  continue

              status = "FAIL"
              # Per-sample minimum: half the expected lane yield shared evenly
              # over the pool members of that lane.
              mplx_min = int(0.5*EXPECTED_LANE_YIELD/pool_size[lane])
              if sample_yield[id][key][0] >= mplx_min:
                  status = "PASS"
              out_data.append([status,"Sample yield",lane,sample_yield[id][key][1],id,sample_yield[id][key][0],"[Yield >= {}]".format(mplx_min)])

      # Check that the number of undetermined reads in each lane is below 10% of the total yield for the lane
      for lane, reads in lane_yield.items():
          status = "FAIL"
          key = "_".join([lane,"Undetermined"])
          undetermined = sum([counts.get(key,[0])[0] for counts in sample_yield.values()])
          cutoff = 0.1*reads
          if undetermined < cutoff:
              status = "PASS"
          out_data.append([status,"Index read",lane,undetermined,"[Undetermined < {}]".format(cutoff)])

      # Check that no overrepresented index sequence exists in undemultiplexed output
      self.log.debug("Fetching undemultiplexed barcode data for flowcell {}".format(fcid))
      undemux_data = self._get_undetermined_index_counts(fc_doc)
      if len(undemux_data) == 0:
          self.log.warn("No undemultiplexed barcode data available for flowcell {}".format(fcid))

      for lane, counts in undemux_data.items():
          # Threshold for a single undetermined index sequence: capped by the
          # absolute maximum, otherwise half the expected per-pool-member yield.
          mplx_min = int(min(MAX_UNDEMULTIPLEXED_INDEX_COUNT,
                             0.5*EXPECTED_LANE_YIELD/max(1,pool_size[lane])))
          status = "N/A"
          if len(counts) > 0:
              for i in range(len(counts)):
                  status = "FAIL"
                  if int(counts[i][0]) < mplx_min:
                      status = "PASS"
                  out_data.append([status,"Index",lane,counts[i][1],counts[i][2],counts[i][0],"[Undetermined index < {}]".format(mplx_min)])
          else:
              # No undetermined counts for this lane; emit a placeholder row.
              out_data.append([status,"Index",lane,"","",mplx_min,"-"])

      self.app._output_data['stdout'].write("\n".join(["\t".join([str(r) for r in row]) for row in out_data]))
Ejemplo n.º 9
0
class TestQCUpload(PmFullTest):
    """Tests for the ``pm qc`` upload and update commands.

    FIXME: All other tests depend on data being uploaded, so these are
    not real unit tests. The setup to TestQCUpload has to be run prior
    to other tests, else unexpected failures will occur.
    """

    # Extensions required by every pm invocation in this test case.
    _extensions = ['scilifelab.pm.ext.ext_qc', 'scilifelab.pm.ext.ext_couchdb']

    def _run_pm(self, argv):
        """Build a pm app for *argv* with the qc/couchdb extensions and run it."""
        self.app = self.make_app(argv=argv, extensions=self._extensions)
        self._run_app()

    def setUp(self):
        """Upload QC data for both test flowcells and open db connections."""
        for fc in (flowcells[0], flowcells[1]):
            self._run_pm(['qc', 'upload-qc', fc, '--mtime', '10000'])
        self.s_con = SampleRunMetricsConnection(dbname="samples-test",
                                                username="******",
                                                password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test",
                                              username="******",
                                              password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test",
                                                   username="******",
                                                   password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        sheet = self.fc_con.get_entry("120924_AC003CCCXX")["samplesheet_csv"]
        self.assertEqual(sheet[0]["Index"], "TGACCA")
        self.assertEqual(sheet[0]["Description"], "J__Doe_00_01")
        self.assertEqual(sheet[0]["FCID"], "C003CCCXX")
        self.assertEqual(sheet[1]["SampleRef"], "hg19")
        self.assertEqual(sheet[2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server. Slightly circular testing
        here - I setup the module with qc update so by definition the
        test must 'work'"""
        self._run_pm(['qc', 'upload-qc', flowcells[1], '--mtime', '100'])
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(entry["project_sample_name"])
        self.assertEqual(entry["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        # Clear the project id in the database, then restore it via pm qc update.
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        entry["project_id"] = None
        self.assertIsNone(entry["project_id"])
        self.s_con.save(entry)
        self._run_pm(['qc', 'update', '--sample_prj', projects[2],
                      '--project_id', 'P003', '--debug', '--force'])
        updated = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(updated["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        # Clear the project sample names, then restore them via a barcode map.
        run_names = ("1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG")
        for name in run_names:
            entry = self.s_con.get_entry(name)
            entry["project_sample_name"] = None
            self.assertIsNone(entry["project_sample_name"])
            self.s_con.save(entry)
        sample_map = {
            'P001_101_index3': 'P001_101_index3',
            'P001_102_index6': 'P001_102'
        }
        self._run_pm(['qc', 'update', '--sample_prj', projects[0], '--names',
                      "{}".format(sample_map), '--debug', '--force'])
        self.assertEqual(
            self.s_con.get_entry(run_names[0])["project_sample_name"],
            "P001_101_index3")
        self.assertEqual(
            self.s_con.get_entry(run_names[1])["project_sample_name"],
            "P001_102")
Ejemplo n.º 10
0
class TestQCUpload(PmFullTest):
    """Tests for the ``pm qc`` upload and update commands."""

    # Extensions required by every pm invocation in this test case.
    _extensions = ["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"]

    def _run_pm(self, argv):
        """Build a pm app for *argv* with the qc/couchdb extensions and run it."""
        self.app = self.make_app(argv=argv, extensions=self._extensions)
        self._run_app()

    def setUp(self):
        """Upload QC data for the first test flowcell and open db connections."""
        self._run_pm(["qc", "upload-qc", flowcells[0], "--mtime", "10000"])
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        sheet = self.fc_con.get_entry("120924_AC003CCCXX")["samplesheet_csv"]
        self.assertEqual(sheet[0]["Index"], "TGACCA")
        self.assertEqual(sheet[0]["Description"], "J__Doe_00_01")
        self.assertEqual(sheet[0]["FCID"], "C003CCCXX")
        self.assertEqual(sheet[1]["SampleRef"], "hg19")
        self.assertEqual(sheet[2]["SampleID"], "P001_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server"""
        self._run_pm(["qc", "upload-qc", flowcells[1], "--mtime", "100"])
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(entry["project_sample_name"])
        self.assertEqual(entry["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        # Clear the project id in the database, then restore it via pm qc update.
        entry = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        entry["project_id"] = None
        self.assertIsNone(entry["project_id"])
        self.s_con.save(entry)
        self._run_pm(["qc", "update", "--sample_prj", projects[2],
                      "--project_id", "P003", "--debug", "--force"])
        updated = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(updated["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        # Clear the project sample names, then restore them via a barcode map.
        run_names = ("1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG")
        for name in run_names:
            entry = self.s_con.get_entry(name)
            entry["project_sample_name"] = None
            self.assertIsNone(entry["project_sample_name"])
            self.s_con.save(entry)
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self._run_pm(["qc", "update", "--sample_prj", projects[0], "--names",
                      "{}".format(sample_map), "--debug", "--force"])
        self.assertEqual(
            self.s_con.get_entry(run_names[0])["project_sample_name"],
            "P001_101_index3")
        self.assertEqual(
            self.s_con.get_entry(run_names[1])["project_sample_name"],
            "P001_102")
Ejemplo n.º 11
0
class TestQCUpload(PmFullTest):
    """Exercise `qc upload-qc` and `qc update` against the test couchdb databases."""

    def _run_qc_command(self, argv):
        """Build the pm app with the qc/couchdb extensions for *argv* and run it."""
        self.app = self.make_app(
            argv=argv,
            extensions=["scilifelab.pm.ext.ext_qc", "scilifelab.pm.ext.ext_couchdb"],
        )
        self._run_app()

    def setUp(self):
        """Upload qc data for both test flowcells and open database connections.

        FIXME: All other tests depend on data being uploaded, so
        these are not real unit tests. The setup to TestQCUpload has to
        be run prior to other tests, else unexpected failures will
        occur.
        """
        for flowcell in (flowcells[0], flowcells[1]):
            self._run_qc_command(["qc", "upload-qc", flowcell, "--mtime", "10000"])
        self.s_con = SampleRunMetricsConnection(dbname="samples-test", username="******", password="******")
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username="******", password="******")
        self.fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")

    def test_samplesheet(self):
        """Test samplesheet upload"""
        fc = self.fc_con.get_entry("120924_AC003CCCXX")
        rows = fc["samplesheet_csv"]
        self.assertEqual(rows[0]["Index"], "TGACCA")
        self.assertEqual(rows[0]["Description"], "J__Doe_00_01")
        self.assertEqual(rows[0]["FCID"], "C003CCCXX")
        self.assertEqual(rows[1]["SampleRef"], "hg19")
        self.assertEqual(rows[2]["SampleID"], "P002_101_index3")

    def test_qc_upload(self):
        """Test running qc upload to server. Slightly circular testing
        here - I setup the module with qc update so by definition the
        test must 'work'"""
        self._run_qc_command(["qc", "upload-qc", flowcells[1], "--mtime", "100"])
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertIsNone(sample["project_sample_name"])
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update(self):
        """Test running qc update of a project id"""
        # Blank out the project id on the stored record, persist it, then let
        # `qc update` restore it from the project mapping.
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        sample["project_id"] = None
        self.assertIsNone(sample["project_id"])
        self.s_con.save(sample)
        self._run_qc_command(
            ["qc", "update", "--sample_prj", projects[2], "--project_id", "P003", "--debug", "--force"]
        )
        sample = self.s_con.get_entry("4_120924_AC003CCCXX_CGTTAA")
        self.assertEqual(sample["project_id"], "P003")

    def test_qc_update_sample_names(self):
        """Test running qc update of project sample names"""
        # Clear the project sample name on both records and persist the change.
        for barcode_name in ("1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG"):
            entry = self.s_con.get_entry(barcode_name)
            entry["project_sample_name"] = None
            self.assertIsNone(entry["project_sample_name"])
            self.s_con.save(entry)
        # The name mapping is passed on the command line as the dict's string
        # representation.
        sample_map = {"P001_101_index3": "P001_101_index3", "P001_102_index6": "P001_102"}
        self._run_qc_command(
            ["qc", "update", "--sample_prj", projects[0], "--names", "{}".format(sample_map), "--debug", "--force"]
        )
        expected = {
            "1_120924_AC003CCCXX_TGACCA": "P001_101_index3",
            "2_120924_AC003CCCXX_ACAGTG": "P001_102",
        }
        for barcode_name, project_sample_name in expected.items():
            entry = self.s_con.get_entry(barcode_name)
            self.assertEqual(entry["project_sample_name"], project_sample_name)