Exemple #1
0
def best_practice_note(project_name=None, samples=None, capture_kit="agilent_v4", application="seqcap", flist=[], sample_name_map=None, **kw):
    """Make a best practice application note.

    NB: currently only works for seqcap application.

    :param project_name: project name
    :param samples: samples to work on. Defaults to all samples.
    :param application: chosen application
    """
    param = parameters
    output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
    if application not in BEST_PRACTICE_NOTES:
        LOG.warn("No such application '{}'. Valid choices are: \n\t{}".format(application, "\n\t".join(BEST_PRACTICE_NOTES)))
    if application == "seqcap":
        df, samples_df = _get_seqcap_summary(flist, kw.get("amplicon", False))
        software_df = _get_software_table(flist)
        database_df = _get_database_table(flist, post_process=kw.get("post_process", None))
        if sample_name_map:
            samples_df.CustomerName = [sample_name_map[s]['customer_name'] for s in samples_df.Sample]
        df.Total = _format_num_reads(df.Total)
        ttab = _indent_texttable_for_rst(_dataframe_to_texttable(df[["Sample"] + SEQCAP_TABLE_COLUMNS[1:5]], align=["left", "right", "right", "right", "right"]))
        ttab_target = _indent_texttable_for_rst(_dataframe_to_texttable(df[["Sample"] + SEQCAP_TABLE_COLUMNS[5:9]], align=["left", "right", "right", "right", "right"]))
        ttab_dbsnp = _indent_texttable_for_rst(_dataframe_to_texttable(df[["Sample"] + SEQCAP_TABLE_COLUMNS[9:14]], align=["left", "right", "right", "right", "right", "right"]))
        ttab_samples = _indent_texttable_for_rst(_dataframe_to_texttable(samples_df[["Sample", "CustomerName", "Sequence"]], align=["left", "right", "right"]))
        ttab_software = _indent_texttable_for_rst(_dataframe_to_texttable(software_df, align=["left", "right"]))
        ttab_database = _indent_texttable_for_rst(_dataframe_to_texttable(database_df, align=["left", "right"]))
        param.update({'project_summary':ttab, 'project_target_summary':ttab_target, 'project_dbsnp_summary':ttab_dbsnp, 'table_sample_summary':ttab_samples, 'capturekit':SEQCAP_KITS[capture_kit],
                      'software_versions_table':ttab_software, 'database_versions_table': ttab_database})
        param['project_name'] = project_name if project_name else kw.get("statusdb_project_name", None)
    # Add applications here
    else:
        pass
    # Generic rest call for all templates
    make_rest_note("{}_best_practice.rst".format(kw.get("project", None)), report="bp_seqcap", outdir=kw.get("basedir", os.curdir), **param)
    return output_data
Exemple #2
0
def project_status_note(project_name=None,
                        username=None,
                        password=None,
                        url=None,
                        use_ps_map=True,
                        use_bc_map=False,
                        check_consistency=False,
                        ordered_million_reads=None,
                        uppnex_id=None,
                        customer_reference=None,
                        exclude_sample_ids={},
                        project_alias=None,
                        sample_aliases={},
                        projectdb="projects",
                        samplesdb="samples",
                        flowcelldb="flowcells",
                        include_all_samples=False,
                        flat_table=False,
                        **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    :param flat_table: Just create a simple tab-separated version of the table instead of the fancy pdf
    """

    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "",
    }

    output_data, sample_table, param = _project_status_note_table(
        project_name, username, password, url, use_ps_map, use_bc_map,
        check_consistency, ordered_million_reads, uppnex_id,
        customer_reference, exclude_sample_ids, project_alias, sample_aliases,
        projectdb, samplesdb, flowcelldb, include_all_samples, parameters,
        **kw)

    if not flat_table:
        # Set report paragraphs
        paragraphs = project_note_paragraphs()
        headers = project_note_headers()

        #Hack: removes Comments paragraph if it is empty
        if not param["finished"]:
            paragraphs.pop("Comments", None)

        paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
        make_note("{}_project_summary.pdf".format(project_name), headers,
                  paragraphs, **param)
        make_rest_note("{}_project_summary.rst".format(project_name),
                       sample_table=sample_table,
                       report="project_report",
                       **param)

    else:
        # Write tab-separated output
        sample_table[0].insert(0, 'ProjectID')
        table_cols = [
            sample_table[0].index(col) for col in [
                'ProjectID', 'ScilifeID', 'SubmittedID', 'BarcodeSeq',
                'MSequenced'
            ]
        ]
        outfile = "{}_project_summary.csv".format(project_name)
        with open(outfile, "w") as outh:
            csvw = csv.writer(outh)
            for i, sample in enumerate(sample_table):
                if i > 0:
                    sample.insert(0, project_name)
                data = [str(sample[col]) for col in table_cols]
                csvw.writerow(data)
                output_data['stdout'].write("{}\n".format("\t".join(data)))

    param.update(
        {k: "N/A"
         for k in param.keys() if param[k] is None or param[k] == ""})
    output_data["debug"].write(
        json.dumps({
            'param': param,
            'table': sample_table
        }))

    return output_data
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, flat_table=False, **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    :param flat_table: Just create a simple tab-separated version of the table instead of the fancy pdf
    """

    # parameters
    parameters = {
        "project_name" : project_name,
        "finished" : "",
        }

    output_data, sample_table, param = _project_status_note_table(project_name, username, password, url,
                                                                  use_ps_map, use_bc_map, check_consistency,
                                                                  ordered_million_reads, uppnex_id,
                                                                  customer_reference, exclude_sample_ids,
                                                                  project_alias, sample_aliases, projectdb,
                                                                  samplesdb, flowcelldb, include_all_samples,
                                                                  parameters, **kw)

    if not flat_table:
        # Set report paragraphs
        paragraphs = project_note_paragraphs()
        headers = project_note_headers()

        #Hack: removes Comments paragraph if it is empty
        if not param["finished"]:
            paragraphs.pop("Comments",None)

        paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
        make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
        make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)

    else:
        # Write tab-separated output
        sample_table[0].insert(0,'ProjectID')
        table_cols = [sample_table[0].index(col) for col in ['ProjectID', 'ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced']]
        outfile = "{}_project_summary.csv".format(project_name)
        with open(outfile,"w") as outh:
            csvw = csv.writer(outh)
            for i,sample in enumerate(sample_table):
                if i > 0:
                    sample.insert(0,project_name)
                data = [str(sample[col]) for col in table_cols]
                csvw.writerow(data)
                output_data['stdout'].write("{}\n".format("\t".join(data)))

    param.update({k:"N/A" for k in param.keys() if param[k] is None or param[k] ==  ""})
    output_data["debug"].write(json.dumps({'param':param, 'table':sample_table}))

    return output_data
Exemple #4
0
def project_status_note(project_name=None,
                        username=None,
                        password=None,
                        url=None,
                        use_ps_map=True,
                        use_bc_map=False,
                        check_consistency=False,
                        ordered_million_reads=None,
                        uppnex_id=None,
                        customer_reference=None,
                        exclude_sample_ids={},
                        project_alias=None,
                        sample_aliases={},
                        projectdb="projects",
                        samplesdb="samples",
                        flowcelldb="flowcells",
                        include_all_samples=False,
                        **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    """
    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {
        "scilife_name": "scilife_name",
        "customer_name": "customer_name",
        "project_name": "project_name"
    }
    # mapping project sample to table
    table_keys = [
        'ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered',
        'Status'
    ]

    output_data = {
        'stdout': StringIO(),
        'stderr': StringIO(),
        'debug': StringIO()
    }
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb,
                                       username=username,
                                       password=password,
                                       url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password,
                                          url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb,
                                     username=username,
                                     password=password,
                                     url=url)

    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters

    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))

    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name,
                                           flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get(
                "scilife_name", None)
            s_d = {s["name"]: {'sample': sample_name, 'id': s["_id"]}}
            samples.update(s_d)
        else:
            if s["barcode_name"] in sample_aliases:
                s_d = {
                    sample_aliases[s["barcode_name"]]: {
                        'sample': sample_aliases[s["barcode_name"]],
                        'id': s["_id"]
                    }
                }
                samples.update(s_d)
            else:
                s_d = {
                    s["name"]: {
                        'sample': s["name"],
                        'id': s["_id"],
                        'barcode_name': s["barcode_name"]
                    }
                }
                LOG.warn(
                    "No mapping found for sample run:\n  '{}'".format(s_d))

    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({
        key: prj_summary.get(ps_to_parameter[key], None)
        for key in ps_to_parameter.keys()
    })
    param["ordered_amount"] = param.get("ordered_amount",
                                        p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get(
        'customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id',
                                           prj_summary.get('uppnex_id'))

    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [
        x for l in last_library_preps.values() for x in l
    ]
    LOG.debug(
        "Looping through sample map that maps project sample names to sample run metrics ids"
    )
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info(
                    "No library prep information for sample {}; keeping in report"
                    .format(v['sample']))
            else:
                if k not in last_library_preps_srm:
                    LOG.info(
                        "Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report"
                        .format(k, v["id"],
                                last_library_preps[v['sample']].values()[0],
                                v['sample']))
                    continue
        else:
            pass

        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        if vals['Status'] == "N/A" or vals['Status'] == "NP":
            all_passed = False
        sample_table.append([vals[k] for k in table_keys])

    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table
                                             ])) + samples_excluded
    samples_not_in_table = list(
        set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            for k, v in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample,
                                                barcode_seq,
                                                ordered_million_reads, param)
                if vals['Status'] == "N/A" or vals['Status'] == "NP":
                    all_passed = False
                sample_table.append([vals[k] for k in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq, ordered_million_reads,
                                            param)
            if vals['Status'] == "N/A" or vals['Status'] == "NP":
                all_passed = False
            sample_table.append([vals[k] for k in table_keys])
    if all_passed: param["finished"] = 'Project finished.'
    sample_table.sort()
    sample_table = list(sample_table
                        for sample_table, _ in itertools.groupby(sample_table))
    sample_table.insert(0, [
        'ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered',
        'Status'
    ])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers,
              paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name),
                   sample_table=sample_table,
                   report="project_report",
                   **param)
    param.update(
        {k: "N/A"
         for k in param.keys() if param[k] is None or param[k] == ""})
    output_data["debug"].write(
        json.dumps({
            'param': param,
            'table': sample_table
        }))
    return output_data
Exemple #5
0
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    """
    # parameters
    parameters = {
        "project_name" : project_name,
        "finished" : "Not finished, or cannot yet assess if finished.",
        }
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name":"scilife_name", "customer_name":"customer_name", "project_name":"project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']

    output_data = {'stdout':StringIO(), 'stderr':StringIO(), 'debug':StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters
    
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))

    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            s_d = {s["name"] : {'sample':sample_name, 'id':s["_id"]}}
            samples.update(s_d)
        else:
            if s["barcode_name"] in sample_aliases:
                s_d = {sample_aliases[s["barcode_name"]] : {'sample':sample_aliases[s["barcode_name"]], 'id':s["_id"]}}
                samples.update(s_d)
            else:
                s_d = {s["name"]:{'sample':s["name"], 'id':s["_id"], 'barcode_name':s["barcode_name"]}}
                LOG.warn("No mapping found for sample run:\n  '{}'".format(s_d))

    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key:prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get('customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))

    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l] 
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for k,v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
            else:
                if k not in last_library_preps_srm:
                    LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], last_library_preps[v['sample']].values()[0], v['sample']))
                    continue
        else:
            pass
                    
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample, barcode_seq, ordered_million_reads, param)
        if vals['Status']=="N/A" or vals['Status']=="NP": all_passed = False
        sample_table.append([vals[k] for k in table_keys])

    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            for k,v in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                if vals['Status']=="N/A" or vals['Status']=="NP": all_passed = False
                sample_table.append([vals[k] for k in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            if vals['Status']=="N/A" or vals['Status']=="NP": all_passed = False
            sample_table.append([vals[k] for k in table_keys])
    if all_passed: param["finished"] = 'Project finished.'
    sample_table.sort()
    sample_table = list(sample_table for sample_table,_ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)
    param.update({k:"N/A" for k in param.keys() if param[k] is None or param[k] ==  ""})
    output_data["debug"].write(json.dumps({'param':param, 'table':sample_table}))
    return output_data