def _project_status_note_table(project_name=None, username=None, password=None, url=None,
                               use_ps_map=True, use_bc_map=False, check_consistency=False,
                               ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                               exclude_sample_ids={}, project_alias=None, sample_aliases={},
                               projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                               include_all_samples=False, param=None, **kw):
    """Build the sample table and parameter dict for a project status note.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: not used by this function
    :param use_bc_map: not used by this function
    :param check_consistency: not used by this function
    :param ordered_million_reads: option value, run through
        _literal_eval_option and forwarded to _set_sample_table_values
    :param uppnex_id: if set, overrides the uppnex id read from the database
    :param customer_reference: if set, overrides the customer reference read
        from the database
    :param exclude_sample_ids: option value, run through _literal_eval_option
        and forwarded to _exclude_sample_id
    :param project_alias: project alias name, forwarded to _set_sample_run_list
    :param sample_aliases: mapping from barcode name to sample name, consulted
        for sample runs that have no project sample
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: if true, skip the latest-library-prep filter
    :param param: dict of report parameters; updated in place. When omitted a
        new dict is created for this call.

    :returns: tuple ``(output_data, sample_table, param)``; ``None`` when
        ``p_con.get_entry`` finds no entry for ``project_name``.
    """
    # A literal ``{}`` default would be shared between calls, and since this
    # function writes into ``param``, values from one project (ordered_amount,
    # customer_reference, uppnex_project_id, finished, ...) would leak into
    # the next call. ``exclude_sample_ids`` and ``sample_aliases`` are only
    # rebound, never mutated, so their defaults are left untouched.
    if param is None:
        param = {}

    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name", "customer_name": "customer_name", "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered']

    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    # NOTE(review): fc_con is never read below; kept because constructing the
    # connection may have effects callers rely on -- confirm before removing.
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Get the information source for this project
    source = p_con.get_info_source(project_name)

    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))

    # Determine if project is finished by getting all samples sequenced date
    try:
        all_samples_sequenced = prj_summary['project_summary']['all_samples_sequenced']
    except (TypeError, KeyError):
        all_samples_sequenced = False

    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            alias = sample_aliases[s["barcode_name"]]
            samples[alias] = {'sample': alias, 'id': s["_id"]}
        else:
            # Unmapped runs are only reported, not added to the sample map
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"], 'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n  '{}'".format(s_d))

    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name, samples=sample_dict))

    if not param.get('customer_reference'):
        try:
            param['customer_reference'] = prj_summary['details']['customer_project_reference']
        except (TypeError, KeyError):
            param['customer_reference'] = prj_summary.get('customer_reference')
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))

    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(v['sample']))
            elif k not in last_library_preps_srm:
                LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(k, v["id"], ",".join(list(set(last_library_preps[v['sample']].values()))), v['sample']))
                continue

        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample, barcode_seq, ordered_million_reads, param)
        sample_table.append([vals[key] for key in table_keys])

    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample, source)
        if project_sample_d:
            for k, v in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                sample_table.append([vals[key] for key in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            sample_table.append([vals[key] for key in table_keys])
    if all_samples_sequenced:
        param["finished"] = 'All samples for this project have been sequenced.'

    # Sort so that identical rows are adjacent, then keep one row per group
    sample_table.sort()
    sample_table = [row for row, _ in itertools.groupby(sample_table)]
    # Header row uses the same labels as the column keys
    sample_table.insert(0, list(table_keys))

    return output_data, sample_table, param
Ejemplo n.º 2
0
def _project_status_note_table(project_name=None,
                               username=None,
                               password=None,
                               url=None,
                               use_ps_map=True,
                               use_bc_map=False,
                               check_consistency=False,
                               ordered_million_reads=None,
                               uppnex_id=None,
                               customer_reference=None,
                               exclude_sample_ids={},
                               project_alias=None,
                               sample_aliases={},
                               projectdb="projects",
                               samplesdb="samples",
                               flowcelldb="flowcells",
                               include_all_samples=False,
                               param=None,
                               **kw):
    """Collect the sample table and report parameters for a project.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: not used by this function
    :param use_bc_map: not used by this function
    :param check_consistency: not used by this function
    :param ordered_million_reads: option value, run through
        _literal_eval_option and forwarded to _set_sample_table_values
    :param uppnex_id: if set, overrides the uppnex id read from the database
    :param customer_reference: if set, overrides the customer reference read
        from the database
    :param exclude_sample_ids: option value, run through _literal_eval_option
        and forwarded to _exclude_sample_id
    :param project_alias: project alias name, forwarded to _set_sample_run_list
    :param sample_aliases: mapping from barcode name to sample name, consulted
        for sample runs that have no project sample
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: if true, skip the latest-library-prep filter
    :param param: dict of report parameters; updated in place. When omitted a
        new dict is created for this call.

    :returns: tuple ``(output_data, sample_table, param)``; ``None`` when
        ``p_con.get_entry`` finds no entry for ``project_name``.
    """
    # Use a None sentinel instead of a ``{}`` default: this function writes
    # into ``param``, so a shared default dict would carry one project's
    # values (ordered_amount, customer_reference, uppnex_project_id,
    # finished, ...) over into the next call. ``exclude_sample_ids`` and
    # ``sample_aliases`` are only rebound below, never mutated.
    if param is None:
        param = {}

    # mapping project_summary to parameter keys
    ps_to_parameter = {
        "scilife_name": "scilife_name",
        "customer_name": "customer_name",
        "project_name": "project_name"
    }
    # mapping project sample to table
    table_keys = [
        'ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced', 'MOrdered'
    ]

    output_data = {
        'stdout': StringIO(),
        'stderr': StringIO(),
        'debug': StringIO()
    }
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb,
                                       username=username,
                                       password=password,
                                       url=url)
    # NOTE(review): fc_con is never read below; kept because constructing the
    # connection may have effects callers rely on -- confirm before removing.
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password,
                                          url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb,
                                     username=username,
                                     password=password,
                                     url=url)

    # Get the information source for this project
    source = p_con.get_info_source(project_name)

    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))

    # Determine if project is finished by getting all samples sequenced date
    try:
        all_samples_sequenced = prj_summary['project_summary'][
            'all_samples_sequenced']
    except (TypeError, KeyError):
        all_samples_sequenced = False

    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name,
                                           flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get(
                "scilife_name", None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            alias = sample_aliases[s["barcode_name"]]
            samples[alias] = {'sample': alias, 'id': s["_id"]}
        else:
            # Unmapped runs are only reported, not added to the sample map
            s_d = {
                s["name"]: {
                    'sample': s["name"],
                    'id': s["_id"],
                    'barcode_name': s["barcode_name"]
                }
            }
            LOG.warn(
                "No mapping found for sample run:\n  '{}'".format(s_d))

    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({
        key: prj_summary.get(ps_to_parameter[key], None)
        for key in ps_to_parameter.keys()
    })
    param["ordered_amount"] = param.get(
        "ordered_amount",
        p_con.get_ordered_amount(project_name, samples=sample_dict))

    if not param.get('customer_reference'):
        try:
            param['customer_reference'] = prj_summary['details'][
                'customer_project_reference']
        except (TypeError, KeyError):
            param['customer_reference'] = prj_summary.get('customer_reference')
    param['uppnex_project_id'] = param.get('uppnex_project_id',
                                           prj_summary.get('uppnex_id'))

    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [
        x for l in last_library_preps.values() for x in l
    ]
    LOG.debug(
        "Looping through sample map that maps project sample names to sample run metrics ids"
    )
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info(
                    "No library prep information for sample {}; keeping in report"
                    .format(v['sample']))
            elif k not in last_library_preps_srm:
                latest_preps = ",".join(
                    list(set(last_library_preps[v['sample']].values())))
                LOG.info(
                    "Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report"
                    .format(k, v["id"], latest_preps, v['sample']))
                continue

        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        sample_table.append([vals[key] for key in table_keys])

    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = list(set([x[0] for x in sample_table
                                             ])) + samples_excluded
    samples_not_in_table = list(
        set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample, source)
        if project_sample_d:
            for k, v in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(k, "sequence")
                vals = _set_sample_table_values(sample, project_sample,
                                                barcode_seq,
                                                ordered_million_reads, param)
                sample_table.append([vals[key] for key in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq, ordered_million_reads,
                                            param)
            sample_table.append([vals[key] for key in table_keys])
    if all_samples_sequenced:
        param["finished"] = 'All samples for this project have been sequenced.'

    # Sort so that identical rows are adjacent, then keep one row per group
    sample_table.sort()
    sample_table = [row for row, _ in itertools.groupby(sample_table)]
    # Header row uses the same labels as the column keys
    sample_table.insert(0, list(table_keys))

    return output_data, sample_table, param
Ejemplo n.º 3
0
def project_status_note(project_name=None,
                        username=None,
                        password=None,
                        url=None,
                        use_ps_map=True,
                        use_bc_map=False,
                        check_consistency=False,
                        ordered_million_reads=None,
                        uppnex_id=None,
                        customer_reference=None,
                        exclude_sample_ids={},
                        project_alias=None,
                        sample_aliases={},
                        projectdb="projects",
                        samplesdb="samples",
                        flowcelldb="flowcells",
                        include_all_samples=False,
                        **kw):
    """Write the project status note (PDF and reST) for one project.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping (not read in this function)
    :param use_bc_map: use project to barcode name mapping (not read in this function)
    :param check_consistency: check consistency between mappings (not read in this function)
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id; overrides the database value when set
    :param customer_reference: customer project name; overrides the database value when set
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report

    :returns: dict with 'stdout', 'stderr' and 'debug' StringIO buffers (a
        JSON dump of the parameters and table is written to 'debug'), or
        ``None`` when the project is not found.
    """
    # Report parameters, seeded with defaults
    param = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # Parameter key -> project summary field it is read from
    summary_fields = {
        "scilife_name": "scilife_name",
        "customer_name": "customer_name",
        "project_name": "project_name"
    }
    # Column order of the sample table
    columns = [
        'ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered',
        'Status'
    ]
    # Status values that mean a sample has not passed
    not_passed = ("N/A", "NP")

    output_data = {
        'stdout': StringIO(),
        'stderr': StringIO(),
        'debug': StringIO()
    }
    # Open the database connections
    db_kwargs = dict(username=username, password=password, url=url)
    s_con = SampleRunMetricsConnection(dbname=samplesdb, **db_kwargs)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, **db_kwargs)
    p_con = ProjectSummaryConnection(dbname=projectdb, **db_kwargs)

    # Report templates
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()

    # Fetch the project summary; bail out if the project is unknown
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))

    # Map every sample run name to its project sample name and run id
    sample_run_list = _set_sample_run_list(project_name,
                                           flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for run in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, run.get("project_sample_name", None))
        if prj_sample:
            scilife_name = prj_sample['project_sample'].get(
                "scilife_name", None)
            samples[run["name"]] = {'sample': scilife_name, 'id': run["_id"]}
            continue
        if run["barcode_name"] in sample_aliases:
            alias = sample_aliases[run["barcode_name"]]
            samples[alias] = {'sample': alias, 'id': run["_id"]}
            continue
        # No project sample and no alias: report the run, do not map it
        unmapped = {
            run["name"]: {
                'sample': run["name"],
                'id': run["_id"],
                'barcode_name': run["barcode_name"]
            }
        }
        LOG.warn("No mapping found for sample run:\n  '{}'".format(unmapped))

    # Fill the parameters from the project summary
    sample_dict = prj_summary['samples']
    for key in summary_fields.keys():
        param[key] = prj_summary.get(summary_fields[key], None)
    db_ordered_amount = p_con.get_ordered_amount(project_name)
    param["ordered_amount"] = param.get("ordered_amount", db_ordered_amount)
    db_customer_reference = prj_summary.get('customer_reference')
    param['customer_reference'] = param.get('customer_reference',
                                            db_customer_reference)
    db_uppnex_id = prj_summary.get('uppnex_id')
    param['uppnex_project_id'] = param.get('uppnex_project_id', db_uppnex_id)

    # Command line options take precedence over database values
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Evaluate the remaining options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Collect one table row per reported sample run
    sample_table = []
    samples_excluded = []
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = []
    for preps in last_library_preps.values():
        for run_name in preps:
            last_library_preps_srm.append(run_name)
    LOG.debug(
        "Looping through sample map that maps project sample names to sample run metrics ids"
    )
    for run_name, run_info in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(
            run_name, run_info))
        sample_name = run_info['sample']
        if not include_all_samples:
            if sample_name not in last_library_preps.keys():
                LOG.info(
                    "No library prep information for sample {}; keeping in report"
                    .format(sample_name))
            elif run_name not in last_library_preps_srm:
                LOG.info(
                    "Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report"
                    .format(run_name, run_info["id"],
                            last_library_preps[sample_name].values()[0],
                            sample_name))
                continue

        if re.search("Unexpected", run_name):
            continue
        barcode_seq = s_con.get_entry(run_name, "sequence")
        # Skip samples the caller asked to exclude, remembering their names
        if _exclude_sample_id(exclude_sample_ids, sample_name, barcode_seq):
            samples_excluded.append(sample_name)
            continue
        # Look up the project sample and compute the row values
        project_sample = sample_dict[sample_name]
        vals = _set_sample_table_values(sample_name, project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        if vals['Status'] in not_passed:
            all_passed = False
        sample_table.append([vals[col] for col in columns])

    # Add rows for project samples that got neither a row nor an exclusion above
    samples_in_table_or_excluded = list(
        set([row[0] for row in sample_table])) + samples_excluded
    samples_not_in_table = list(
        set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if not project_sample_d:
            # No sample run information at all: row without a barcode
            vals = _set_sample_table_values(sample, project_sample, None,
                                            ordered_million_reads, param)
            if vals['Status'] in not_passed:
                all_passed = False
            sample_table.append([vals[col] for col in columns])
            continue
        for run_name, _run_id in project_sample_d.iteritems():
            barcode_seq = s_con.get_entry(run_name, "sequence")
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq,
                                            ordered_million_reads, param)
            if vals['Status'] in not_passed:
                all_passed = False
            sample_table.append([vals[col] for col in columns])

    if all_passed:
        param["finished"] = 'Project finished.'

    # Sort, collapse identical rows, then put the header row first
    sample_table = [
        row for row, _ in itertools.groupby(sorted(sample_table))
    ]
    sample_table.insert(0, list(columns))

    # Render the PDF and reST notes
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers,
              paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name),
                   sample_table=sample_table,
                   report="project_report",
                   **param)

    # Replace empty parameters with "N/A" before dumping the debug output
    for key, value in list(param.items()):
        if value is None or value == "":
            param[key] = "N/A"
    debug_payload = {'param': param, 'table': sample_table}
    output_data["debug"].write(json.dumps(debug_payload))
    return output_data
Ejemplo n.º 4
0
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Generate a project status note as PDF and reST files.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping (not read in this function)
    :param use_bc_map: use project to barcode name mapping (not read in this function)
    :param check_consistency: check consistency between mappings (not read in this function)
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id; overrides the database value when set
    :param customer_reference: customer project name; overrides the database value when set
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report

    :returns: dict with 'stdout', 'stderr' and 'debug' StringIO buffers (a
        JSON dump of the parameters and table is written to 'debug'), or
        ``None`` when the project is not found.
    """
    # Report parameters with their initial values
    param = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # Parameter key -> project summary field
    ps_fields = {"scilife_name": "scilife_name", "customer_name": "customer_name", "project_name": "project_name"}
    # Column order of the sample table
    header_row = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']

    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Database connections
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Report templates
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()

    # Project summary lookup; nothing to do for an unknown project
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))

    # Build the map: sample run name -> {project sample name, run id}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for srm in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, srm.get("project_sample_name", None))
        if prj_sample:
            mapped_name = prj_sample['project_sample'].get("scilife_name", None)
            samples.update({srm["name"]: {'sample': mapped_name, 'id': srm["_id"]}})
        elif srm["barcode_name"] in sample_aliases:
            aliased_name = sample_aliases[srm["barcode_name"]]
            samples.update({aliased_name: {'sample': aliased_name, 'id': srm["_id"]}})
        else:
            # Runs without a mapping are logged and left out of the map
            orphan = {srm["name"]: {'sample': srm["name"], 'id': srm["_id"], 'barcode_name': srm["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n  '{}'".format(orphan))

    # Copy summary fields into the parameters; existing keys win for the rest
    sample_dict = prj_summary['samples']
    param.update(dict((key, prj_summary.get(ps_fields[key], None)) for key in ps_fields.keys()))
    ordered_amount = p_con.get_ordered_amount(project_name)
    param["ordered_amount"] = param.get("ordered_amount", ordered_amount)
    summary_reference = prj_summary.get('customer_reference')
    param['customer_reference'] = param.get('customer_reference', summary_reference)
    summary_uppnex = prj_summary.get('uppnex_id')
    param['uppnex_project_id'] = param.get('uppnex_project_id', summary_uppnex)

    # Values given as options replace the database values
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Evaluate the remaining options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Gather table rows from the sample runs
    sample_table = []
    samples_excluded = []
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [name for prep in last_library_preps.values() for name in prep]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for srm_name, srm_info in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(srm_name, srm_info))
        prj_sample_name = srm_info['sample']
        if not include_all_samples:
            if prj_sample_name not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; keeping in report".format(prj_sample_name))
            elif srm_name not in last_library_preps_srm:
                LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(srm_name, srm_info["id"], last_library_preps[prj_sample_name].values()[0], prj_sample_name))
                continue

        if re.search("Unexpected", srm_name):
            continue
        barcode_seq = s_con.get_entry(srm_name, "sequence")
        # Honour the exclusion option and keep track of what was dropped
        if _exclude_sample_id(exclude_sample_ids, prj_sample_name, barcode_seq):
            samples_excluded.append(prj_sample_name)
            continue
        # Compute the row values from the project sample
        project_sample = sample_dict[prj_sample_name]
        vals = _set_sample_table_values(prj_sample_name, project_sample, barcode_seq, ordered_million_reads, param)
        if vals['Status'] in ("N/A", "NP"):
            all_passed = False
        sample_table.append([vals[column] for column in header_row])

    # Project samples without a row or an exclusion so far still get rows
    samples_in_table_or_excluded = list(set([row[0] for row in sample_table])) + samples_excluded
    samples_not_in_table = list(set(sample_dict.keys()) - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            for srm_name, _srm_id in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(srm_name, "sequence")
                vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
                if vals['Status'] in ("N/A", "NP"):
                    all_passed = False
                sample_table.append([vals[column] for column in header_row])
        else:
            # No run information: the row is built without a barcode
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample, barcode_seq, ordered_million_reads, param)
            if vals['Status'] in ("N/A", "NP"):
                all_passed = False
            sample_table.append([vals[column] for column in header_row])

    if all_passed:
        param["finished"] = 'Project finished.'

    # Sorting makes duplicate rows adjacent so groupby can collapse them
    sample_table.sort()
    unique_rows = []
    for row, _group in itertools.groupby(sample_table):
        unique_rows.append(row)
    sample_table = [list(header_row)] + unique_rows

    # Produce the PDF and reST notes
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)

    # Empty parameters become "N/A" in the debug dump
    blank_keys = [key for key in param.keys() if param[key] is None or param[key] == ""]
    for key in blank_keys:
        param[key] = "N/A"
    output_data["debug"].write(json.dumps({'param': param, 'table': sample_table}))
    return output_data
Ejemplo n.º 5
0
class TestDbConnection(unittest.TestCase):
    """Exercise the sample, flowcell and project database connection classes.

    The tests connect to the ``samples-test``, ``flowcells-test`` and
    ``projects-test`` databases. The assertions compare against fixed
    counts and names, so they assume the example data for project
    ``J.Doe_00_01`` has been loaded beforehand.
    """

    def setUp(self):
        """Store credentials and example identifiers; open the project connection."""
        self.user = "******"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {
            "sample": "1_120924_AC003CCCXX_TGACCA",
            "flowcell": "AC003CCCXX",
            "project": "J.Doe_00_01"
        }
        self.p_con = ProjectSummaryConnection(dbname="projects-test",
                                              username=self.user,
                                              password=self.pw,
                                              url=self.url)

    def _sample_connection(self):
        """Return a new connection to the ``samples-test`` database."""
        return SampleRunMetricsConnection(dbname="samples-test",
                                          username=self.user,
                                          password=self.pw,
                                          url=self.url)

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_connection()
        self.assertEqual(sample_con.url_string,
                         "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_connection()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_connection()
        flowcell = self.examples["flowcell"]
        project = self.examples["project"]

        sample_ids = sample_con.get_sample_ids(fc_id=flowcell)
        LOG.info("Number of samples before subsetting: {}".format(
            len(sample_ids)))
        self.assertEqual(len(sample_ids), 5)

        sample_ids = sample_con.get_sample_ids(fc_id=flowcell,
                                               sample_prj=project)
        LOG.info("Number of samples after subsetting: {}".format(
            len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_connection()
        flowcell = self.examples["flowcell"]
        project = self.examples["project"]

        samples = sample_con.get_samples(fc_id=flowcell)
        LOG.info("Selecting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 5)
        samples = sample_con.get_samples(fc_id=flowcell, sample_prj=project)
        LOG.info("Selecting on flowcell, subsetting on project: {}".format(
            len(samples)))
        self.assertEqual(len(samples), 2)

        samples = sample_con.get_samples(sample_prj=project)
        LOG.info("Selecting on project: {}".format(len(samples)))
        self.assertEqual(len(samples), 3)
        # Same filter pair as above with the keyword order swapped; the
        # result must not depend on it.
        samples = sample_con.get_samples(sample_prj=project, fc_id=flowcell)
        LOG.info("Selecting on project, subsetting on flowcell: {}".format(
            len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_connection()

        samples = sample_con.get_samples(sample_prj="bogusproject",
                                         fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: {}".format(
            len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_connection()
        sample_ids = sample_con.get_sample_ids(
            sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        self.assertEqual(
            set(sample_names),
            set([
                '1_120924_AC003CCCXX_TGACCA', '2_120924_AC003CCCXX_ACAGTG',
                '1_121015_BB002BBBXX_TGACCA'
            ]))

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        project = self.examples["project"]
        # Temporarily add a "B" prep to sample P001_102 in the stored project.
        prj = self.p_con.get_entry(project)
        prj['samples']['P001_102']['library_prep']['B'] = {
            'sample_run_metrics': {
                '2_120924_AC003CCCXX_TTGGAA': None
            }
        }
        self.p_con.save(prj)
        try:
            preps = self.p_con.get_latest_library_prep(project_name=project)
            srm = [x for l in preps.values() for x in l]
            # Make sure A prep not in list
            self.assertNotIn('2_120924_AC003CCCXX_ACAGTG', srm)
            # Make sure B prep in list
            self.assertIn('2_120924_AC003CCCXX_TTGGAA', srm)
        finally:
            # Reset data even when an assertion fails, so the modified
            # project document cannot leak into other tests.
            prj = self.p_con.get_entry(project)
            del prj['samples']['P001_102']['library_prep']['B']
            self.p_con.save(prj)

    def test_get_barcode_lane_statistics(self):
        """Test getting barcode lane statistics from flowcell database"""
        # No url is passed here, so the connection class's own default applies.
        fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test",
                                              username="******",
                                              password="******")
        # A sample name that does not match yields a pair of Nones
        data = fc_con.get_barcode_lane_statistics("J.Doe_00_01",
                                                  "P001_101_index6",
                                                  "120924_AC003CCCXX", "1")
        self.assertEqual(data, (None, None))
        data = fc_con.get_barcode_lane_statistics("J.Doe_00_01",
                                                  "P001_101_index3",
                                                  "120924_AC003CCCXX", "1")
        self.assertEqual(data, (u'35.22', u'90.05'))
# Example no. 6
# 0
class TestDbConnection(unittest.TestCase):
    """Exercise the sample and project database connection classes.

    The tests connect to the ``samples-test`` and ``projects-test``
    databases. The assertions compare against fixed counts and names, so
    they assume the example data for project ``J.Doe_00_01`` has been
    loaded beforehand.
    """

    def setUp(self):
        """Store credentials and example identifiers; open the project connection."""
        self.user = "******"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {"sample": "1_120924_AC003CCCXX_TGACCA", "flowcell": "AC003CCCXX", "project": "J.Doe_00_01"}
        self.p_con = ProjectSummaryConnection(
            dbname="projects-test", username=self.user, password=self.pw, url=self.url
        )

    def _sample_connection(self):
        """Return a new connection to the ``samples-test`` database."""
        return SampleRunMetricsConnection(
            dbname="samples-test", username=self.user, password=self.pw, url=self.url
        )

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_connection()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_connection()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_connection()
        flowcell = self.examples["flowcell"]
        project = self.examples["project"]

        sample_ids = sample_con.get_sample_ids(fc_id=flowcell)
        LOG.info("Number of samples before subsetting: {}".format(len(sample_ids)))
        self.assertEqual(len(sample_ids), 4)

        sample_ids = sample_con.get_sample_ids(fc_id=flowcell, sample_prj=project)
        LOG.info("Number of samples after subsetting: {}".format(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_connection()
        flowcell = self.examples["flowcell"]
        project = self.examples["project"]

        samples = sample_con.get_samples(fc_id=flowcell)
        LOG.info("Selecting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 4)
        samples = sample_con.get_samples(fc_id=flowcell, sample_prj=project)
        LOG.info("Selecting on flowcell, subsetting on project: {}".format(len(samples)))
        self.assertEqual(len(samples), 2)

        samples = sample_con.get_samples(sample_prj=project)
        LOG.info("Selecting on project: {}".format(len(samples)))
        self.assertEqual(len(samples), 3)
        # Same filter pair as above with the keyword order swapped; the
        # result must not depend on it.
        samples = sample_con.get_samples(sample_prj=project, fc_id=flowcell)
        LOG.info("Selecting on project, subsetting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_connection()

        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_connection()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        self.assertEqual(
            set(sample_names),
            set(["1_120924_AC003CCCXX_TGACCA", "2_120924_AC003CCCXX_ACAGTG", "1_121015_BB002BBBXX_TGACCA"]),
        )

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        project = self.examples["project"]
        # Temporarily add a "B" prep to sample P001_102 in the stored project.
        prj = self.p_con.get_entry(project)
        prj["samples"]["P001_102"]["library_prep"]["B"] = {"sample_run_metrics": {"2_120924_AC003CCCXX_TTGGAA": None}}
        self.p_con.save(prj)
        try:
            preps = self.p_con.get_latest_library_prep(project_name=project)
            srm = [x for l in preps.values() for x in l]
            # Make sure A prep not in list
            self.assertNotIn("2_120924_AC003CCCXX_ACAGTG", srm)
            # Make sure B prep in list
            self.assertIn("2_120924_AC003CCCXX_TTGGAA", srm)
        finally:
            # Reset data even when an assertion fails, so the modified
            # project document cannot leak into other tests.
            prj = self.p_con.get_entry(project)
            del prj["samples"]["P001_102"]["library_prep"]["B"]
            self.p_con.save(prj)
# Example no. 7
# 0
class TestDbConnection(unittest.TestCase):
    """Exercise the sample, flowcell and project database connection classes.

    The tests connect to the ``samples-test``, ``flowcells-test`` and
    ``projects-test`` databases. The assertions compare against fixed
    counts and names, so they assume the example data for project
    ``J.Doe_00_01`` has been loaded beforehand.
    """

    def setUp(self):
        """Store credentials and example identifiers; open the project connection."""
        self.user = "******"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {"sample":"1_120924_AC003CCCXX_TGACCA",
                         "flowcell":"AC003CCCXX",
                         "project":"J.Doe_00_01"}
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username=self.user, password=self.pw, url=self.url)

    def _sample_connection(self):
        """Return a new connection to the ``samples-test`` database."""
        return SampleRunMetricsConnection(dbname="samples-test", username=self.user, password=self.pw, url=self.url)

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_connection()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_connection()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_connection()
        flowcell = self.examples["flowcell"]
        project = self.examples["project"]

        sample_ids = sample_con.get_sample_ids(fc_id=flowcell)
        LOG.info("Number of samples before subsetting: {}".format(len(sample_ids)))
        self.assertEqual(len(sample_ids), 5)

        sample_ids = sample_con.get_sample_ids(fc_id=flowcell, sample_prj=project)
        LOG.info("Number of samples after subsetting: {}".format(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_connection()
        flowcell = self.examples["flowcell"]
        project = self.examples["project"]

        samples = sample_con.get_samples(fc_id=flowcell)
        LOG.info("Selecting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 5)
        samples = sample_con.get_samples(fc_id=flowcell, sample_prj=project)
        LOG.info("Selecting on flowcell, subsetting on project: {}".format(len(samples)))
        self.assertEqual(len(samples), 2)

        samples = sample_con.get_samples(sample_prj=project)
        LOG.info("Selecting on project: {}".format(len(samples)))
        self.assertEqual(len(samples), 3)
        # Same filter pair as above with the keyword order swapped; the
        # result must not depend on it.
        samples = sample_con.get_samples(sample_prj=project, fc_id=flowcell)
        LOG.info("Selecting on project, subsetting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_connection()

        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: {}".format(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_connection()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        self.assertEqual(set(sample_names) , set(['1_120924_AC003CCCXX_TGACCA', '2_120924_AC003CCCXX_ACAGTG', '1_121015_BB002BBBXX_TGACCA']))

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        project = self.examples["project"]
        # Temporarily add a "B" prep to sample P001_102 in the stored project.
        prj = self.p_con.get_entry(project)
        prj['samples']['P001_102']['library_prep']['B'] = {'sample_run_metrics': {'2_120924_AC003CCCXX_TTGGAA': None}}
        self.p_con.save(prj)
        try:
            preps = self.p_con.get_latest_library_prep(project_name=project)
            srm = [x for l in preps.values() for x in l]
            # Make sure A prep not in list
            self.assertNotIn('2_120924_AC003CCCXX_ACAGTG', srm)
            # Make sure B prep in list
            self.assertIn('2_120924_AC003CCCXX_TTGGAA', srm)
        finally:
            # Reset data even when an assertion fails, so the modified
            # project document cannot leak into other tests.
            prj = self.p_con.get_entry(project)
            del prj['samples']['P001_102']['library_prep']['B']
            self.p_con.save(prj)

    def test_get_barcode_lane_statistics(self):
        """Test getting barcode lane statistics from flowcell database"""
        # No url is passed here, so the connection class's own default applies.
        fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
        # A sample name that does not match yields a pair of Nones
        data = fc_con.get_barcode_lane_statistics("J.Doe_00_01", "P001_101_index6", "120924_AC003CCCXX", "1")
        self.assertEqual(data, (None, None))
        data = fc_con.get_barcode_lane_statistics("J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX", "1")
        self.assertEqual(data, (u'35.22', u'90.05'))