def best_practice_note(project_name=None, samples=None, capture_kit="agilent_v4",
                       application="seqcap", flist=None, sample_name_map=None, **kw):
    """Make a best practice application note.

    NB: currently only works for seqcap application. For any other
    ``application`` value no tables are built, but the "bp_seqcap" reST
    template is still rendered with whatever ``parameters`` holds.

    :param project_name: project name; when empty, ``kw["statusdb_project_name"]`` is used instead
    :param samples: samples to work on. Defaults to all samples. (Not read by this function body.)
    :param capture_kit: key looked up in ``SEQCAP_KITS``
    :param application: chosen application
    :param flist: forwarded to the seqcap summary, software table and database table helpers;
        ``None`` (the default) means an empty list
    :param sample_name_map: optional mapping ``sample -> {'customer_name': ...}`` used to
        fill the ``CustomerName`` column of the sample table
    :param kw: reads the optional keys ``amplicon``, ``post_process``,
        ``statusdb_project_name``, ``project`` (output file prefix) and
        ``basedir`` (output directory, defaults to ``os.curdir``)

    :returns: dict with fresh ``'stdout'``, ``'stderr'`` and ``'debug'`` StringIO buffers
    """
    # A literal ``[]`` default is created once and shared by every call (and
    # by every helper it is handed to); build a fresh list per call instead.
    if flist is None:
        flist = []

    # NOTE(review): ``parameters`` is not defined in this function -- presumably
    # a module-level dict. It is updated in place below, so values written here
    # persist after the call. Confirm that this sharing is intended.
    param = parameters
    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}

    if application not in BEST_PRACTICE_NOTES:
        # Only a warning: execution continues and the template is still rendered.
        LOG.warn("No such application '{}'. Valid choices are: \n\t{}".format(application, "\n\t".join(BEST_PRACTICE_NOTES)))

    if application == "seqcap":
        def _rst_table(frame, align):
            # Render a data frame as an indented text table for reST embedding.
            return _indent_texttable_for_rst(_dataframe_to_texttable(frame, align=align))

        df, samples_df = _get_seqcap_summary(flist, kw.get("amplicon", False))
        software_df = _get_software_table(flist)
        database_df = _get_database_table(flist, post_process=kw.get("post_process", None))
        if sample_name_map:
            samples_df.CustomerName = [sample_name_map[s]['customer_name'] for s in samples_df.Sample]
        df.Total = _format_num_reads(df.Total)

        # The summary is split into three tables, each led by the Sample column.
        ttab = _rst_table(df[["Sample"] + SEQCAP_TABLE_COLUMNS[1:5]],
                          ["left", "right", "right", "right", "right"])
        ttab_target = _rst_table(df[["Sample"] + SEQCAP_TABLE_COLUMNS[5:9]],
                                 ["left", "right", "right", "right", "right"])
        ttab_dbsnp = _rst_table(df[["Sample"] + SEQCAP_TABLE_COLUMNS[9:14]],
                                ["left", "right", "right", "right", "right", "right"])
        ttab_samples = _rst_table(samples_df[["Sample", "CustomerName", "Sequence"]],
                                  ["left", "right", "right"])
        ttab_software = _rst_table(software_df, ["left", "right"])
        ttab_database = _rst_table(database_df, ["left", "right"])

        param.update({
            'project_summary': ttab,
            'project_target_summary': ttab_target,
            'project_dbsnp_summary': ttab_dbsnp,
            'table_sample_summary': ttab_samples,
            'capturekit': SEQCAP_KITS[capture_kit],
            'software_versions_table': ttab_software,
            'database_versions_table': ttab_database,
        })
        param['project_name'] = project_name if project_name else kw.get("statusdb_project_name", None)
    # Add applications here

    # Generic rest call for all templates
    make_rest_note("{}_best_practice.rst".format(kw.get("project", None)), report="bp_seqcap", outdir=kw.get("basedir", os.curdir), **param)
    return output_data
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, flat_table=False, **kw):
    """Make a project status note.

    The sample table and the template parameters are collected by
    ``_project_status_note_table``; this function only renders them, either
    as a PDF plus reST note or as a flat table.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    :param flat_table: write ``<project_name>_project_summary.csv`` and echo the
        same rows tab-separated to the stdout buffer, instead of the fancy pdf
    :param kw: forwarded unchanged to ``_project_status_note_table``

    :returns: the ``output_data`` mapping obtained from
        ``_project_status_note_table``; its ``"debug"`` buffer receives a JSON
        dump of the parameters and the sample table
    """
    # Seed values handed to the table builder
    seed_parameters = {
        "project_name": project_name,
        "finished": "",
    }

    output_data, sample_table, param = _project_status_note_table(
        project_name, username, password, url, use_ps_map, use_bc_map,
        check_consistency, ordered_million_reads, uppnex_id,
        customer_reference, exclude_sample_ids, project_alias,
        sample_aliases, projectdb, samplesdb, flowcelldb,
        include_all_samples, seed_parameters, **kw)

    if flat_table:
        # Flat output: the header row gains a leading ProjectID column and
        # every data row gains the project name in that position.
        header = sample_table[0]
        header.insert(0, 'ProjectID')
        wanted_columns = ['ProjectID', 'ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced']
        column_indexes = [header.index(name) for name in wanted_columns]

        outfile = "{}_project_summary.csv".format(project_name)
        with open(outfile, "w") as csv_handle:
            writer = csv.writer(csv_handle)
            for row_number, row in enumerate(sample_table):
                if row_number:
                    # Row 0 is the header, which was already extended above.
                    row.insert(0, project_name)
                fields = [str(row[idx]) for idx in column_indexes]
                writer.writerow(fields)
                output_data['stdout'].write("{}\n".format("\t".join(fields)))
    else:
        # Set report paragraphs
        paragraphs = project_note_paragraphs()
        headers = project_note_headers()

        # Hack: removes Comments paragraph if it is empty
        if not param["finished"]:
            paragraphs.pop("Comments", None)

        paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
        make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
        make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)

    # Empty or missing parameter values are reported as "N/A" in the debug dump.
    blanks = {key: "N/A" for key, value in param.items() if value is None or value == ""}
    param.update(blanks)
    debug_dump = json.dumps({'param': param, 'table': sample_table})
    output_data["debug"].write(debug_dump)

    return output_data
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, flat_table=False, **kw):
    """Make a project status note.

    Data collection is delegated to ``_project_status_note_table``; the
    result is rendered here either as a PDF and reST note or, when
    ``flat_table`` is set, as a flat table.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report
    :param flat_table: write ``<project_name>_project_summary.csv`` and echo the
        same rows tab-separated to the stdout buffer, instead of the fancy pdf
    :param kw: forwarded unchanged to ``_project_status_note_table``

    :returns: the ``output_data`` mapping obtained from
        ``_project_status_note_table``; its ``"debug"`` buffer receives a JSON
        dump of the parameters and the sample table
    """
    # Initial template parameters passed on to the table builder
    initial_params = {"project_name": project_name, "finished": ""}

    table_args = (
        project_name, username, password, url, use_ps_map, use_bc_map,
        check_consistency, ordered_million_reads, uppnex_id,
        customer_reference, exclude_sample_ids, project_alias,
        sample_aliases, projectdb, samplesdb, flowcelldb,
        include_all_samples, initial_params,
    )
    output_data, sample_table, param = _project_status_note_table(*table_args, **kw)

    if flat_table:
        # The header row is extended first so that 'ProjectID' can be looked
        # up together with the other selected columns.
        sample_table[0].insert(0, 'ProjectID')
        selected = ('ProjectID', 'ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced')
        positions = [sample_table[0].index(title) for title in selected]

        outfile = "{}_project_summary.csv".format(project_name)
        stdout_buffer = output_data['stdout']
        with open(outfile, "w") as out_handle:
            csv_out = csv.writer(out_handle)
            for line_no, entry in enumerate(sample_table):
                is_header = line_no == 0
                if not is_header:
                    entry.insert(0, project_name)
                cells = [str(entry[pos]) for pos in positions]
                csv_out.writerow(cells)
                stdout_buffer.write("{}\n".format("\t".join(cells)))
    else:
        # Set report paragraphs
        paragraphs = project_note_paragraphs()
        headers = project_note_headers()

        # Hack: removes Comments paragraph if it is empty
        if not param["finished"]:
            paragraphs.pop("Comments", None)

        paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
        pdf_name = "{}_project_summary.pdf".format(project_name)
        make_note(pdf_name, headers, paragraphs, **param)
        rst_name = "{}_project_summary.rst".format(project_name)
        make_rest_note(rst_name, sample_table=sample_table, report="project_report", **param)

    # Replace None / empty-string parameter values by "N/A" before dumping.
    param.update({name: "N/A" for name, val in param.items() if val is None or val == ""})
    output_data["debug"].write(json.dumps({'param': param, 'table': sample_table}))

    return output_data
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Make a project status note.

    Writes ``<project_name>_project_summary.pdf`` and
    ``<project_name>_project_summary.rst`` through ``make_note`` and
    ``make_rest_note``.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping (not read by this function body)
    :param use_bc_map: use project to barcode name mapping (not read by this function body)
    :param check_consistency: check consistency between mappings (not read by this function body)
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id; overrides the database value when set
    :param customer_reference: customer project name; overrides the database value when set
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names, keyed by barcode name
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report; when false, sample runs
        that are not the latest library prep of their sample are skipped
    :param kw: accepted but not read by this function body

    :returns: dict with ``'stdout'``, ``'stderr'`` and ``'debug'`` StringIO buffers (the
        debug buffer holds a JSON dump of parameters and table), or ``None`` when the
        project is not found in the project database
    """
    # NOTE: the ``{}`` defaults of exclude_sample_ids / sample_aliases are never
    # mutated here; both names are rebound through _literal_eval_option before use.

    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name", "customer_name": "customer_name", "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']

    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    # NOTE(review): fc_con is never used below; the construction is kept in case
    # creating the connection has effects callers rely on -- confirm and remove.
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters

    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        # Callers receive None here rather than the output_data dict.
        return None
    LOG.debug("Working on project '{}'.".format(project_name))

    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            alias = sample_aliases[s["barcode_name"]]
            samples[alias] = {'sample': alias, 'id': s["_id"]}
        else:
            # Unmapped sample runs are only reported, not added to ``samples``.
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"], 'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))

    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get('customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))

    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    all_passed = True

    def _append_row(sample_id, project_sample, barcode_seq):
        # Add one table row for the sample; return False when its status is "N/A" or "NP".
        vals = _set_sample_table_values(sample_id, project_sample, barcode_seq, ordered_million_reads, param)
        sample_table.append([vals[key] for key in table_keys])
        return not (vals['Status'] == "N/A" or vals['Status'] == "NP")

    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for run_name, run_info in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(run_name, run_info))
        sample_id = run_info['sample']
        if not include_all_samples:
            if sample_id not in last_library_preps:
                LOG.info("No library prep information for sample {}; keeping in report".format(sample_id))
            elif run_name not in last_library_preps_srm:
                # list(...) keeps the indexing valid where .values() returns a view.
                latest_prep = list(last_library_preps[sample_id].values())[0]
                LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(run_name, run_info["id"], latest_prep, sample_id))
                continue
        if re.search("Unexpected", run_name):
            continue
        barcode_seq = s_con.get_entry(run_name, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, sample_id, barcode_seq):
            samples_excluded.append(sample_id)
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[sample_id]
        if not _append_row(sample_id, project_sample, barcode_seq):
            all_passed = False

    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = set(row[0] for row in sample_table) | set(samples_excluded)
    samples_not_in_table = set(sample_dict.keys()) - samples_in_table_or_excluded
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            # Only the sample run names (the keys) are needed here.
            for run_name in project_sample_d:
                barcode_seq = s_con.get_entry(run_name, "sequence")
                if not _append_row(sample, project_sample, barcode_seq):
                    all_passed = False
        else:
            if not _append_row(sample, project_sample, None):
                all_passed = False

    if all_passed:
        param["finished"] = 'Project finished.'

    # Sort, then drop adjacent duplicate rows.
    sample_table.sort()
    sample_table = [row for row, _ in itertools.groupby(sample_table)]
    # The header row is exactly the list of table keys.
    sample_table.insert(0, list(table_keys))
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)
    # Empty or missing parameter values are reported as "N/A" in the debug dump.
    param.update({k: "N/A" for k in param.keys() if param[k] is None or param[k] == ""})
    output_data["debug"].write(json.dumps({'param': param, 'table': sample_table}))
    return output_data
def project_status_note(project_name=None, username=None, password=None, url=None,
                        use_ps_map=True, use_bc_map=False, check_consistency=False,
                        ordered_million_reads=None, uppnex_id=None, customer_reference=None,
                        exclude_sample_ids={}, project_alias=None, sample_aliases={},
                        projectdb="projects", samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Make a project status note.

    Writes ``<project_name>_project_summary.pdf`` and
    ``<project_name>_project_summary.rst`` through ``make_note`` and
    ``make_rest_note``.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping (not read by this function body)
    :param use_bc_map: use project to barcode name mapping (not read by this function body)
    :param check_consistency: check consistency between mappings (not read by this function body)
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id; overrides the database value when set
    :param customer_reference: customer project name; overrides the database value when set
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names, keyed by barcode name
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report; when false, sample runs
        that are not the latest library prep of their sample are skipped
    :param kw: accepted but not read by this function body

    :returns: dict with ``'stdout'``, ``'stderr'`` and ``'debug'`` StringIO buffers (the
        debug buffer holds a JSON dump of parameters and table), or ``None`` when the
        project is not found in the project database
    """
    # NOTE: the ``{}`` defaults of exclude_sample_ids / sample_aliases are never
    # mutated here; both names are rebound through _literal_eval_option before use.

    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name", "customer_name": "customer_name", "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced', 'MOrdered', 'Status']

    output_data = {'stdout': StringIO(), 'stderr': StringIO(), 'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username, password=password, url=url)
    # NOTE(review): fc_con is never used below; the construction is kept in case
    # creating the connection has effects callers rely on -- confirm and remove.
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb, username=username, password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username, password=password, url=url)

    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters

    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        # Callers receive None here rather than the output_data dict.
        return None
    LOG.debug("Working on project '{}'.".format(project_name))

    # Get sample run list and loop samples to make mapping sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None, project_alias=project_alias, s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name", None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            alias = sample_aliases[s["barcode_name"]]
            samples[alias] = {'sample': alias, 'id': s["_id"]}
        else:
            # Unmapped sample runs are only reported, not added to ``samples``.
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"], 'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))

    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None) for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get("ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get('customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id', prj_summary.get('uppnex_id'))

    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference

    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})

    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    all_passed = True

    def _append_row(sample_id, project_sample, barcode_seq):
        # Add one table row for the sample; return False when its status is "N/A" or "NP".
        vals = _set_sample_table_values(sample_id, project_sample, barcode_seq, ordered_million_reads, param)
        sample_table.append([vals[key] for key in table_keys])
        return not (vals['Status'] == "N/A" or vals['Status'] == "NP")

    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values() for x in l]
    LOG.debug("Looping through sample map that maps project sample names to sample run metrics ids")
    for run_name, run_info in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(run_name, run_info))
        sample_id = run_info['sample']
        if not include_all_samples:
            if sample_id not in last_library_preps:
                LOG.info("No library prep information for sample {}; keeping in report".format(sample_id))
            elif run_name not in last_library_preps_srm:
                # list(...) keeps the indexing valid where .values() returns a view.
                latest_prep = list(last_library_preps[sample_id].values())[0]
                LOG.info("Sample run {} ('{}') is not latest library prep ({}) for project sample {}: excluding from report".format(run_name, run_info["id"], latest_prep, sample_id))
                continue
        if re.search("Unexpected", run_name):
            continue
        barcode_seq = s_con.get_entry(run_name, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, sample_id, barcode_seq):
            samples_excluded.append(sample_id)
            continue
        # Get the project sample name from the sample run and set table values
        project_sample = sample_dict[sample_id]
        if not _append_row(sample_id, project_sample, barcode_seq):
            all_passed = False

    # Loop through samples in sample_dict for which there is no sample run information
    samples_in_table_or_excluded = set(row[0] for row in sample_table) | set(samples_excluded)
    samples_not_in_table = set(sample_dict.keys()) - samples_in_table_or_excluded
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # Set project_sample_d: a dictionary mapping from sample run metrics name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            # Only the sample run names (the keys) are needed here.
            for run_name in project_sample_d:
                barcode_seq = s_con.get_entry(run_name, "sequence")
                if not _append_row(sample, project_sample, barcode_seq):
                    all_passed = False
        else:
            if not _append_row(sample, project_sample, None):
                all_passed = False

    if all_passed:
        param["finished"] = 'Project finished.'

    # Sort, then drop adjacent duplicate rows.
    sample_table.sort()
    sample_table = [row for row, _ in itertools.groupby(sample_table)]
    # The header row is exactly the list of table keys.
    sample_table.insert(0, list(table_keys))
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers, paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name), sample_table=sample_table, report="project_report", **param)
    # Empty or missing parameter values are reported as "N/A" in the debug dump.
    param.update({k: "N/A" for k in param.keys() if param[k] is None or param[k] == ""})
    output_data["debug"].write(json.dumps({'param': param, 'table': sample_table}))
    return output_data