def _project_status_note_table(project_name=None, username=None, password=None,
                               url=None, use_ps_map=True, use_bc_map=False,
                               check_consistency=False,
                               ordered_million_reads=None, uppnex_id=None,
                               customer_reference=None, exclude_sample_ids=None,
                               project_alias=None, sample_aliases=None,
                               projectdb="projects", samplesdb="samples",
                               flowcelldb="flowcells",
                               include_all_samples=False, param=None, **kw):
    """Collect the per-sample table data for a project status note.

    Queries the samples/flowcells/projects databases and assembles a
    sorted, deduplicated table of per-sample sequencing status.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping (unused here)
    :param use_bc_map: use project to barcode name mapping (unused here)
    :param check_consistency: check consistency between mappings (unused here)
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id; overrides the database value
    :param customer_reference: customer project name; overrides the database value
    :param exclude_sample_ids: exclude some sample ids from the table
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples, not only latest library prep
    :param param: pre-seeded parameter dictionary (mutated and returned)

    :returns: ``(output_data, sample_table, param)`` tuple, or ``None``
        if the project does not exist.
    """
    # NOTE(review): an identical definition of this function appears twice
    # in this file; the later definition shadows this one at import time.
    # FIX: the original signature used mutable default arguments
    # (exclude_sample_ids={}, sample_aliases={}, param={}).  Default dicts
    # are created once and shared across calls, and ``param`` in particular
    # is mutated below (param.update / item assignment), so state leaked
    # from one invocation to the next.  Use None sentinels instead.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    if param is None:
        param = {}
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name",
                       "customer_name": "customer_name",
                       "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced',
                  'MOrdered']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(),
                   'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username,
                                       password=password, url=url)
    # fc_con is unused below but kept in case the connection constructor
    # has side effects relied upon elsewhere -- TODO confirm
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username,
                                     password=password, url=url)
    # Get the information source for this project
    source = p_con.get_info_source(project_name)
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Determine if project is finished by getting all samples sequenced date
    try:
        all_samples_sequenced = \
            prj_summary['project_summary']['all_samples_sequenced']
    except (TypeError, KeyError):
        all_samples_sequenced = False
    # Get sample run list and loop samples to make mapping
    # sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name",
                                                           None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            samples[sample_aliases[s["barcode_name"]]] = {
                'sample': sample_aliases[s["barcode_name"]],
                'id': s["_id"]}
        else:
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"],
                               'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None)
                  for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get(
        "ordered_amount",
        p_con.get_ordered_amount(project_name, samples=sample_dict))
    if not param.get('customer_reference'):
        try:
            param['customer_reference'] = \
                prj_summary['details']['customer_project_reference']
        except (TypeError, KeyError):
            param['customer_reference'] = prj_summary.get('customer_reference')
    param['uppnex_project_id'] = param.get('uppnex_project_id',
                                           prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values()
                              for x in l]
    LOG.debug("Looping through sample map that maps project sample names "
              "to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; "
                         "keeping in report".format(v['sample']))
            elif k not in last_library_preps_srm:
                LOG.info("Sample run {} ('{}') is not latest library prep "
                         "({}) for project sample {}: excluding from "
                         "report".format(
                             k, v["id"],
                             ",".join(list(set(
                                 last_library_preps[v['sample']].values()))),
                             v['sample']))
                continue
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table
        # values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        # FIX: use a distinct comprehension variable; the original reused
        # ``k``, which a Python 2 list comprehension leaks into (and
        # clobbers) the enclosing loop variable
        sample_table.append([vals[tk] for tk in table_keys])
    # Loop through samples in sample_dict for which there is no sample run
    # information
    samples_in_table_or_excluded = (list(set([x[0] for x in sample_table]))
                                    + samples_excluded)
    samples_not_in_table = list(set(sample_dict.keys())
                                - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # project_sample_d: a dictionary mapping from sample run metrics
        # name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample, source)
        if project_sample_d:
            # NOTE(review): iteritems is Python 2 only; kept for
            # consistency with the rest of this module
            for srm_name, srm_id in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(srm_name, "sequence")
                vals = _set_sample_table_values(sample, project_sample,
                                                barcode_seq,
                                                ordered_million_reads, param)
                sample_table.append([vals[tk] for tk in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq,
                                            ordered_million_reads, param)
            sample_table.append([vals[tk] for tk in table_keys])
    if all_samples_sequenced:
        param["finished"] = 'All samples for this project have been sequenced.'
    # Sort, drop consecutive duplicate rows, then prepend the header row
    sample_table.sort()
    sample_table = list(row for row, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'SubmittedID', 'BarcodeSeq',
                            'MSequenced', 'MOrdered'])
    return output_data, sample_table, param
def _project_status_note_table(project_name=None, username=None, password=None,
                               url=None, use_ps_map=True, use_bc_map=False,
                               check_consistency=False,
                               ordered_million_reads=None, uppnex_id=None,
                               customer_reference=None, exclude_sample_ids=None,
                               project_alias=None, sample_aliases=None,
                               projectdb="projects", samplesdb="samples",
                               flowcelldb="flowcells",
                               include_all_samples=False, param=None, **kw):
    """Collect the per-sample table data for a project status note.

    Queries the samples/flowcells/projects databases and assembles a
    sorted, deduplicated table of per-sample sequencing status.

    :param project_name: project name
    :param username: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping (unused here)
    :param use_bc_map: use project to barcode name mapping (unused here)
    :param check_consistency: check consistency between mappings (unused here)
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id; overrides the database value
    :param customer_reference: customer project name; overrides the database value
    :param exclude_sample_ids: exclude some sample ids from the table
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples, not only latest library prep
    :param param: pre-seeded parameter dictionary (mutated and returned)

    :returns: ``(output_data, sample_table, param)`` tuple, or ``None``
        if the project does not exist.
    """
    # NOTE(review): an identical definition of this function appears twice
    # in this file; this later definition shadows the earlier one.
    # FIX: the original signature used mutable default arguments
    # (exclude_sample_ids={}, sample_aliases={}, param={}).  Default dicts
    # are created once and shared across calls, and ``param`` in particular
    # is mutated below (param.update / item assignment), so state leaked
    # from one invocation to the next.  Use None sentinels instead.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    if param is None:
        param = {}
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name",
                       "customer_name": "customer_name",
                       "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'SubmittedID', 'BarcodeSeq', 'MSequenced',
                  'MOrdered']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(),
                   'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username,
                                       password=password, url=url)
    # fc_con is unused below but kept in case the connection constructor
    # has side effects relied upon elsewhere -- TODO confirm
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username,
                                     password=password, url=url)
    # Get the information source for this project
    source = p_con.get_info_source(project_name)
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Determine if project is finished by getting all samples sequenced date
    try:
        all_samples_sequenced = \
            prj_summary['project_summary']['all_samples_sequenced']
    except (TypeError, KeyError):
        all_samples_sequenced = False
    # Get sample run list and loop samples to make mapping
    # sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name",
                                                           None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            samples[sample_aliases[s["barcode_name"]]] = {
                'sample': sample_aliases[s["barcode_name"]],
                'id': s["_id"]}
        else:
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"],
                               'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None)
                  for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get(
        "ordered_amount",
        p_con.get_ordered_amount(project_name, samples=sample_dict))
    if not param.get('customer_reference'):
        try:
            param['customer_reference'] = \
                prj_summary['details']['customer_project_reference']
        except (TypeError, KeyError):
            param['customer_reference'] = prj_summary.get('customer_reference')
    param['uppnex_project_id'] = param.get('uppnex_project_id',
                                           prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values()
                              for x in l]
    LOG.debug("Looping through sample map that maps project sample names "
              "to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; "
                         "keeping in report".format(v['sample']))
            elif k not in last_library_preps_srm:
                LOG.info("Sample run {} ('{}') is not latest library prep "
                         "({}) for project sample {}: excluding from "
                         "report".format(
                             k, v["id"],
                             ",".join(list(set(
                                 last_library_preps[v['sample']].values()))),
                             v['sample']))
                continue
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table
        # values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        # FIX: use a distinct comprehension variable; the original reused
        # ``k``, which a Python 2 list comprehension leaks into (and
        # clobbers) the enclosing loop variable
        sample_table.append([vals[tk] for tk in table_keys])
    # Loop through samples in sample_dict for which there is no sample run
    # information
    samples_in_table_or_excluded = (list(set([x[0] for x in sample_table]))
                                    + samples_excluded)
    samples_not_in_table = list(set(sample_dict.keys())
                                - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # project_sample_d: a dictionary mapping from sample run metrics
        # name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample, source)
        if project_sample_d:
            # NOTE(review): iteritems is Python 2 only; kept for
            # consistency with the rest of this module
            for srm_name, srm_id in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(srm_name, "sequence")
                vals = _set_sample_table_values(sample, project_sample,
                                                barcode_seq,
                                                ordered_million_reads, param)
                sample_table.append([vals[tk] for tk in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq,
                                            ordered_million_reads, param)
            sample_table.append([vals[tk] for tk in table_keys])
    if all_samples_sequenced:
        param["finished"] = 'All samples for this project have been sequenced.'
    # Sort, drop consecutive duplicate rows, then prepend the header row
    sample_table.sort()
    sample_table = list(row for row, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'SubmittedID', 'BarcodeSeq',
                            'MSequenced', 'MOrdered'])
    return output_data, sample_table, param
def project_status_note(project_name=None, username=None, password=None,
                        url=None, use_ps_map=True, use_bc_map=False,
                        check_consistency=False, ordered_million_reads=None,
                        uppnex_id=None, customer_reference=None,
                        exclude_sample_ids=None, project_alias=None,
                        sample_aliases=None, projectdb="projects",
                        samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report

    :returns: ``output_data`` dict of stdout/stderr/debug streams, or
        ``None`` if the project does not exist.  Side effects: writes
        ``<project>_project_summary.pdf`` and ``<project>_project_summary.rst``.
    """
    # NOTE(review): an identical definition of this function appears twice
    # in this file; the later definition shadows this one at import time.
    # FIX: the original signature used mutable default arguments
    # (exclude_sample_ids={}, sample_aliases={}); default dicts are shared
    # between calls.  Use None sentinels and fresh dicts per call.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name",
                       "customer_name": "customer_name",
                       "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced',
                  'MOrdered', 'Status']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(),
                   'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username,
                                       password=password, url=url)
    # fc_con is unused below but kept in case the connection constructor
    # has side effects relied upon elsewhere -- TODO confirm
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username,
                                     password=password, url=url)
    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Get sample run list and loop samples to make mapping
    # sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name",
                                                           None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            samples[sample_aliases[s["barcode_name"]]] = {
                'sample': sample_aliases[s["barcode_name"]],
                'id': s["_id"]}
        else:
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"],
                               'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None)
                  for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get(
        "ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get(
        'customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id',
                                           prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    # NOTE(review): all_passed starts True, so a project with no samples
    # at all is reported as finished -- confirm this is intended
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values()
                              for x in l]
    LOG.debug("Looping through sample map that maps project sample names "
              "to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; "
                         "keeping in report".format(v['sample']))
            elif k not in last_library_preps_srm:
                LOG.info("Sample run {} ('{}') is not latest library prep "
                         "({}) for project sample {}: excluding from "
                         "report".format(
                             k, v["id"],
                             last_library_preps[v['sample']].values()[0],
                             v['sample']))
                continue
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table
        # values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        if vals['Status'] in ("N/A", "NP"):
            all_passed = False
        # FIX: use a distinct comprehension variable; the original reused
        # ``k``, which a Python 2 list comprehension leaks into (and
        # clobbers) the enclosing loop variable
        sample_table.append([vals[tk] for tk in table_keys])
    # Loop through samples in sample_dict for which there is no sample run
    # information
    samples_in_table_or_excluded = (list(set([x[0] for x in sample_table]))
                                    + samples_excluded)
    samples_not_in_table = list(set(sample_dict.keys())
                                - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # project_sample_d: a dictionary mapping from sample run metrics
        # name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            # NOTE(review): iteritems is Python 2 only; kept for
            # consistency with the rest of this module
            for srm_name, srm_id in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(srm_name, "sequence")
                vals = _set_sample_table_values(sample, project_sample,
                                                barcode_seq,
                                                ordered_million_reads, param)
                if vals['Status'] in ("N/A", "NP"):
                    all_passed = False
                sample_table.append([vals[tk] for tk in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq,
                                            ordered_million_reads, param)
            if vals['Status'] in ("N/A", "NP"):
                all_passed = False
            sample_table.append([vals[tk] for tk in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'
    # Sort, drop consecutive duplicate rows, then prepend the header row
    sample_table.sort()
    sample_table = list(row for row, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq',
                            'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers,
              paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name),
                   sample_table=sample_table, report="project_report",
                   **param)
    # Replace empty parameters with "N/A" before the debug dump
    param.update({key: "N/A" for key in param.keys()
                  if param[key] is None or param[key] == ""})
    output_data["debug"].write(json.dumps({'param': param,
                                           'table': sample_table}))
    return output_data
def project_status_note(project_name=None, username=None, password=None,
                        url=None, use_ps_map=True, use_bc_map=False,
                        check_consistency=False, ordered_million_reads=None,
                        uppnex_id=None, customer_reference=None,
                        exclude_sample_ids=None, project_alias=None,
                        sample_aliases=None, projectdb="projects",
                        samplesdb="samples", flowcelldb="flowcells",
                        include_all_samples=False, **kw):
    """Make a project status note. Used keywords:

    :param project_name: project name
    :param user: db user name
    :param password: db password
    :param url: db url
    :param use_ps_map: use project summary mapping
    :param use_bc_map: use project to barcode name mapping
    :param check_consistency: check consistency between mappings
    :param ordered_million_reads: number of ordered reads in millions
    :param uppnex_id: the uppnex id
    :param customer_reference: customer project name
    :param exclude_sample_ids: exclude some sample ids from project note
    :param project_alias: project alias name
    :param sample_aliases: sample alias names
    :param projectdb: project db name
    :param samplesdb: samples db name
    :param flowcelldb: flowcells db name
    :param include_all_samples: include all samples in report

    :returns: ``output_data`` dict of stdout/stderr/debug streams, or
        ``None`` if the project does not exist.  Side effects: writes
        ``<project>_project_summary.pdf`` and ``<project>_project_summary.rst``.
    """
    # NOTE(review): an identical definition of this function appears twice
    # in this file; this later definition shadows the earlier one.
    # FIX: the original signature used mutable default arguments
    # (exclude_sample_ids={}, sample_aliases={}); default dicts are shared
    # between calls.  Use None sentinels and fresh dicts per call.
    if exclude_sample_ids is None:
        exclude_sample_ids = {}
    if sample_aliases is None:
        sample_aliases = {}
    # parameters
    parameters = {
        "project_name": project_name,
        "finished": "Not finished, or cannot yet assess if finished.",
    }
    # mapping project_summary to parameter keys
    ps_to_parameter = {"scilife_name": "scilife_name",
                       "customer_name": "customer_name",
                       "project_name": "project_name"}
    # mapping project sample to table
    table_keys = ['ScilifeID', 'CustomerID', 'BarcodeSeq', 'MSequenced',
                  'MOrdered', 'Status']
    output_data = {'stdout': StringIO(), 'stderr': StringIO(),
                   'debug': StringIO()}
    # Connect and run
    s_con = SampleRunMetricsConnection(dbname=samplesdb, username=username,
                                       password=password, url=url)
    # fc_con is unused below but kept in case the connection constructor
    # has side effects relied upon elsewhere -- TODO confirm
    fc_con = FlowcellRunMetricsConnection(dbname=flowcelldb,
                                          username=username,
                                          password=password, url=url)
    p_con = ProjectSummaryConnection(dbname=projectdb, username=username,
                                     password=password, url=url)
    # Set report paragraphs
    paragraphs = project_note_paragraphs()
    headers = project_note_headers()
    # Set local param variable
    param = parameters
    # Get project summary from project database
    sample_aliases = _literal_eval_option(sample_aliases, default={})
    prj_summary = p_con.get_entry(project_name)
    if not prj_summary:
        LOG.warn("No such project '{}'".format(project_name))
        return
    LOG.debug("Working on project '{}'.".format(project_name))
    # Get sample run list and loop samples to make mapping
    # sample -> {sampleruns}
    sample_run_list = _set_sample_run_list(project_name, flowcell=None,
                                           project_alias=project_alias,
                                           s_con=s_con)
    samples = {}
    for s in sample_run_list:
        prj_sample = p_con.get_project_sample(
            project_name, s.get("project_sample_name", None))
        if prj_sample:
            sample_name = prj_sample['project_sample'].get("scilife_name",
                                                           None)
            samples[s["name"]] = {'sample': sample_name, 'id': s["_id"]}
        elif s["barcode_name"] in sample_aliases:
            samples[sample_aliases[s["barcode_name"]]] = {
                'sample': sample_aliases[s["barcode_name"]],
                'id': s["_id"]}
        else:
            s_d = {s["name"]: {'sample': s["name"], 'id': s["_id"],
                               'barcode_name': s["barcode_name"]}}
            LOG.warn("No mapping found for sample run:\n '{}'".format(s_d))
    # Convert to mapping from desired sample name to list of aliases
    # Less important for the moment; one solution is to update the
    # Google docs summary table to use the P names
    sample_dict = prj_summary['samples']
    param.update({key: prj_summary.get(ps_to_parameter[key], None)
                  for key in ps_to_parameter.keys()})
    param["ordered_amount"] = param.get(
        "ordered_amount", p_con.get_ordered_amount(project_name))
    param['customer_reference'] = param.get(
        'customer_reference', prj_summary.get('customer_reference'))
    param['uppnex_project_id'] = param.get('uppnex_project_id',
                                           prj_summary.get('uppnex_id'))
    # Override database values if options passed at command line
    if uppnex_id:
        param["uppnex_project_id"] = uppnex_id
    if customer_reference:
        param["customer_reference"] = customer_reference
    # Process options
    ordered_million_reads = _literal_eval_option(ordered_million_reads)
    exclude_sample_ids = _literal_eval_option(exclude_sample_ids, default={})
    ## Start collecting the data
    sample_table = []
    samples_excluded = []
    # NOTE(review): all_passed starts True, so a project with no samples
    # at all is reported as finished -- confirm this is intended
    all_passed = True
    last_library_preps = p_con.get_latest_library_prep(project_name)
    last_library_preps_srm = [x for l in last_library_preps.values()
                              for x in l]
    LOG.debug("Looping through sample map that maps project sample names "
              "to sample run metrics ids")
    for k, v in samples.items():
        LOG.debug("project sample '{}' maps to '{}'".format(k, v))
        if not include_all_samples:
            if v['sample'] not in last_library_preps.keys():
                LOG.info("No library prep information for sample {}; "
                         "keeping in report".format(v['sample']))
            elif k not in last_library_preps_srm:
                LOG.info("Sample run {} ('{}') is not latest library prep "
                         "({}) for project sample {}: excluding from "
                         "report".format(
                             k, v["id"],
                             last_library_preps[v['sample']].values()[0],
                             v['sample']))
                continue
        if re.search("Unexpected", k):
            continue
        barcode_seq = s_con.get_entry(k, "sequence")
        # Exclude sample id?
        if _exclude_sample_id(exclude_sample_ids, v['sample'], barcode_seq):
            samples_excluded.append(v['sample'])
            continue
        # Get the project sample name from the sample run and set table
        # values
        project_sample = sample_dict[v['sample']]
        vals = _set_sample_table_values(v['sample'], project_sample,
                                        barcode_seq, ordered_million_reads,
                                        param)
        if vals['Status'] in ("N/A", "NP"):
            all_passed = False
        # FIX: use a distinct comprehension variable; the original reused
        # ``k``, which a Python 2 list comprehension leaks into (and
        # clobbers) the enclosing loop variable
        sample_table.append([vals[tk] for tk in table_keys])
    # Loop through samples in sample_dict for which there is no sample run
    # information
    samples_in_table_or_excluded = (list(set([x[0] for x in sample_table]))
                                    + samples_excluded)
    samples_not_in_table = list(set(sample_dict.keys())
                                - set(samples_in_table_or_excluded))
    for sample in samples_not_in_table:
        if re.search("Unexpected", sample):
            continue
        project_sample = sample_dict[sample]
        # project_sample_d: a dictionary mapping from sample run metrics
        # name to sample run metrics database id
        project_sample_d = _set_project_sample_dict(project_sample)
        if project_sample_d:
            # NOTE(review): iteritems is Python 2 only; kept for
            # consistency with the rest of this module
            for srm_name, srm_id in project_sample_d.iteritems():
                barcode_seq = s_con.get_entry(srm_name, "sequence")
                vals = _set_sample_table_values(sample, project_sample,
                                                barcode_seq,
                                                ordered_million_reads, param)
                if vals['Status'] in ("N/A", "NP"):
                    all_passed = False
                sample_table.append([vals[tk] for tk in table_keys])
        else:
            barcode_seq = None
            vals = _set_sample_table_values(sample, project_sample,
                                            barcode_seq,
                                            ordered_million_reads, param)
            if vals['Status'] in ("N/A", "NP"):
                all_passed = False
            sample_table.append([vals[tk] for tk in table_keys])
    if all_passed:
        param["finished"] = 'Project finished.'
    # Sort, drop consecutive duplicate rows, then prepend the header row
    sample_table.sort()
    sample_table = list(row for row, _ in itertools.groupby(sample_table))
    sample_table.insert(0, ['ScilifeID', 'CustomerID', 'BarcodeSeq',
                            'MSequenced', 'MOrdered', 'Status'])
    paragraphs["Samples"]["tpl"] = make_sample_table(sample_table)
    make_note("{}_project_summary.pdf".format(project_name), headers,
              paragraphs, **param)
    make_rest_note("{}_project_summary.rst".format(project_name),
                   sample_table=sample_table, report="project_report",
                   **param)
    # Replace empty parameters with "N/A" before the debug dump
    param.update({key: "N/A" for key in param.keys()
                  if param[key] is None or param[key] == ""})
    output_data["debug"].write(json.dumps({'param': param,
                                           'table': sample_table}))
    return output_data
class TestDbConnection(unittest.TestCase):
    """Integration tests for the sample/flowcell/project db connections.

    Runs against the ``*-test`` databases populated with the J.Doe_00_01
    example project.
    """

    def setUp(self):
        self.user = "******"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {
            "sample": "1_120924_AC003CCCXX_TGACCA",
            "flowcell": "AC003CCCXX",
            "project": "J.Doe_00_01",
        }
        self.p_con = ProjectSummaryConnection(dbname="projects-test",
                                              username=self.user,
                                              password=self.pw,
                                              url=self.url)

    def _sample_con(self):
        # Every test talks to the same test database; build the
        # connection in one place.
        return SampleRunMetricsConnection(dbname="samples-test",
                                          username=self.user,
                                          password=self.pw,
                                          url=self.url)

    def test_connection(self):
        """A sample db connection exposes the expected url string."""
        con = self._sample_con()
        self.assertEqual(con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """A sample run resolves to its flowcell."""
        con = self._sample_con()
        flowcell = con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(flowcell), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Sample ids can be fetched by flowcell, then narrowed by project."""
        con = self._sample_con()
        ids = con.get_sample_ids(fc_id=self.examples["flowcell"])
        LOG.info("Number of samples before subsetting: " + str(len(ids)))
        self.assertEqual(len(ids), 5)
        ids = con.get_sample_ids(fc_id=self.examples["flowcell"],
                                 sample_prj=self.examples["project"])
        LOG.info("Number of samples after subsetting: " + str(len(ids)))
        self.assertEqual(len(ids), 2)

    def test_get_samples(self):
        """Samples can be fetched by flowcell and/or project."""
        con = self._sample_con()
        hits = con.get_samples(fc_id=self.examples["flowcell"])
        LOG.info("Selecting on flowcell: " + str(len(hits)))
        self.assertEqual(len(hits), 5)
        hits = con.get_samples(fc_id=self.examples["flowcell"],
                               sample_prj=self.examples["project"])
        LOG.info("Selecting on flowcell, subsetting on project: "
                 + str(len(hits)))
        self.assertEqual(len(hits), 2)
        hits = con.get_samples(sample_prj=self.examples["project"])
        LOG.info("Selecting on project: " + str(len(hits)))
        self.assertEqual(len(hits), 3)
        hits = con.get_samples(sample_prj=self.examples["project"],
                               fc_id=self.examples["flowcell"])
        LOG.info("Selecting on project, subsetting on flowcell: "
                 + str(len(hits)))
        self.assertEqual(len(hits), 2)

    def test_get_samples_wrong_info(self):
        """A bogus project combined with a real flowcell yields no samples."""
        con = self._sample_con()
        hits = con.get_samples(sample_prj="bogusproject",
                               fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: "
                 + str(len(hits)))
        self.assertEqual(len(hits), 0)

    def test_get_project_sample_ids(self):
        """Project sample ids map back to the expected sample names."""
        con = self._sample_con()
        ids = con.get_sample_ids(sample_prj=self.examples["project"])
        names = [con.db.get(doc_id)["name"] for doc_id in ids]
        self.assertEqual(set(names),
                         set(['1_120924_AC003CCCXX_TGACCA',
                              '2_120924_AC003CCCXX_ACAGTG',
                              '1_121015_BB002BBBXX_TGACCA']))

    def test_get_latest_library_prep(self):
        """Only the latest library prep's sample runs are reported."""
        # Inject a newer "B" prep for one sample, then restore the doc.
        doc = self.p_con.get_entry("J.Doe_00_01")
        doc['samples']['P001_102']['library_prep']['B'] = {
            'sample_run_metrics': {'2_120924_AC003CCCXX_TTGGAA': None}}
        self.p_con.save(doc)
        preps = self.p_con.get_latest_library_prep(
            project_name=self.examples["project"])
        srm = [run for runs in preps.values() for run in runs]
        # Make sure A prep not in list
        self.assertNotIn('2_120924_AC003CCCXX_ACAGTG', srm)
        # Make sure B prep in list
        self.assertIn('2_120924_AC003CCCXX_TTGGAA', srm)
        # Reset data
        doc = self.p_con.get_entry("J.Doe_00_01")
        del doc['samples']['P001_102']['library_prep']['B']
        self.p_con.save(doc)

    def test_get_barcode_lane_statistics(self):
        """Barcode lane statistics are fetched from the flowcell database."""
        fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test",
                                              username="******",
                                              password="******")
        # Try getting wrong sample name, should return None
        data = fc_con.get_barcode_lane_statistics(
            "J.Doe_00_01", "P001_101_index6", "120924_AC003CCCXX", "1")
        self.assertEqual(data, (None, None))
        data = fc_con.get_barcode_lane_statistics(
            "J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX", "1")
        self.assertEqual(data, (u'35.22', u'90.05'))
class TestDbConnection(unittest.TestCase):
    """Connection-level tests against the test sample/project databases.

    NOTE(review): this module appears to define ``TestDbConnection`` more
    than once; under unittest discovery later definitions shadow earlier
    ones, so only the last copy's tests actually run -- confirm and
    deduplicate.
    """

    def setUp(self):
        # Shared credentials and example document identifiers for all tests.
        self.user = "******"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {"sample": "1_120924_AC003CCCXX_TGACCA",
                         "flowcell": "AC003CCCXX",
                         "project": "J.Doe_00_01"}
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username=self.user, password=self.pw, url=self.url)

    def _sample_con(self):
        # Helper: fresh connection to the test samples database.
        return SampleRunMetricsConnection(dbname="samples-test", username=self.user, password=self.pw, url=self.url)

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_con()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_con()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"])
        LOG.info("Number of samples before subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 4)
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Number of samples after subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"])
        LOG.info("Selecting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 4)
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Selecting on flowcell, subsetting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 2)
        samples = sample_con.get_samples(sample_prj=self.examples["project"])
        LOG.info("Selecting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 3)
        samples = sample_con.get_samples(sample_prj=self.examples["project"], fc_id=self.examples["flowcell"])
        LOG.info("Selecting on project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        # Order of ids from the view is unspecified; compare as sets.
        self.assertEqual(set(sample_names),
                         set(["1_120924_AC003CCCXX_TGACCA",
                              "2_120924_AC003CCCXX_ACAGTG",
                              "1_121015_BB002BBBXX_TGACCA"]))

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        # Inject a newer "B" prep for sample P001_102, verify it supersedes "A".
        prj = self.p_con.get_entry("J.Doe_00_01")
        prj["samples"]["P001_102"]["library_prep"]["B"] = {"sample_run_metrics": {"2_120924_AC003CCCXX_TTGGAA": None}}
        self.p_con.save(prj)
        preps = self.p_con.get_latest_library_prep(project_name=self.examples["project"])
        srm = [x for l in preps.values() for x in l]
        # Make sure A prep not in list
        self.assertNotIn("2_120924_AC003CCCXX_ACAGTG", srm)
        # Make sure B prep in list
        self.assertIn("2_120924_AC003CCCXX_TTGGAA", srm)
        # Reset data
        prj = self.p_con.get_entry("J.Doe_00_01")
        del prj["samples"]["P001_102"]["library_prep"]["B"]
        self.p_con.save(prj)
class TestDbConnection(unittest.TestCase):
    """Connection-level tests against the test sample/project/flowcell databases.

    NOTE(review): this module appears to define ``TestDbConnection`` more
    than once with diverging expected counts (5 vs 4 samples per flowcell);
    later definitions shadow earlier ones under unittest discovery --
    confirm which copy is authoritative and deduplicate.
    """

    def setUp(self):
        # Shared credentials and example document identifiers for all tests.
        self.user = "******"
        self.pw = "pw"
        self.url = "localhost"
        self.examples = {"sample": "1_120924_AC003CCCXX_TGACCA",
                         "flowcell": "AC003CCCXX",
                         "project": "J.Doe_00_01"}
        self.p_con = ProjectSummaryConnection(dbname="projects-test", username=self.user, password=self.pw, url=self.url)

    def _sample_con(self):
        # Helper: fresh connection to the test samples database.
        return SampleRunMetricsConnection(dbname="samples-test", username=self.user, password=self.pw, url=self.url)

    def test_connection(self):
        """Test database connection"""
        sample_con = self._sample_con()
        self.assertEqual(sample_con.url_string, "http://{}:5984".format(self.url))

    def test_get_flowcell(self):
        """Test getting a flowcell for a given sample"""
        sample_con = self._sample_con()
        fc = sample_con.get_entry(self.examples["sample"], "flowcell")
        self.assertEqual(str(fc), self.examples["flowcell"])

    def test_get_sample_ids(self):
        """Test getting sample ids given flowcell and sample_prj"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"])
        LOG.info("Number of samples before subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 5)
        sample_ids = sample_con.get_sample_ids(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Number of samples after subsetting: " + str(len(sample_ids)))
        self.assertEqual(len(sample_ids), 2)

    def test_get_samples(self):
        """Test getting samples given flowcell and sample_prj."""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"])
        LOG.info("Selecting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 5)
        samples = sample_con.get_samples(fc_id=self.examples["flowcell"], sample_prj=self.examples["project"])
        LOG.info("Selecting on flowcell, subsetting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 2)
        samples = sample_con.get_samples(sample_prj=self.examples["project"])
        LOG.info("Selecting on project: " + str(len(samples)))
        self.assertEqual(len(samples), 3)
        samples = sample_con.get_samples(sample_prj=self.examples["project"], fc_id=self.examples["flowcell"])
        LOG.info("Selecting on project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 2)

    def test_get_samples_wrong_info(self):
        """Test getting samples when either flowcell or project id information is wrong"""
        sample_con = self._sample_con()
        samples = sample_con.get_samples(sample_prj="bogusproject", fc_id=self.examples["flowcell"])
        LOG.info("Selecting on bogus project, subsetting on flowcell: " + str(len(samples)))
        self.assertEqual(len(samples), 0)

    def test_get_project_sample_ids(self):
        """Test getting project sample ids"""
        sample_con = self._sample_con()
        sample_ids = sample_con.get_sample_ids(sample_prj=self.examples["project"])
        sample_names = [sample_con.db.get(x)["name"] for x in sample_ids]
        # Order of ids from the view is unspecified; compare as sets.
        self.assertEqual(set(sample_names),
                         set(['1_120924_AC003CCCXX_TGACCA',
                              '2_120924_AC003CCCXX_ACAGTG',
                              '1_121015_BB002BBBXX_TGACCA']))

    def test_get_latest_library_prep(self):
        """Test getting latest library prep"""
        # Inject a newer "B" prep for sample P001_102, verify it supersedes "A".
        prj = self.p_con.get_entry("J.Doe_00_01")
        prj['samples']['P001_102']['library_prep']['B'] = {'sample_run_metrics': {'2_120924_AC003CCCXX_TTGGAA': None}}
        self.p_con.save(prj)
        preps = self.p_con.get_latest_library_prep(project_name=self.examples["project"])
        srm = [x for l in preps.values() for x in l]
        # Make sure A prep not in list
        self.assertNotIn('2_120924_AC003CCCXX_ACAGTG', srm)
        # Make sure B prep in list
        self.assertIn('2_120924_AC003CCCXX_TTGGAA', srm)
        # Reset data
        prj = self.p_con.get_entry("J.Doe_00_01")
        del prj['samples']['P001_102']['library_prep']['B']
        self.p_con.save(prj)

    def test_get_barcode_lane_statistics(self):
        """Test getting barcode lane statistics from flowcell database"""
        fc_con = FlowcellRunMetricsConnection(dbname="flowcells-test", username="******", password="******")
        # Try getting wrong sample name, should return None
        data = fc_con.get_barcode_lane_statistics("J.Doe_00_01", "P001_101_index6", "120924_AC003CCCXX", "1")
        self.assertEqual(data, (None, None))
        data = fc_con.get_barcode_lane_statistics("J.Doe_00_01", "P001_101_index3", "120924_AC003CCCXX", "1")
        self.assertEqual(data, (u'35.22', u'90.05'))