def get_column(client, ssheet, wsheet, column, constraint=None):
    """Get the content of a specified column, optionally filtering on other columns.

    column may be either a 1-based index or a column name (resolved via
    the worksheet header). constraint maps column names to required
    values; only rows whose constrained columns all match are kept.
    Returns a list of cell values (one per surviving row).
    """
    # BUG FIX: mutable default argument ({}) is shared across calls;
    # use None as the sentinel instead.
    if constraint is None:
        constraint = {}
    # If the column specified is a name, find the corresponding index
    try:
        column = int(column)
    except ValueError:
        column = get_column_index(client, ssheet, wsheet, column)
    # Create a filter mask based on the supplied constraints
    # (renamed from 'filter' to avoid shadowing the builtin)
    mask = [True] * row_count(wsheet)
    for con_name, con_value in constraint.items():
        con_column = get_column(client, ssheet, wsheet, con_name)
        for i, value in enumerate(con_column):
            mask[i] &= (_to_unicode(value) == _to_unicode(con_value))
    # Get the content of the specified column index
    content_2d = get_cell_content(client, ssheet, wsheet, 0, column, 0, column)
    # Keep only the rows that passed all constraint filters
    return [row[0] for i, row in enumerate(content_2d) if mask[i]]
def get_column_index(client, ssheet, wsheet, name):
    """Get the index of the column with the specified name, or 0 if no column matches"""
    wanted = _to_unicode(name)
    # Header positions are reported 1-based; 0 signals "not found"
    for position, column_name in enumerate(get_header(client, ssheet, wsheet), 1):
        if wanted == _to_unicode(column_name):
            return position
    return 0
def get_folder(client, folder_name):
    """Get a folder if it exists"""
    query = gdata.docs.service.DocumentQuery(categories=["folder"],
                                             params={"showfolders": "true"})
    wanted = _to_unicode(folder_name)
    entries = client.Query(query.ToUri()).entry or []
    for entry in entries:
        if _to_unicode(entry.title.text) == wanted:
            return entry
    # No folder with a matching title was found
    return None
def get_folder(client, folder_name):
    """Get a folder if it exists"""
    q = gdata.docs.service.DocumentQuery(categories=['folder'],
                                         params={'showfolders': 'true'})
    # Lazily scan the query results for the first title match
    matches = (entry for entry in (client.Query(q.ToUri()).entry or [])
               if _to_unicode(entry.title.text) == _to_unicode(folder_name))
    return next(matches, None)
def get_cell_content(client, ssheet, wsheet,
                     row_start=0, col_start=0, row_end=0, col_end=0):
    """Get the text contents of the cells from the supplied spreadsheet and
    worksheet and from the specified cell range as a two-dimensional list.

    A 0 for any bound means "use the worksheet's own extent" (row/column 1
    for the start bounds, the worksheet size for the end bounds).
    """
    if str(row_start) == '0':
        row_start = '1'
    if str(col_start) == '0':
        col_start = '1'
    if str(row_end) == '0':
        row_end = str(row_count(wsheet))
    if str(col_end) == '0':
        col_end = str(column_count(wsheet))
    feed = get_cell_feed(client, ssheet, wsheet, row_start,
                         col_start, row_end, col_end)
    # BUG FIX: the old "(feed or [])" fallback produced a plain list which
    # has no .entry attribute, so a None feed still crashed; guard explicitly.
    entries = feed.entry if feed else []
    # Number of columns in the requested range, used to fold the flat
    # cell feed into rows
    cols = int(col_end) - int(col_start) + 1
    content = []
    for i, cell in enumerate(entries):
        if i % cols == 0:
            row = []
            content.append(row)
        row.append(_to_unicode((cell.content.text or "")))
    return content
def write_rows(client, ssheet, wsheet, header, rows):
    """Write the supplied data rows to the worksheet, using the supplied
    column headers.

    Returns True on success, False if any API call raised.
    """
    # Get the keys
    ss_key = get_key(ssheet)
    ws_key = get_key(wsheet)
    try:
        # As a workaround for the InsertRow bugs with column names,
        # just use single lowercase letters as column headers to start with
        for i in range(0, len(header)):
            client.UpdateCell(1, i + 1, chr(97 + i), ss_key, ws_key)
        # Iterate over the rows and add the data to the worksheet
        for row in rows:
            row_data = {}
            for i, value in enumerate(row):
                row_data[chr(97 + i)] = unicode(value)
            client.InsertRow(row_data, ss_key, ws_key)
        # Lastly, substitute the one-letter header for the real string
        for i in range(0, len(header)):
            client.UpdateCell(1, i + 1, _to_unicode(header[i]), ss_key, ws_key)
    except Exception:
        # BUG FIX: narrowed from a bare except so that KeyboardInterrupt
        # and SystemExit are no longer swallowed; API failures still
        # report a best-effort False.
        return False
    return True
def get_rows_with_constraint(client, ssheet, wsheet, constraint=None):
    """Get the content of the rows filtered by some column values.

    constraint maps column names to required values; only rows whose
    constrained columns all match are returned.
    """
    # BUG FIX: mutable default argument ({}) is shared across calls;
    # use None as the sentinel instead.
    if constraint is None:
        constraint = {}
    # Create a filter mask based on the supplied constraints
    # (renamed from 'filter' to avoid shadowing the builtin)
    mask = [True] * row_count(wsheet)
    for con_name, con_value in constraint.items():
        con_column = get_column(client, ssheet, wsheet, con_name)
        for i, value in enumerate(con_column):
            mask[i] &= (_to_unicode(value) == _to_unicode(con_value))
    # Get the content of the entire worksheet
    content_2d = get_cell_content(client, ssheet, wsheet)
    # Keep only the rows that passed all constraint filters
    return [row for i, row in enumerate(content_2d) if mask[i]]
def get_rows_with_constraint(client, ssheet, wsheet, constraint=None):
    """Get the content of the rows filtered by some column values.

    Values are compared with surrounding whitespace stripped.
    """
    # BUG FIX: mutable default argument ({}) is shared across calls;
    # use None as the sentinel instead.
    if constraint is None:
        constraint = {}
    # Create a filter mask based on the supplied constraints
    filter_mask = [True] * row_count(wsheet)
    for con_name, con_value in constraint.items():
        con_column = get_column(client, ssheet, wsheet, con_name)
        for i, value in enumerate(con_column):
            filter_mask[i] &= (
                _to_unicode(value).strip() == _to_unicode(con_value).strip())
    # Get the content of the entire worksheet
    content_2d = get_cell_content(client, ssheet, wsheet)
    # Keep only the rows that passed all constraint filters
    return [row for i, row in enumerate(content_2d) if filter_mask[i]]
def _write_to_worksheet(client, ssheet, wsheet_title, rows, header, append, keys=None):
    """Generic method to write a set of rows to a worksheet on google docs.

    When append is True and keys (header column names) are given, rows in
    the existing worksheet whose key columns match an incoming row are
    deleted first, so the new data replaces the old.
    Returns True on success, False otherwise.
    """
    # BUG FIX: mutable default argument ([]) is shared across calls;
    # use None as the sentinel instead.
    if keys is None:
        keys = []
    # Convert the worksheet title to unicode
    wsheet_title = _to_unicode(wsheet_title)
    # Add a new worksheet, possibly appending or replacing a pre-existing
    # worksheet according to the append-flag.
    wsheet = g_spreadsheet.add_worksheet(client, ssheet, wsheet_title,
                                         len(rows) + 1, len(header), append)
    if wsheet is None:
        logger2.error("ERROR: Could not add a worksheet {!r} to " \
                      "spreadsheet {!r}".format(wsheet_title, ssheet.title.text))
        return False
    # If keys are specified (will correspond to indexes in the header),
    # delete pre-existing rows with matching keys
    if append and len(keys) > 0:
        wsheet_data = g_spreadsheet.get_cell_content(client, ssheet, wsheet, '2')
        wsheet_header = g_spreadsheet.get_header(client, ssheet, wsheet)
        try:
            wsheet_indexes = [wsheet_header.index(key) for key in keys]
            header_indexes = [header.index(key) for key in keys]
        except ValueError:
            logger2.warn("WARNING: Could not identify correct header for duplicate detection")
        else:
            for row in rows:
                try:
                    # Build a composite key from the key columns and look
                    # for an existing row with the same key
                    key = "#".join([row[i] for i in header_indexes])
                    for i, wrow in enumerate(wsheet_data):
                        wkey = "#".join([wrow[j] for j in wsheet_indexes])
                        if wkey == key:
                            g_spreadsheet.delete_row(client, ssheet, wsheet, i + 1)
                            wsheet_data.pop(i)
                            break
                except Exception:
                    # BUG FIX: narrowed from a bare except; duplicate
                    # replacement stays best-effort but no longer
                    # swallows KeyboardInterrupt/SystemExit.
                    logger2.warn("WARNING: Could not identify/replace duplicate rows")
    # Write the data to the worksheet
    success = g_spreadsheet.write_rows(client, ssheet, wsheet, header, rows)
    if success:
        logger2.info("Wrote data to the {!r}:{!r} " \
                     "worksheet".format(ssheet.title.text, wsheet_title))
    else:
        logger2.error("ERROR: Could not write data to the {!r}:{!r} " \
                      "worksheet".format(ssheet.title.text, wsheet_title))
    return success
def _write_to_worksheet(client, ssheet, wsheet_title, rows, header, append):
    """Generic method to write a set of rows to a worksheet on google docs"""
    # Convert the worksheet title to unicode
    wsheet_title = _to_unicode(wsheet_title)
    # Add a new worksheet, possibly appending to or replacing a
    # pre-existing worksheet according to the append-flag
    wsheet = bcbio.google.spreadsheet.add_worksheet(
        client, ssheet, wsheet_title, len(rows) + 1, len(header), append)
    if wsheet is None:
        log.info("Could not add a worksheet '%s' to spreadsheet '%s'"
                 % (wsheet_title, ssheet.title.text))
        return False
    # Write the data to the worksheet; the header entries are sequences
    # whose first element is the column name
    log.info("Adding data to the '%s' worksheet" % (wsheet_title))
    column_names = [col_header[0] for col_header in header]
    return bcbio.google.spreadsheet.write_rows(client, ssheet, wsheet,
                                               column_names, rows)
def add_worksheet(client, ssheet, title, rows=0, cols=0, append=False):
    """Add a new worksheet with the specified title to the specified
    spreadsheet. Will overwrite an existing worksheet with the same title
    unless append is True
    """
    # Look for a pre-existing worksheet with the same title
    existing = get_worksheet(client, ssheet, title)
    if existing:
        if append:
            # When appending, re-use the worksheet we found
            return existing
        # Otherwise, drop it so it can be replaced
        client.DeleteWorksheet(existing)
    # Create the desired worksheet
    return client.AddWorksheet(_to_unicode(title), rows, cols, get_key(ssheet))
def get_spreadsheet(ssheet_title, encoded_credentials):
    """Connect to Google docs and get a spreadsheet"""
    # Convert the spreadsheet title to unicode
    ssheet_title = _to_unicode(ssheet_title)
    # Create a client class which will make HTTP requests with Google Docs server.
    client = g_spreadsheet.get_client()
    bcbio.google.connection.authenticate(client, encoded_credentials)
    # Locate the spreadsheet and return it together with the client
    ssheet = g_spreadsheet.get_spreadsheet(client, ssheet_title)
    if ssheet:
        return (client, ssheet)
    # Nothing matched the requested title
    logger2.warn("No document with specified title '%s' found in \
GoogleDocs repository" % ssheet_title)
    return (None, None)
def set_barcode_full_name(self, barcode_full_name):
    """Store the full barcode name, coerced to unicode."""
    self.barcode_full_name = _to_unicode(barcode_full_name)
def set_barcode_id(self, barcode_id):
    """Store the barcode id, coerced to unicode."""
    self.barcode_id = _to_unicode(barcode_id)
def set_description(self, description):
    """Store the description, coerced to unicode."""
    self.description = _to_unicode(description)
def set_project(self, project):
    """Store the project, normalized through get_project_name()."""
    self.project = get_project_name(_to_unicode(project))
def set_barcode_sequence(self, barcode_sequence):
    """Store the barcode sequence, coerced to unicode."""
    self.barcode_sequence = _to_unicode(barcode_sequence)
def get_proj_inf(project_name_swe,samp_db,proj_db,credentials_file,config_file):
    """Build a 'project_summary' document dict for the given project.

    Collects metadata from the Genomics Project list, the *_20132
    (reception control) and *_20158 (sequencing QC) Google spreadsheets,
    and sample_run_metrics entries in the samp_db couchdb, and returns
    the assembled dict keyed by an existing or freshly generated uuid.

    NOTE(review): structure reconstructed from a whitespace-mangled
    source; the nesting of the inner try/except blocks should be
    verified against the original file.
    """
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',filename='proj_coucdb.log',level=logging.INFO)
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Re-use the existing couchdb document key if the project is known
    key = find_proj_from_view(proj_db,project_name)
    if not key:
        key = uuid4().hex
    logging.info(str('Handling proj '+project_name+' '+ key))
    print key
    # Skeleton of the project_summary document that gets filled in below
    obj={'application':'', 'customer_reference':'', 'min_m_reads_per_sample_ordered':'', 'no_of_samples':'', 'entity_type': 'project_summary', 'uppnex_id': '', 'samples': {}, 'project_id': project_name, '_id': key}

    ### Get minimal #M reads and uppnexid from Genomics Project list
    print '\nGetting minimal #M reads and uppnexid from Genomics Project list for project ' + project_name_swe
    config = cl.load_config(config_file)
    p = pm.ProjectMetaData(project_name,config)
    if p.project_name == None:
        # Fall back to the original (possibly Swedish-charset) name
        p = pm.ProjectMetaData(project_name_swe,config)
    if p.project_name == None:
        print project_name+' not found in genomics project list'
        logging.warning(str('Google Document Genomics Project list: '+project_name+' not found'))
    else:
        if p.min_reads_per_sample.strip() !='':
            obj['min_m_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
        if p.no_samples.strip() !='':
            obj['no_of_samples'] = int(p.no_samples)
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference

    ### Get costumer and Scilife Sample name from _20132_0X_Table for Sample Summary and Reception Control
    print '\nTrying to find Scilife Sample names from '+project_name_swe+'_20132_0X_Table for Sample Summary and Reception Control'
    # Per-version (worksheet name, scilife-name column header) lookup
    versions = {"01":["Data",'Sample name Scilife (Index included)'],
                "02":["Sheet1",'Sample name Scilife'],
                "04":["Reception control",'Complete sample name'],
                "05":["Reception control",'SciLifeLab ID']}
    # Load google document
    client = make_client(credentials_file)
    feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe+'_20132', False)
    # FIXME: handle naming mistakes (original note in Swedish: "FIXA: Hantera mistakes")
    if len(feed.entry) == 0:
        ssheet=None
        logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20132_XXX'))
        print "Could not find spreadsheet"
    else:
        ssheet = feed.entry[0].title.text
        # Table version is encoded in the spreadsheet title after '_20132_'
        version = ssheet.split('_20132_')[1].split(' ')[0].split('_')[0]
        wsheet = versions[version][0]
        header = versions[version][1]
        content, ws_key, ss_key = get_google_document(ssheet, wsheet, credentials_file)
        # Get Scilife Sample names
        try:
            dummy, customer_names_colindex = get_column(content,'Sample name from customer')
            row_ind, scilife_names_colindex = get_column(content, header)
            info={}
            for j,row in enumerate(content):
                if (j > row_ind):
                    try:
                        cust_name = str(row[customer_names_colindex]).strip()
                        sci_name = str(row[scilife_names_colindex]).strip().replace('-','_')
                        if cust_name != '':
                            info[sci_name] = cust_name
                    except:
                        pass
            print 'Names found'
            for scilife_name in info:
                try:
                    obj['samples'][scilife_name] = {'customer_name': info[scilife_name], 'scilife_name':scilife_name}
                except:
                    pass
        except:
            print 'Names not found'
            pass

    ### Get Sample Status from _20158_01_Table for QA HiSeq2000 sequencing results for samples
    print '\nGetting Sample Status from '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
    # Per-version column headers: [scilife name, read count, pass/fail, customer name]
    versions = {"01":['Sample name Scilife',"Total reads per sample","Passed=P/ not passed=NP*",'Sample name from customer'],
                "02":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads",'Sample name (customer)'],
                "03":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads",'Sample name (customer)']}
    # Load google document, trying known title-separator mistakes in turn
    mistakes = ["_"," _"," ",""]
    found='FALSE'
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe + m + '20158', False)
        if len(feed.entry) != 0:
            try:
                ssheet = feed.entry[0].title.text
                version = ssheet.split(str(m+'20158_'))[1].split(' ')[0].split('_')[0]
                content, ws_key, ss_key = get_google_document(ssheet,"Sheet1",credentials_file)
                found='TRUE'
                break
            except:
                pass
    if found=='TRUE':
        print 'Google document found!'
    else:
        print 'Google document NOT found!'
        logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20158_XXX'))
    # Get status etc from loaded document
    try:
        dummy, P_NP_colindex = get_column(content,versions[version][2])
        dummy, No_reads_sequenced_colindex = get_column(content,versions[version][1])
        dummy, customer_names_colindex = get_column(content,versions[version][3])
        row_ind, scilife_names_colindex = get_column(content,versions[version][0])
        info={}
        for j,row in enumerate(content):
            if ( j > row_ind ):
                try:
                    sci_name=str(row[scilife_names_colindex]).strip()
                    cust_name=str(row[customer_names_colindex]).strip()
                    no_reads=str(row[No_reads_sequenced_colindex]).strip()
                    # Names ending in 'F' are treated as passed unconditionally
                    if sci_name[-1]=='F':
                        status='P'
                    else:
                        status =str(row[P_NP_colindex]).strip()
                    info[sci_name] =[status,no_reads,cust_name]
                except:
                    pass
        scilife_names = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            status = info[key][0]
            m_reads = info[key][1]
            cust_name = info[key][2]
            # Several raw names mapping to one stripped name makes the
            # per-sample values ambiguous
            if striped_scilife_name in duplicates:
                status = 'inconsistent'
                m_reads = 'inconsistent'
            try:
                if obj['samples'].has_key(striped_scilife_name):
                    obj['samples'][striped_scilife_name]['status'] = status
                    obj['samples'][striped_scilife_name]['m_reads_sequenced'] = m_reads
                else:
                    obj['samples'][striped_scilife_name]= {'customer_name': cust_name, 'scilife_name':striped_scilife_name, 'status':status, 'm_reads_sequenced':m_reads}
            except:
                pass
    except:
        print 'Status and M reads sequenced not found in '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
        pass

    ### Get _id for sample_run_metrics and bcbb names -- use couchdb views instead.... To be fixed...
    print '\nGetting _id for sample_run_metrics'
    info = find_samp_from_view(samp_db,project_name)
    if len(info.keys())>0:
        print 'sample_run_metrics found on couchdb for project '+ project_name
    else:
        print 'no sample_run_metrics found on couchdb for project '+ project_name
        logging.warning(str('CouchDB: No sample_run_metrics found for project '+ project_name))
    for key in info:
        scilife_name = strip_scilife_name([info[key][1]])[info[key][1]]
        if obj['samples'].has_key(scilife_name):
            if obj['samples'][scilife_name].has_key("sample_run_metrics"):
                obj['samples'][scilife_name]["sample_run_metrics"][info[key][0]]=key
            else:
                obj['samples'][scilife_name]["sample_run_metrics"] = {info[key][0]:key}
    return obj
def set_name(self, name):
    """Store the name, first coerced via str() and then to unicode."""
    self.name = _to_unicode(str(name))
def get_proj_inf(project_name_swe,qc,credentials_file,config_file):
    """Build a 'ProjectSummary' document dict for the given project.

    Collects metadata from the Genomics Project list, the *_20132
    (reception control) and *_20158 (sequencing QC) Google spreadsheets,
    and SampleQCMetrics entries in the supplied qc database, and returns
    the assembled dict keyed by the md5 of the project name.

    NOTE(review): structure reconstructed from a whitespace-mangled
    source; the nesting of the inner try/except blocks should be
    verified against the original file.
    """
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',filename='ProjectSummary.log',level=logging.INFO)#,datefmt='%Y-%m-%d'
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # The document key is deterministic: md5 of the normalized name
    key = hashlib.md5(project_name).hexdigest()
    print key
    # Skeleton of the ProjectSummary document that gets filled in below
    obj={'Application':'', 'Customer_reference':'', 'Min_M_reads_per_sample_ordered':'', 'No_of_samples':'', 'Entity_type': 'ProjectSummary', 'Uppnex_id': '', 'Samples': {}, 'Project_id': project_name, 'Entity_version': 0.1, '_id': key}
    logging.info(str('Handling proj '+project_name+' '+ key))

    ### Get minimal #M reads and uppnexid from Genomics Project list
    print '\nGetting minimal #M reads and uppnexid from Genomics Project list for project ' + project_name_swe
    config = cl.load_config(config_file)
    p = pm.ProjectMetaData(project_name,config)
    if p.project_name==None:
        # Fall back to the original (possibly Swedish-charset) name
        p = pm.ProjectMetaData(project_name_swe,config)
    if p.project_name==None:
        print project_name+' not found in genomics project list'
        logging.warning(str('Google Document Genomics Project list: '+project_name+' not found'))
    else:
        obj['Min_M_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
        obj['Uppnex_id'] = p.uppnex_id
        obj['No_of_samples'] = int(p.no_samples)
        obj['Application'] = p.application
        obj['Customer_reference'] = p.customer_reference

    ### Get costumer and Scilife Sample name from _20132_0X_Table for Sample Summary and Reception Control
    print '\nTrying to find Scilife Sample names from '+project_name_swe+'_20132_0X_Table for Sample Summary and Reception Control'
    # Per-version (worksheet name, scilife-name column header) lookup
    versions = {"01":["Data",'Sample name Scilife (Index included)'],
                "02":["Sheet1",'Sample name Scilife'],
                "04":["Reception control",'Complete sample name'],
                "05":["Reception control",'SciLifeLab ID']}
    # Load google document
    client = make_client(credentials_file)
    feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe+'_20132', False)
    # FIXME: handle naming mistakes (original note in Swedish: "FIXA: Hantera mistakes")
    if len(feed.entry) == 0:
        ssheet=None
        logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20132_XXX'))
        print "Could not find spreadsheet"
    else:
        ssheet = feed.entry[0].title.text
        # Table version is encoded in the spreadsheet title after '_20132_'
        version = ssheet.split('_20132_')[1].split(' ')[0].split('_')[0]
        wsheet = versions[version][0]
        header = versions[version][1]
        content, ws_key, ss_key = get_google_document(ssheet, wsheet, credentials_file)
        # Get Scilife Sample names
        try:
            dummy, customer_names_colindex = get_column(content,'Sample name from customer')
            row_ind, scilife_names_colindex = get_column(content, header)
            info={}
            for j,row in enumerate(content):
                if (j > row_ind):
                    try:
                        cust_name = str(row[customer_names_colindex]).strip()
                        sci_name = str(row[scilife_names_colindex]).strip().replace('-','_')
                        if cust_name != '':
                            info[sci_name] = cust_name
                    except:
                        pass
            print 'Names found'
            for scilife_name in info:
                try:
                    obj['Samples'][scilife_name] = {'customer_name': info[scilife_name], 'scilife_name':scilife_name}
                except:
                    pass
        except:
            print 'Names not found'
            pass

    ### Get Sample Status from _20158_01_Table for QA HiSeq2000 sequencing results for samples
    print '\nGetting Sample Status from '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
    # Per-version column headers: [scilife name, read count, pass/fail]
    versions = {"01":['Sample name Scilife',"Total reads per sample","Passed=P/ not passed=NP*"],
                "02":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads"],
                "03":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads"]}
    # Load google document, trying known title-separator mistakes in turn
    mistakes = ["_"," _"," ",""]
    found='FALSE'
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe + m + '20158', False)
        if len(feed.entry) == 0:
            ssheet=None
            print "Could not find spreadsheet"
        else:
            ssheet = feed.entry[0].title.text
            version = ssheet.split(str(m+'20158_'))[1].split(' ')[0].split('_')[0]
            wsheet = "Sheet1"
            try:
                content, ws_key, ss_key = get_google_document(ssheet,wsheet,credentials_file)
                found='TRUE'
                break
            except:
                pass
    if found=='TRUE':
        print 'Google document found!'
    else:
        print 'Google document NOT found!'
        logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20158_XXX'))
    # Get status etc from loaded document
    try:
        dummy, P_NP_colindex = get_column(content,versions[version][2])
        dummy, No_reads_sequenced_colindex = get_column(content,versions[version][1])
        row_ind, scilife_names_colindex = get_column(content,versions[version][0])
        info={}
        for j,row in enumerate(content):
            if ( j > row_ind ):
                try:
                    sci_name=str(row[scilife_names_colindex]).strip()
                    no_reads=str(row[No_reads_sequenced_colindex]).strip()
                    # Names ending in 'F' are treated as passed unconditionally
                    if sci_name[-1]=='F':
                        status='P'
                    else:
                        status=str(row[P_NP_colindex]).strip()
                    info[sci_name]=[status,no_reads]
                except:
                    pass
        print info
        scilife_names = strip_scilife_name_prep(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            try:
                # Several raw names mapping to one stripped name makes
                # the per-sample values ambiguous
                if striped_scilife_name in duplicates:
                    obj['Samples'][striped_scilife_name] = {'status':'inconsistent','M_reads_sequenced':'inconsistent'}
                elif obj['Samples'].has_key(striped_scilife_name):
                    obj['Samples'][striped_scilife_name]['status'] = info[key][0]
                    obj['Samples'][striped_scilife_name]['M_reads_sequenced'] = info[key][1]
            except:
                pass
    except:
        print 'Status and M reads sequenced not found in '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
        pass

    ### Get _id for SampleQCMetrics and bcbb names # use couchdb views instead.... To be fixed...
    print '\nGetting _id for SampleQCMetrics'
    info={}
    for key in qc:
        SampQC = qc.get(key)
        if SampQC.has_key("entity_type"):
            # NOTE(review): '&' is a bitwise AND of the two boolean tests,
            # preserved from the original (works here, but 'and' is meant)
            if (SampQC["entity_type"] == "SampleQCMetrics") & SampQC.has_key("sample_prj"):
                if SampQC["sample_prj"] == project_name:
                    info[SampQC["_id"]]=[str(SampQC["name"]).strip(),SampQC["barcode_name"]]
    for key in info:
        scilife_name=strip_scilife_name_prep([info[key][1]])[info[key][1]]
        if obj['Samples'].has_key(scilife_name):
            if obj['Samples'][scilife_name].has_key("SampleQCMetrics"):
                obj['Samples'][scilife_name]["SampleQCMetrics"].append(key)
            else:
                obj['Samples'][scilife_name]["SampleQCMetrics"] = [key]
            if obj['Samples'][scilife_name].has_key("bcbb_names"):
                obj['Samples'][scilife_name]["bcbb_names"].append(info[key][0])
            else:
                obj['Samples'][scilife_name]["bcbb_names"] = [info[key][0]]
    return obj
def get_proj_inf(project_name_swe, samp_db, proj_db, CREDENTIALS_FILE, config):
    """Build a 'project_summary' document dict for the given project.

    Collects metadata from the Genomics Project list and the *_20132,
    *_20158 and *_20135 Google spreadsheets, plus sample_run_metrics
    entries from couchdb, and returns the assembled dict including
    per-sample library_prep information.

    NOTE(review): structure reconstructed from a whitespace-mangled
    source; the nesting of the inner try/except blocks should be
    verified against the original file.
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Re-use the existing couchdb document key if the project is known
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex
    logger.info("Handling proj %s %s" % (project_name, key))
    # Skeleton of the project_summary document that gets filled in below
    obj = {
        "application": "",
        "customer_reference": "",
        "min_m_reads_per_sample_ordered": "",
        "no_of_samples": "",
        "entity_type": "project_summary",
        "uppnex_id": "",
        "samples": {},
        "project_name": project_name,
        "project_id": "",
        "_id": key,
    }

    ### Get minimal #M reads and uppnexid from Genomics Project list
    logger.debug("Getting minimal #M reads and uppnexid from Genomics Project list for project %s" % project_name_swe)
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        # Fall back to the original (possibly Swedish-charset) name
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning("Google Document Genomics Project list: %s not found" % project_name)
    else:
        if p.min_reads_per_sample.strip() != "":
            obj["min_m_reads_per_sample_ordered"] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != "":
            obj["no_of_samples"] = int(p.no_samples)
        obj["uppnex_id"] = p.uppnex_id
        obj["application"] = p.application
        obj["customer_reference"] = p.customer_reference
        obj["project_id"] = "P" + p.project_id

    ### 20132
    logger.debug("Trying to find Scilife Sample names from table 20132")
    # Per-version (worksheet name, scilife-name column header) lookup
    versions = {
        "01": ["Data", "Sample name Scilife (Index included)"],
        "02": ["Sheet1", "Sample name Scilife"],
        "04": ["Reception control", "Complete sample name"],
        "05": ["Reception control", "SciLifeLab ID"],
    }
    # Load google document
    client = make_client(CREDENTIALS_FILE)
    feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client, project_name_swe + "_20132", False)
    if len(feed.entry) == 0:
        ssheet = None
        logger.warning("Could not find spreadsheet 20132 for %s" % project_name_swe)
    else:
        ssheet = feed.entry[0].title.text
        # Table version is encoded in the spreadsheet title after '_20132_'
        version = ssheet.split("_20132_")[1].split(" ")[0].split("_")[0]
        wsheet = versions[version][0]
        header = versions[version][1]
        content, ws_key, ss_key = get_google_document(ssheet, wsheet, CREDENTIALS_FILE)
        logger.debug("Document found")
        logger.debug(ssheet)
        # Get Scilife Sample names
        try:
            dummy, customer_names_colindex = get_column(content, "Sample name from customer")
            row_ind, scilife_names_colindex = get_column(content, header)
            info = {}
            for j, row in enumerate(content):
                if j > row_ind:
                    try:
                        cust_name = str(row[customer_names_colindex]).strip()
                        sci_name = str(row[scilife_names_colindex]).strip()
                        if cust_name != "":
                            info[sci_name] = cust_name
                    except:
                        pass
            logger.debug("Names found")
            scilife_names, preps = strip_scilife_name(info.keys())
            for key in scilife_names:
                scilife_name = scilife_names[key]
                prep = preps[key]
                cust_name = info[key]
                # An 'F' in the prep suffix marks a failed incoming QC
                incoming_QC_status = "F" if "F" in prep else "P"
                try:
                    obj["samples"][scilife_name] = {
                        "customer_name": cust_name,
                        "scilife_name": scilife_name,
                        "incoming_QC_status": incoming_QC_status,
                    }
                except:
                    pass
        except:
            logger.debug("Names not found")
            pass

    ### 20158
    logger.debug("Getting Sample Status from table 20158")
    # Per-version column headers: [scilife name, read count, pass/fail, customer name]
    versions = {
        "01": [
            "Sample name Scilife",
            "Total reads per sample",
            "Passed=P/ not passed=NP*",
            "Sample name from customer",
        ],
        "02": [
            "Sample name (SciLifeLab)",
            "Total number of reads (Millions)",
            "Based on total number of reads after mapping and duplicate removal",
            "Sample name (customer)",
        ],
        "03": [
            "Sample name (SciLifeLab)",
            "Total number of reads (Millions)",
            "Based on total number of reads after mapping and duplicate removal",
            "Sample name (customer)",
        ],
    }
    # Load google document, trying known title-separator mistakes in turn
    mistakes = ["_", " _", " ", ""]
    found = False
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client, project_name_swe + m + "20158", False)
        if len(feed.entry) != 0:
            try:
                ssheet = feed.entry[0].title.text
                version = ssheet.split(str(m + "20158_"))[1].split(" ")[0].split("_")[0]
                content, ws_key, ss_key = get_google_document(ssheet, "Sheet1", CREDENTIALS_FILE)
                found = True
                break
            except:
                pass
    if found:
        logger.debug("Google document found")
        logger.debug(ssheet)
    else:
        logger.warning("Could not find spreadsheet 20158 for %s" % project_name_swe)
    # Get status etc from loaded document
    try:
        dummy, P_NP_colindex = get_column(content, versions[version][2])
        dummy, No_reads_sequenced_colindex = get_column(content, versions[version][1])
        dummy, customer_names_colindex = get_column(content, versions[version][3])
        row_ind, scilife_names_colindex = get_column(content, versions[version][0])
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    sci_name = str(row[scilife_names_colindex]).strip()
                    cust_name = str(row[customer_names_colindex]).strip()
                    no_reads = str(row[No_reads_sequenced_colindex]).strip()
                    status = str(row[P_NP_colindex]).strip()
                    info[sci_name] = [status, no_reads, cust_name]
                except:
                    pass
        scilife_names, preps = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            # Several raw names mapping to one stripped name makes the
            # per-sample values ambiguous
            status = "inconsistent" if striped_scilife_name in duplicates else info[key][0]
            m_reads = "inconsistent" if striped_scilife_name in duplicates else info[key][1]
            cust_name = info[key][2]
            prep = preps[key]
            incoming_QC_status = "F" if "F" in prep else "P"
            if obj["samples"].has_key(striped_scilife_name):
                obj["samples"][striped_scilife_name]["status"] = status
                obj["samples"][striped_scilife_name]["m_reads_sequenced"] = m_reads
            else:
                obj["samples"][striped_scilife_name] = {
                    "customer_name": cust_name,
                    "scilife_name": striped_scilife_name,
                    "status": status,
                    "m_reads_sequenced": m_reads,
                    "incoming_QC_status": incoming_QC_status,
                }
    except:
        pass

    ### Get _id for sample_run_metrics
    logger.debug("Getting _id for sample_run_metrics")
    info = find_samp_from_view(samp_db, project_name)
    if len(info.keys()) > 0:
        logger.debug("sample_run_metrics found on couchdb for project %s" % project_name)
    else:
        logger.warning("No sample_run_metrics found for project %s" % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Default library prep is 'A'; 'F' (failed) markers are dropped
        prep = "A" if preps[sci_name_raw].replace("F", "") == "" else preps[sci_name_raw].replace("F", "")
        if obj["samples"].has_key(scilife_name):
            if obj["samples"][scilife_name].has_key("library_prep"):
                if obj["samples"][scilife_name]["library_prep"].has_key(prep):
                    obj["samples"][scilife_name]["library_prep"][prep]["sample_run_metrics"][info[key][0]] = key
                else:
                    obj["samples"][scilife_name]["library_prep"][prep] = {"sample_run_metrics": {info[key][0]: key}}
            else:
                obj["samples"][scilife_name]["library_prep"] = {prep: {"sample_run_metrics": {info[key][0]: key}}}

    ### 20135
    logger.debug("Getting average read length from table 20135")
    # Per-version column headers: [scilife name, prep version, average size]
    versions = {
        "04": ["SciLifeLab ID", "Prep version (A, B etc)", "Average size (bp)"],
        "05": ["SciLifeLab ID", "Prep version (A, B etc)", "Average size (bp)"],
        "06": ["SciLifeLab ID", "Prep version (A, B etc)", "Average size (bp)"],
    }
    mistakes = ["_", "_ ", " _", " ", ""]
    found = False
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client, project_name_swe + m + "20135", False)
        if len(feed.entry) != 0:
            ssheet = feed.entry[0].title.text
            version = ssheet.split("20135")[1].replace("_", " ").lstrip(" ").split(" ")[0]
            content, ws_key, ss_key = get_google_document(ssheet, "Library QC", CREDENTIALS_FILE)
            found = True
    if found:
        logger.debug("Google document found")
        logger.debug(ssheet)
    else:
        logger.debug("Google document not found")
    # Get average read length from loaded document
    try:
        dummy, Finished_library_col = get_column(content, "Finished library ")
        dummy, Av_sice_bp_colindex = get_column(content, versions[version][2], Finished_library_col)
        row_ind, scilife_names_colindex = get_column(content, versions[version][0])
        row_ind, prep_colindex = get_column(content, versions[version][1])
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    sci_name = str(row[scilife_names_colindex]).strip()
                    Av_sice = str(row[Av_sice_bp_colindex]).strip()
                    prep = str(row[prep_colindex]).strip()
                    info[sci_name] = [Av_sice, prep]
                except:
                    pass
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            Av_sice = info[key][0]
            if info[key][1].strip() != "":
                # TODO: verify this branch (original note in Swedish: "KOntrollera!!!!!")
                prep = info[key][1]
            elif preps[key].strip() != "":
                prep = preps[key]
            # NOTE(review): this unconditional assignment overwrites the
            # two branches above — preserved as in the original
            prep = "A" if preps[key].replace("F", "") == "" else preps[key].replace("F", "")
            try:
                if obj["samples"][striped_scilife_name].has_key("library_prep"):
                    obj["samples"][striped_scilife_name]["library_prep"][prep]["average_size_bp"] = Av_sice
                else:
                    obj["samples"][striped_scilife_name]["library_prep"] = {prep: {"average_size_bp": Av_sice}}
            except:
                pass
    except:
        pass
    return obj
def set_barcode_type(self, barcode_type):
    """Store the barcode type, coerced to unicode."""
    self.barcode_type = _to_unicode(barcode_type)
def set_genome_build(self, genome_build):
    """Store the genome build, coerced to unicode."""
    self.genome_build = _to_unicode(genome_build)
def set_analysis(self, analysis):
    """Store the analysis, coerced to unicode."""
    self.analysis = _to_unicode(analysis)
def get_proj_inf(WS_projects, project_name_swe, samp_db, proj_db, client, config):
    """Assemble a project_summary document for couchdb.

    Merges information from the Genomics Project list, the 20132 (reception
    control) and 20158 (sequencing QC) Google spreadsheets, the
    sample_run_metrics couchdb view and, for projects run on a Work Set,
    the 20135 library QC data.

    :param WS_projects: dict keyed by project name with Work Set info
        (passed on to get_20135_info) -- assumed shape, confirm with caller
    :param project_name_swe: project name as given (may contain non-ascii)
    :param samp_db: couchdb samples database
    :param proj_db: couchdb projects database
    :param client: authenticated google spreadsheet client
    :param config: configuration used for ProjectMetaData lookups
    :return: dict ready to be stored as a project_summary document
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Re-use the existing couchdb _id when the project is already stored.
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex
    logger.info('Handling proj %s %s' % (project_name, key))
    obj = {'application': '',
           'customer_reference': '',
           'min_m_reads_per_sample_ordered': '',
           'no_of_samples': '',
           'entity_type': 'project_summary',
           'uppnex_id': '',
           'samples': {},
           'project_name': project_name,
           'project_id': '',
           '_id': key}

    ### Genomics Project list
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        # Retry with the original (possibly non-ascii) name.
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning('Google Document Genomics Project list: %s not found' % project_name)
    else:
        if p.min_reads_per_sample.strip() != '':
            obj['min_m_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != '':
            try:
                obj['no_of_samples'] = int(p.no_samples)
            except ValueError:
                # Not a plain integer -- keep the raw spreadsheet value
                # (consistent with the sibling implementation).
                obj['no_of_samples'] = p.no_samples
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference
        obj['project_id'] = 'P' + p.project_id

    info = get_20132_info(client, project_name_swe)
    ### 20132 -- customer names and incoming QC status per sample
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' in the prep string marks failed incoming QC.
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            obj['samples'][scilife_name] = {'customer_name': cust_name,
                                            'scilife_name': scilife_name,
                                            'incoming_QC_status': incoming_QC_status}
    except Exception:
        # Best effort: a malformed sheet must not abort the whole project.
        pass

    ### 20158 -- sequencing status and million reads per sample
    info = get_20158_info(client, project_name_swe)
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            # Ambiguous when several raw names strip to the same name.
            status = 'inconsistent' if striped_scilife_name in duplicates else info[key][0]
            m_reads = 'inconsistent' if striped_scilife_name in duplicates else info[key][1]
            prep = preps[key]
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            if striped_scilife_name in obj['samples']:
                obj['samples'][striped_scilife_name]['status'] = status
                obj['samples'][striped_scilife_name]['m_reads_sequenced'] = m_reads
            else:
                obj['samples'][striped_scilife_name] = {'scilife_name': striped_scilife_name,
                                                        'status': status,
                                                        'm_reads_sequenced': m_reads,
                                                        'incoming_QC_status': incoming_QC_status}
    except Exception:
        pass

    ### Get _id for sample_run_metrics
    info = find_samp_from_view(samp_db, project_name)
    if len(info) > 0:
        logger.debug('sample_run_metrics found on couchdb for project %s' % project_name)
    else:
        logger.warning('No sample_run_metrics found for project %s' % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Prep letter with the 'F' (failed) marker removed; default 'A'.
        prep = preps[sci_name_raw].replace('F', '') or 'A'
        if scilife_name in obj['samples']:
            library_prep = obj['samples'][scilife_name].setdefault("library_prep", {})
            prep_info = library_prep.setdefault(prep, {"sample_run_metrics": {}})
            prep_info["sample_run_metrics"][info[key][0]] = key

    ### 20135 -- average fragment size and prep status (Work Set projects)
    if project_name in WS_projects:
        logger.debug('project run on Work Set')
        info = WS_projects[project_name]
        info = get_20135_info(client, project_name_swe, info)
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            for prep in info[key]:
                try:
                    Av_sice = int(float(info[key][prep][0]))
                    prep_status = info[key][prep][1]
                    sample = obj['samples'][striped_scilife_name]
                    prep_info = sample.setdefault("library_prep", {}).setdefault(prep, {})
                    prep_info["average_size_bp"] = Av_sice
                    prep_info["prep_status"] = prep_status
                except Exception:
                    # Unknown sample or unparsable size -- skip this prep.
                    pass
    return obj
def set_name(self, name):
    """Normalize *name* to unicode and store the derived sample name."""
    unicode_name = _to_unicode(name)
    self.name = get_sample_name(unicode_name)
def set_full_name(self, name):
    """Store the full sample name, coerced to unicode."""
    full_name = _to_unicode(name)
    self.full_name = full_name
def get_proj_inf(project_name_swe, samp_db, proj_db, credentials_file, config_file):
    """Build a project_summary document for couchdb (legacy, print-based).

    Gathers project metadata from the Genomics Project list and the
    _20132/_20158 Google spreadsheets, then attaches sample_run_metrics
    _ids from the samples database.

    :param project_name_swe: project name as given (may contain non-ascii)
    :param samp_db: couchdb samples database
    :param proj_db: couchdb projects database
    :param credentials_file: google docs credentials file
    :param config_file: path to the configuration loaded via cl.load_config
    :return: dict representing the project_summary document
    """
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        filename='proj_coucdb.log',
                        level=logging.INFO)
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Re-use the existing couchdb _id when the project is already stored.
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex
    logging.info(str('Handling proj ' + project_name + ' ' + key))
    print(key)
    obj = {'application': '',
           'customer_reference': '',
           'min_m_reads_per_sample_ordered': '',
           'no_of_samples': '',
           'entity_type': 'project_summary',
           'uppnex_id': '',
           'samples': {},
           'project_id': project_name,
           '_id': key}

    ### Get minimal #M reads and uppnexid from Genomics Project list
    print('\nGetting minimal #M reads and uppnexid from Genomics Project list for project ' + project_name_swe)
    config = cl.load_config(config_file)
    p = pm.ProjectMetaData(project_name, config)
    if p.project_name is None:
        # Retry with the original (possibly non-ascii) name.
        p = pm.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        print(project_name + ' not found in genomics project list')
        logging.warning(str('Google Document Genomics Project list: ' + project_name + ' not found'))
    else:
        if p.min_reads_per_sample.strip() != '':
            obj['min_m_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != '':
            obj['no_of_samples'] = int(p.no_samples)
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference

    ### Get customer and Scilife sample names from _20132_0X_Table
    print('\nTrying to find Scilife Sample names from ' + project_name_swe + '_20132_0X_Table for Sample Summary and Reception Control')
    # Worksheet name and scilife-name header per spreadsheet version.
    versions = {"01": ["Data", 'Sample name Scilife (Index included)'],
                "02": ["Sheet1", 'Sample name Scilife'],
                "04": ["Reception control", 'Complete sample name'],
                "05": ["Reception control", 'SciLifeLab ID']}
    # Load google document
    client = make_client(credentials_file)
    # FIXME: handle naming "mistakes" variants here too, as done for 20158 below.
    feed = bcbio.google.spreadsheet.get_spreadsheets_feed(
        client, project_name_swe + '_20132', False)
    if len(feed.entry) == 0:
        ssheet = None
        logging.warning("Google Document %s: Could not find spreadsheet" %
                        str(project_name_swe + '_20132_XXX'))
        print("Could not find spreadsheet")
    else:
        ssheet = feed.entry[0].title.text
        version = ssheet.split('_20132_')[1].split(' ')[0].split('_')[0]
        wsheet = versions[version][0]
        header = versions[version][1]
        content, ws_key, ss_key = get_google_document(ssheet, wsheet, credentials_file)
    # Get Scilife Sample names
    try:
        dummy, customer_names_colindex = get_column(content, 'Sample name from customer')
        row_ind, scilife_names_colindex = get_column(content, header)
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    cust_name = str(row[customer_names_colindex]).strip()
                    sci_name = str(row[scilife_names_colindex]).strip().replace('-', '_')
                    if cust_name != '':
                        info[sci_name] = cust_name
                except Exception:
                    pass
        print('Names found')
        for scilife_name in info:
            obj['samples'][scilife_name] = {'customer_name': info[scilife_name],
                                            'scilife_name': scilife_name}
    except Exception:
        # Also reached when the spreadsheet above was not found (content unset).
        print('Names not found')

    ### Get Sample Status from _20158_0X_Table
    print('\nGetting Sample Status from ' + project_name_swe + '_20158_0X_Table for QA HiSeq2000 sequencing results for samples')
    # Column headers per spreadsheet version:
    # [scilife name, total reads, passed/not passed, customer name]
    versions = {"01": ['Sample name Scilife', "Total reads per sample",
                       "Passed=P/ not passed=NP*", 'Sample name from customer'],
                "02": ["Sample name (SciLifeLab)", "Total number of reads (Millions)",
                       "Based on total number of reads", 'Sample name (customer)'],
                "03": ["Sample name (SciLifeLab)", "Total number of reads (Millions)",
                       "Based on total number of reads", 'Sample name (customer)']}
    # Load google document, trying known separator "mistakes" in the name.
    mistakes = ["_", " _", " ", ""]
    found = False
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(
            client, project_name_swe + m + '20158', False)
        if len(feed.entry) != 0:
            try:
                ssheet = feed.entry[0].title.text
                version = ssheet.split(str(m + '20158_'))[1].split(' ')[0].split('_')[0]
                content, ws_key, ss_key = get_google_document(ssheet, "Sheet1", credentials_file)
                found = True
                break
            except Exception:
                pass
    if found:
        print('Google document found!')
    else:
        print('Google document NOT found!')
        logging.warning("Google Document %s: Could not find spreadsheet" %
                        str(project_name_swe + '_20158_XXX'))
    # Get status etc from loaded document
    try:
        dummy, P_NP_colindex = get_column(content, versions[version][2])
        dummy, No_reads_sequenced_colindex = get_column(content, versions[version][1])
        dummy, customer_names_colindex = get_column(content, versions[version][3])
        row_ind, scilife_names_colindex = get_column(content, versions[version][0])
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    sci_name = str(row[scilife_names_colindex]).strip()
                    cust_name = str(row[customer_names_colindex]).strip()
                    no_reads = str(row[No_reads_sequenced_colindex]).strip()
                    # NOTE(review): names ending in 'F' are forced to status
                    # 'P' here -- confirm this is intended.
                    if sci_name[-1] == 'F':
                        status = 'P'
                    else:
                        status = str(row[P_NP_colindex]).strip()
                    info[sci_name] = [status, no_reads, cust_name]
                except Exception:
                    pass
        # NOTE(review): this expects strip_scilife_name to return a dict
        # (raw name -> stripped name); newer variants return a tuple.
        scilife_names = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            status = info[key][0]
            m_reads = info[key][1]
            cust_name = info[key][2]
            if striped_scilife_name in duplicates:
                # Several raw names strip to the same name -> ambiguous.
                status = 'inconsistent'
                m_reads = 'inconsistent'
            if striped_scilife_name in obj['samples']:
                obj['samples'][striped_scilife_name]['status'] = status
                obj['samples'][striped_scilife_name]['m_reads_sequenced'] = m_reads
            else:
                obj['samples'][striped_scilife_name] = {'customer_name': cust_name,
                                                        'scilife_name': striped_scilife_name,
                                                        'status': status,
                                                        'm_reads_sequenced': m_reads}
    except Exception:
        print('Status and M reads sequenced not found in ' + project_name_swe + '_20158_0X_Table for QA HiSeq2000 sequencing results for samples')

    ### Get _id for sample_run_metrics and bcbb names -- use couchdb views instead.... To be fixed...
    print('\nGetting _id for sample_run_metrics')
    info = find_samp_from_view(samp_db, project_name)
    if len(info) > 0:
        print('sample_run_metrics found on couchdb for project ' + project_name)
    else:
        print('no sample_run_metrics found on couchdb for project ' + project_name)
        logging.warning(str('CouchDB: No sample_run_metrics found for project ' + project_name))
    for key in info:
        scilife_name = strip_scilife_name([info[key][1]])[info[key][1]]
        if scilife_name in obj['samples']:
            run_metrics = obj['samples'][scilife_name].setdefault("sample_run_metrics", {})
            run_metrics[info[key][0]] = key
    return obj
def get_proj_inf(WS_projects, project_name_swe, samp_db, proj_db, client, config):
    """Assemble a project_summary document for couchdb.

    Merges information from the Genomics Project list, the 20132 (reception
    control) and 20158 (sequencing QC) Google spreadsheets, the
    sample_run_metrics couchdb view and, for projects run on a Work Set,
    the 20135 library QC data.

    :param WS_projects: dict keyed by project name with Work Set info
        (passed on to get_20135_info) -- assumed shape, confirm with caller
    :param project_name_swe: project name as given (may contain non-ascii)
    :param samp_db: couchdb samples database
    :param proj_db: couchdb projects database
    :param client: authenticated google spreadsheet client
    :param config: configuration used for ProjectMetaData lookups
    :return: dict ready to be stored as a project_summary document
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Re-use the existing couchdb _id when the project is already stored.
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex
    logger.info("Handling proj %s %s" % (project_name, key))
    obj = {
        "application": "",
        "customer_reference": "",
        "min_m_reads_per_sample_ordered": "",
        "no_of_samples": "",
        "entity_type": "project_summary",
        "uppnex_id": "",
        "samples": {},
        "project_name": project_name,
        "project_id": "",
        "_id": key,
    }

    ### Genomics Project list
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        # Retry with the original (possibly non-ascii) name.
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning("Google Document Genomics Project list: %s not found" % project_name)
    else:
        if p.min_reads_per_sample.strip() != "":
            obj["min_m_reads_per_sample_ordered"] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != "":
            try:
                obj["no_of_samples"] = int(p.no_samples)
            except ValueError:
                # Not a plain integer -- keep the raw spreadsheet value.
                obj["no_of_samples"] = p.no_samples
        obj["uppnex_id"] = p.uppnex_id
        obj["application"] = p.application
        obj["customer_reference"] = p.customer_reference
        obj["project_id"] = "P" + p.project_id

    ### 20132 -- customer names and incoming QC status per sample
    try:
        info = get_20132_info(client, project_name_swe)
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' in the prep string marks failed incoming QC.
            incoming_QC_status = "F" if "F" in prep else "P"
            obj["samples"][scilife_name] = {
                "customer_name": cust_name,
                "scilife_name": scilife_name,
                "incoming_QC_status": incoming_QC_status,
            }
    except Exception:
        # Best effort: a missing/malformed sheet must not abort the project.
        pass

    ### 20158 -- sequencing status and million reads per sample
    try:
        info = get_20158_info(client, project_name_swe)
        scilife_names, preps = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            # Ambiguous when several raw names strip to the same name.
            status = "inconsistent" if striped_scilife_name in duplicates else info[key][0]
            m_reads = "inconsistent" if striped_scilife_name in duplicates else info[key][1]
            prep = preps[key]
            incoming_QC_status = "F" if "F" in prep else "P"
            if striped_scilife_name in obj["samples"]:
                obj["samples"][striped_scilife_name]["status"] = status
                obj["samples"][striped_scilife_name]["m_reads_sequenced"] = m_reads
            else:
                obj["samples"][striped_scilife_name] = {
                    "scilife_name": striped_scilife_name,
                    "status": status,
                    "m_reads_sequenced": m_reads,
                    "incoming_QC_status": incoming_QC_status,
                }
    except Exception:
        pass

    ### Get _id for sample_run_metrics
    info = find_samp_from_view(samp_db, project_name)
    if len(info) > 0:
        logger.debug("sample_run_metrics found on couchdb for project %s" % project_name)
    else:
        logger.warning("No sample_run_metrics found for project %s" % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Prep letter with the 'F' (failed) marker removed; default 'A'.
        prep = preps[sci_name_raw].replace("F", "") or "A"
        if scilife_name in obj["samples"]:
            library_prep = obj["samples"][scilife_name].setdefault("library_prep", {})
            prep_info = library_prep.setdefault(prep, {"sample_run_metrics": {}})
            prep_info["sample_run_metrics"][info[key][0]] = key

    ### 20135 -- average fragment size and prep status (Work Set projects)
    if project_name in WS_projects:
        logger.debug("project run on Work Set")
        info = WS_projects[project_name]
        info = get_20135_info(client, project_name_swe, info)
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            for prep in info[key]:
                try:
                    Av_sice = int(float(info[key][prep][0]))
                    prep_status = info[key][prep][1]
                    sample = obj["samples"][striped_scilife_name]
                    prep_info = sample.setdefault("library_prep", {}).setdefault(prep, {})
                    prep_info["average_size_bp"] = Av_sice
                    prep_info["prep_status"] = prep_status
                except Exception:
                    # Unknown sample or unparsable size -- skip this prep.
                    pass
    return obj
def get_proj_inf(WS_projects, project_name_swe, samp_db, proj_db, client, config):
    """Assemble a project_summary document for couchdb.

    Merges information from the Genomics Project list, the 20132 (reception
    control) and 20158 (sequencing QC) Google spreadsheets, the
    sample_run_metrics couchdb view and, for projects run on a Work Set,
    the 20135 library QC data.

    :param WS_projects: dict keyed by project name with Work Set info
        (passed on to get_20135_info) -- assumed shape, confirm with caller
    :param project_name_swe: project name as given (may contain non-ascii)
    :param samp_db: couchdb samples database
    :param proj_db: couchdb projects database
    :param client: authenticated google spreadsheet client
    :param config: configuration used for ProjectMetaData lookups
    :return: dict ready to be stored as a project_summary document
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Re-use the existing couchdb _id when the project is already stored.
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex
    logger.info('Handling proj %s %s' % (project_name, key))
    obj = {'application': '',
           'customer_reference': '',
           'min_m_reads_per_sample_ordered': '',
           'no_of_samples': '',
           'entity_type': 'project_summary',
           'uppnex_id': '',
           'samples': {},
           'project_name': project_name,
           'project_id': '',
           '_id': key}

    ### Genomics Project list
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        # Retry with the original (possibly non-ascii) name.
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning('Google Document Genomics Project list: %s not found' % project_name)
    else:
        if p.min_reads_per_sample.strip() != '':
            obj['min_m_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != '':
            try:
                obj['no_of_samples'] = int(p.no_samples)
            except ValueError:
                # Not a plain integer -- keep the raw spreadsheet value
                # (consistent with the sibling implementation).
                obj['no_of_samples'] = p.no_samples
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference
        obj['project_id'] = 'P' + p.project_id

    info = get_20132_info(client, project_name_swe)
    ### 20132 -- customer names and incoming QC status per sample
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' in the prep string marks failed incoming QC.
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            obj['samples'][scilife_name] = {'customer_name': cust_name,
                                            'scilife_name': scilife_name,
                                            'incoming_QC_status': incoming_QC_status}
    except Exception:
        # Best effort: a malformed sheet must not abort the whole project.
        pass

    ### 20158 -- sequencing status and million reads per sample
    info = get_20158_info(client, project_name_swe)
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            # Ambiguous when several raw names strip to the same name.
            status = 'inconsistent' if striped_scilife_name in duplicates else info[key][0]
            m_reads = 'inconsistent' if striped_scilife_name in duplicates else info[key][1]
            prep = preps[key]
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            if striped_scilife_name in obj['samples']:
                obj['samples'][striped_scilife_name]['status'] = status
                obj['samples'][striped_scilife_name]['m_reads_sequenced'] = m_reads
            else:
                obj['samples'][striped_scilife_name] = {'scilife_name': striped_scilife_name,
                                                        'status': status,
                                                        'm_reads_sequenced': m_reads,
                                                        'incoming_QC_status': incoming_QC_status}
    except Exception:
        pass

    ### Get _id for sample_run_metrics
    info = find_samp_from_view(samp_db, project_name)
    if len(info) > 0:
        logger.debug('sample_run_metrics found on couchdb for project %s' % project_name)
    else:
        logger.warning('No sample_run_metrics found for project %s' % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Prep letter with the 'F' (failed) marker removed; default 'A'.
        prep = preps[sci_name_raw].replace('F', '') or 'A'
        if scilife_name in obj['samples']:
            library_prep = obj['samples'][scilife_name].setdefault("library_prep", {})
            prep_info = library_prep.setdefault(prep, {"sample_run_metrics": {}})
            prep_info["sample_run_metrics"][info[key][0]] = key

    ### 20135 -- average fragment size and prep status (Work Set projects)
    if project_name in WS_projects:
        logger.debug('project run on Work Set')
        info = WS_projects[project_name]
        info = get_20135_info(client, project_name_swe, info)
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            for prep in info[key]:
                try:
                    Av_sice = int(float(info[key][prep][0]))
                    prep_status = info[key][prep][1]
                    sample = obj['samples'][striped_scilife_name]
                    prep_info = sample.setdefault("library_prep", {}).setdefault(prep, {})
                    prep_info["average_size_bp"] = Av_sice
                    prep_info["prep_status"] = prep_status
                except Exception:
                    # Unknown sample or unparsable size -- skip this prep.
                    pass
    return obj