Beispiel #1
0
def get_column(client, ssheet, wsheet, column, constraint=None):
    """Get the content of a specified column, optionally filtering on other columns.

    :param client: authenticated spreadsheet service client
    :param ssheet: spreadsheet object
    :param wsheet: worksheet object
    :param column: column index (1-based) or column header name
    :param constraint: optional mapping {column name: required value}; only
                       rows matching all constraints are returned
    :returns: list of cell values from the requested column
    """
    # Fix the mutable-default-argument pitfall: a shared `{}` default would
    # leak state between calls if a caller ever mutated it.
    if constraint is None:
        constraint = {}

    # If the column specified is a name, find the corresponding index
    try:
        column = int(column)
    except ValueError:
        column = get_column_index(client, ssheet, wsheet, column)

    # Create a filter mask based on the supplied constraints
    # (renamed from `filter`, which shadowed the builtin)
    filter_mask = [True] * row_count(wsheet)
    for con_name, con_value in constraint.items():
        con_column = get_column(client, ssheet, wsheet, con_name)
        for i, value in enumerate(con_column):
            filter_mask[i] &= (_to_unicode(value) == _to_unicode(con_value))

    # Get the content of the specified column index
    content_2d = get_cell_content(client, ssheet, wsheet, 0, column, 0, column)

    # Keep only the rows that have passed the constraint filters
    return [row[0] for i, row in enumerate(content_2d) if filter_mask[i]]
Beispiel #2
0
def get_column_index(client, ssheet, wsheet, name):
    """Get the index of the column with the specified name, or 0 if no column matches"""
    # Normalize the wanted name once; compare against each header cell.
    wanted = _to_unicode(name)
    header = get_header(client, ssheet, wsheet)
    for position, column_name in enumerate(header, start=1):
        if _to_unicode(column_name) == wanted:
            return position
    return 0
Beispiel #3
0
def get_column_index(client, ssheet, wsheet, name):
    """Get the index of the column with the specified name, or 0 if no column matches"""
    header = get_header(client, ssheet, wsheet)
    # Collect all 1-based positions whose header matches; first wins.
    matches = [idx + 1 for idx, col in enumerate(header)
               if _to_unicode(name) == _to_unicode(col)]
    return matches[0] if matches else 0
Beispiel #4
0
def get_folder(client, folder_name):
    """Get a folder if it exists"""
    wanted = _to_unicode(folder_name)
    query = gdata.docs.service.DocumentQuery(categories=["folder"], params={"showfolders": "true"})
    entries = client.Query(query.ToUri()).entry or []
    # Return the first folder entry whose title matches, else None.
    for candidate in entries:
        if _to_unicode(candidate.title.text) == wanted:
            return candidate
    return None
Beispiel #5
0
def get_folder(client, folder_name):
    """Get a folder if it exists"""
    q = gdata.docs.service.DocumentQuery(categories=['folder'],
                                         params={'showfolders': 'true'})
    # Lazily scan the query results for a title match; None if exhausted.
    matches = (entry for entry in (client.Query(q.ToUri()).entry or [])
               if _to_unicode(entry.title.text) == _to_unicode(folder_name))
    return next(matches, None)
Beispiel #6
0
def get_cell_content(client, ssheet, wsheet,
                     row_start=0, col_start=0, row_end=0, col_end=0):
    """Get the text contents of the cells from the supplied spreadsheet and
    worksheet and from the specified cell range as a two-dimensional list.

    A 0 for any boundary is a sentinel meaning "use the sheet's extent":
    start coordinates default to 1, end coordinates to the worksheet's
    row/column counts. Returns a list of rows, each a list of unicode strings.
    """
    # Normalize the range boundaries (the feed API expects 1-based
    # string coordinates).
    if str(row_start) == '0':
        row_start = '1'
    if str(col_start) == '0':
        col_start = '1'
    if str(row_end) == '0':
        row_end = str(row_count(wsheet))
    if str(col_end) == '0':
        col_end = str(column_count(wsheet))

    feed = get_cell_feed(client, ssheet, wsheet, row_start,
                         col_start, row_end, col_end)
    # Bug fix: the previous `feed or []` fallback still crashed below,
    # because `.entry` was accessed on the plain list. Guard explicitly.
    entries = feed.entry if feed else []

    # The feed is a flat, row-major sequence of cells; fold it back into a
    # 2D list using the requested column span as the row width.
    cols = int(col_end) - int(col_start) + 1
    content = []
    row = []
    for i, cell in enumerate(entries):
        if i % cols == 0:
            row = []
            content.append(row)
        row.append(_to_unicode(cell.content.text or ""))

    return content
Beispiel #7
0
def write_rows(client, ssheet, wsheet, header, rows):
    """Write the supplied data rows to the worksheet,
    using the supplied column headers.

    :returns: True if all cells were written, False on any API failure
    """
    # Get the keys
    ss_key = get_key(ssheet)
    ws_key = get_key(wsheet)

    try:
        # As a workaround for the InsertRow bugs with column names,
        # just use single lowercase letters as column headers to start with
        for i in range(len(header)):
            client.UpdateCell(1, i + 1, chr(97 + i), ss_key, ws_key)

        # Iterate over the rows and add the data to the worksheet
        for row in rows:
            row_data = {}
            for i, value in enumerate(row):
                row_data[chr(97 + i)] = unicode(value)
            client.InsertRow(row_data, ss_key, ws_key)

        # Lastly, substitute the one-letter header for the real string
        for i in range(len(header)):
            client.UpdateCell(1, i + 1, _to_unicode(header[i]), ss_key, ws_key)
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    except Exception:
        return False

    return True
Beispiel #8
0
def write_rows(client, ssheet, wsheet, header, rows):
    """Write the supplied data rows to the worksheet,
    using the supplied column headers.

    :returns: True if all cells were written, False on any API failure
    """
    # Get the keys
    ss_key = get_key(ssheet)
    ws_key = get_key(wsheet)

    try:
        # As a workaround for the InsertRow bugs with column names,
        # just use single lowercase letters as column headers to start with
        for i in range(len(header)):
            client.UpdateCell(1, i + 1, chr(97 + i), ss_key, ws_key)

        # Iterate over the rows and add the data to the worksheet
        for row in rows:
            row_data = {}
            for i, value in enumerate(row):
                row_data[chr(97 + i)] = unicode(value)
            client.InsertRow(row_data, ss_key, ws_key)

        # Lastly, substitute the one-letter header for the real string
        for i in range(len(header)):
            client.UpdateCell(1, i + 1, _to_unicode(header[i]), ss_key, ws_key)
    # Narrowed from a bare `except:`, which would also swallow
    # KeyboardInterrupt and SystemExit.
    except Exception:
        return False

    return True
Beispiel #9
0
def get_cell_content(client, ssheet, wsheet,
                     row_start=0, col_start=0, row_end=0, col_end=0):
    """Get the text contents of the cells from the supplied spreadsheet and
    worksheet and from the specified cell range as a two-dimensional list.

    A 0 for any boundary is a sentinel meaning "use the sheet's extent":
    start coordinates default to 1, end coordinates to the worksheet's
    row/column counts. Returns a list of rows, each a list of unicode strings.
    """
    # Normalize the range boundaries (the feed API expects 1-based
    # string coordinates).
    if str(row_start) == '0':
        row_start = '1'
    if str(col_start) == '0':
        col_start = '1'
    if str(row_end) == '0':
        row_end = str(row_count(wsheet))
    if str(col_end) == '0':
        col_end = str(column_count(wsheet))

    feed = get_cell_feed(client, ssheet, wsheet, row_start,
                         col_start, row_end, col_end)
    # Bug fix: the previous `feed or []` fallback still crashed below,
    # because `.entry` was accessed on the plain list. Guard explicitly.
    entries = feed.entry if feed else []

    # The feed is a flat, row-major sequence of cells; fold it back into a
    # 2D list using the requested column span as the row width.
    cols = int(col_end) - int(col_start) + 1
    content = []
    row = []
    for i, cell in enumerate(entries):
        if i % cols == 0:
            row = []
            content.append(row)
        row.append(_to_unicode(cell.content.text or ""))

    return content
Beispiel #10
0
def get_rows_with_constraint(client, ssheet, wsheet, constraint=None):
    """Get the content of the rows filtered by some column values.

    :param constraint: optional mapping {column name: required value}; only
                       rows matching all constraints are returned
    :returns: list of rows (each a list of cell values) passing all filters
    """
    # Fix the mutable-default-argument pitfall of `constraint={}`.
    if constraint is None:
        constraint = {}

    # Create a filter mask based on the supplied constraints
    # (renamed from `filter`, which shadowed the builtin)
    filter_mask = [True] * row_count(wsheet)
    for con_name, con_value in constraint.items():
        con_column = get_column(client, ssheet, wsheet, con_name)
        for i, value in enumerate(con_column):
            filter_mask[i] &= (_to_unicode(value) == _to_unicode(con_value))

    # Get the content of the entire worksheet
    content_2d = get_cell_content(client, ssheet, wsheet)

    # Keep only the rows that have passed the constraint filters
    return [row for i, row in enumerate(content_2d) if filter_mask[i]]
Beispiel #11
0
def get_rows_with_constraint(client, ssheet, wsheet, constraint=None):
    """Get the content of the rows filtered by some column values.

    :param constraint: optional mapping {column name: required value};
                       values are compared after unicode conversion and
                       whitespace stripping
    :returns: list of rows (each a list of cell values) passing all filters
    """
    # Fix the mutable-default-argument pitfall of `constraint={}`.
    if constraint is None:
        constraint = {}

    # Create a filter mask based on the supplied constraints
    filter_mask = [True] * row_count(wsheet)
    for con_name, con_value in constraint.items():
        con_column = get_column(client, ssheet, wsheet, con_name)
        for i, value in enumerate(con_column):
            filter_mask[i] &= (
                _to_unicode(value).strip() == _to_unicode(con_value).strip())

    # Get the content of the entire worksheet
    content_2d = get_cell_content(client, ssheet, wsheet)

    # Keep only the rows that have passed the constraint filters.
    return [row for i, row in enumerate(content_2d) if filter_mask[i]]
Beispiel #12
0
def _write_to_worksheet(client, ssheet, wsheet_title, rows, header, append, keys=None):
    """Generic method to write a set of rows to a worksheet on google docs.

    :param keys: optional list of header names; when appending, pre-existing
                 rows whose values in these columns match an incoming row are
                 deleted before the new data is written
    :returns: True if the rows were written successfully, False otherwise
    """
    # Fix the mutable-default-argument pitfall of `keys=[]`.
    if keys is None:
        keys = []

    # Convert the worksheet title to unicode
    wsheet_title = _to_unicode(wsheet_title)

    # Add a new worksheet, possibly appending or replacing a pre-existing
    # worksheet according to the append-flag.
    wsheet = g_spreadsheet.add_worksheet(client,
                                         ssheet,
                                         wsheet_title,
                                         len(rows) + 1,
                                         len(header),
                                         append)
    if wsheet is None:
        logger2.error("ERROR: Could not add a worksheet {!r} to " \
            "spreadsheet {!r}".format(wsheet_title, ssheet.title.text))
        return False

    # If keys are specified (will correspond to indexes in the header),
    # delete pre-existing rows with matching keys.
    if append and len(keys) > 0:
        wsheet_data = g_spreadsheet.get_cell_content(client, ssheet, wsheet, '2')
        wsheet_header = g_spreadsheet.get_header(client, ssheet, wsheet)
        try:
            wsheet_indexes = [wsheet_header.index(key) for key in keys]
            header_indexes = [header.index(key) for key in keys]
        except ValueError:
            logger2.warn("WARNING: Could not identify correct header for duplicate detection")
        else:
            for row in rows:
                try:
                    # Build a composite key and drop the first worksheet row
                    # that matches it (break right after the pop keeps the
                    # enumerate/pop combination safe).
                    key = "#".join([row[i] for i in header_indexes])
                    for i, wrow in enumerate(wsheet_data):
                        wkey = "#".join([wrow[j] for j in wsheet_indexes])
                        if wkey == key:
                            g_spreadsheet.delete_row(client, ssheet, wsheet, i + 1)
                            wsheet_data.pop(i)
                            break
                # Narrowed from a bare `except:`; duplicate removal is
                # best-effort and must not abort the write.
                except Exception:
                    logger2.warn("WARNING: Could not identify/replace duplicate rows")

    # Write the data to the worksheet
    success = g_spreadsheet.write_rows(client, ssheet, wsheet, header, rows)
    if success:
        logger2.info("Wrote data to the {!r}:{!r} " \
                     "worksheet".format(ssheet.title.text, wsheet_title))
    else:
        logger2.error("ERROR: Could not write data to the {!r}:{!r} " \
                      "worksheet".format(ssheet.title.text, wsheet_title))
    return success
Beispiel #13
0
def _write_to_worksheet(client, ssheet, wsheet_title, rows, header, append):
    """Generic method to write a set of rows to a worksheet on google docs"""

    # Normalize the worksheet title to unicode
    title = _to_unicode(wsheet_title)

    # Add a new worksheet, possibly appending or replacing a pre-existing
    # worksheet according to the append-flag
    wsheet = bcbio.google.spreadsheet.add_worksheet(
        client, ssheet, title, len(rows) + 1, len(header), append)
    if wsheet is None:
        log.info("Could not add a worksheet '%s' to spreadsheet '%s'" % (title, ssheet.title.text))
        return False

    # Write the data to the worksheet, using only the first element of each
    # column-header tuple as the column name
    log.info("Adding data to the '%s' worksheet" % (title))
    column_names = [col_header[0] for col_header in header]
    return bcbio.google.spreadsheet.write_rows(client, ssheet, wsheet, column_names, rows)
Beispiel #14
0
def add_worksheet(client, ssheet, title, rows=0, cols=0, append=False):
    """Add a new worksheet with the specified title to the specified spreadsheet.
    Will overwrite an existing worksheet with the same title unless append is True
    """
    # Look for a pre-existing worksheet with the same title
    existing = get_worksheet(client, ssheet, title)
    if existing and append:
        # Appending: re-use the worksheet that is already there
        return existing
    if existing:
        # Replacing: drop the existing worksheet before re-creating it
        client.DeleteWorksheet(existing)

    # Create the requested worksheet
    return client.AddWorksheet(_to_unicode(title), rows, cols, get_key(ssheet))
Beispiel #15
0
def add_worksheet(client, ssheet, title, rows=0, cols=0, append=False):
    """Add a new worksheet with the specified title to the specified spreadsheet.
    Will overwrite an existing worksheet with the same title unless append is True
    """
    # Check whether a worksheet with the same title already exists
    old_ws = get_worksheet(client, ssheet, title)
    if old_ws:
        if append:
            # When appending, simply hand back the existing worksheet
            return old_ws
        # Otherwise drop it so it can be re-created from scratch
        client.DeleteWorksheet(old_ws)

    # Create the requested worksheet
    return client.AddWorksheet(_to_unicode(title), rows, cols, get_key(ssheet))
Beispiel #16
0
def get_spreadsheet(ssheet_title, encoded_credentials):
    """Connect to Google docs and get a spreadsheet"""

    # Normalize the spreadsheet title to unicode
    ssheet_title = _to_unicode(ssheet_title)

    # Build an authenticated client that will talk to the Google Docs server
    client = g_spreadsheet.get_client()
    bcbio.google.connection.authenticate(client, encoded_credentials)

    # Locate the spreadsheet by title
    ssheet = g_spreadsheet.get_spreadsheet(client, ssheet_title)

    # Bail out with a (None, None) pair when nothing was found
    if not ssheet:
        logger2.warn("No document with specified title '%s' found in \
                      GoogleDocs repository" % ssheet_title)
        return (None, None)

    return (client, ssheet)
Beispiel #17
0
 def set_barcode_full_name(self, barcode_full_name):
     """Set the full barcode name, normalized to unicode."""
     normalized_name = _to_unicode(barcode_full_name)
     self.barcode_full_name = normalized_name
Beispiel #18
0
 def set_barcode_id(self, barcode_id):
     """Set the barcode id, normalized to unicode."""
     normalized_id = _to_unicode(barcode_id)
     self.barcode_id = normalized_id
Beispiel #19
0
 def set_description(self, description):
     """Set the description, normalized to unicode."""
     normalized = _to_unicode(description)
     self.description = normalized
Beispiel #20
0
 def set_project(self, project):
     """Set the project, resolving the canonical project name."""
     unicode_project = _to_unicode(project)
     self.project = get_project_name(unicode_project)
Beispiel #21
0
 def set_barcode_sequence(self, barcode_sequence):
     """Set the barcode sequence, normalized to unicode."""
     normalized_sequence = _to_unicode(barcode_sequence)
     self.barcode_sequence = normalized_sequence
def get_proj_inf(project_name_swe,samp_db,proj_db,credentials_file,config_file):
	"""Assemble a 'project_summary' document (dict) for CouchDB.

	Gathers, best-effort, for the given project:
	- ordered #M reads / uppnex id etc. from the Genomics Project list,
	- customer vs. Scilife sample names from the *_20132* spreadsheet,
	- sample status and read counts from the *_20158* spreadsheet,
	- sample_run_metrics ids from the samples database view.

	NOTE(review): the broad try/except blocks below deliberately make each
	lookup best-effort; failures fall through with a printed message.
	"""
	logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',filename='proj_coucdb.log',level=logging.INFO)
	project_name	 = _replace_ascii(_to_unicode(project_name_swe))
	# Re-use the existing CouchDB document key if one exists, else mint one.
	key		= find_proj_from_view(proj_db,project_name)
	if not key:
		key = uuid4().hex

        logging.info(str('Handling proj '+project_name+' '+ key))
        print key

        obj={   'application':'',
		'customer_reference':'',
		'min_m_reads_per_sample_ordered':'',
		'no_of_samples':'',
                'entity_type': 'project_summary',
                'uppnex_id': '',
		'samples': {},
                'project_id': project_name, 
                '_id': key}


	### Get minimal #M reads and uppnexid from Genomics Project list
	print '\nGetting minimal #M reads and uppnexid from Genomics Project list for project ' + project_name_swe

	config = cl.load_config(config_file)
	p = pm.ProjectMetaData(project_name,config)
	if p.project_name == None:
		p = pm.ProjectMetaData(project_name_swe,config)
	if p.project_name == None:
		print project_name+' not found in genomics project list'
		logging.warning(str('Google Document Genomics Project list: '+project_name+' not found')) 
	else:
		if p.min_reads_per_sample.strip() !='':
                	obj['min_m_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
		if p.no_samples.strip() !='':
			obj['no_of_samples'] = int(p.no_samples)
                obj['uppnex_id']                      = p.uppnex_id
		obj['application']		      = p.application
		obj['customer_reference']             = p.customer_reference


	### Get customer and Scilife Sample name from _20132_0X_Table for Sample Summary and Reception Control
	print '\nTrying to find Scilife Sample names from '+project_name_swe+'_20132_0X_Table for Sample Summary and Reception Control'

	# Map spreadsheet version -> (worksheet name, scilife-name column header)
       	versions = { 	"01":["Data",'Sample name Scilife (Index included)'],
			"02":["Sheet1",'Sample name Scilife'],
			"04":["Reception control",'Complete sample name'],
			"05":["Reception control",'SciLifeLab ID']}

	# Load google document
	client	= make_client(credentials_file)
	feed 	= bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe+'_20132', False) #FIXME: handle spreadsheet naming mistakes
	if len(feed.entry) == 0:
    		ssheet=None
		logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20132_XXX'))
		print "Could not find spreadsheet" 
	else:
    		ssheet	= feed.entry[0].title.text
  		version	= ssheet.split('_20132_')[1].split(' ')[0].split('_')[0]
		wsheet 	= versions[version][0]
		header 	= versions[version][1]
		content, ws_key, ss_key = get_google_document(ssheet, wsheet, credentials_file)

	# Get Scilife Sample names
	try:    
	   	dummy, customer_names_colindex 	= get_column(content,'Sample name from customer')
		row_ind, scilife_names_colindex = get_column(content, header)
		info={}
                for j,row in enumerate(content):
			if (j > row_ind):
				try:
					cust_name = str(row[customer_names_colindex]).strip()
					sci_name  = str(row[scilife_names_colindex]).strip().replace('-','_')
					if cust_name != '':
						info[sci_name] = cust_name
				except:
					pass
		print 'Names found'
		for scilife_name in info:
			try:
				obj['samples'][scilife_name] = {'customer_name': info[scilife_name], 'scilife_name':scilife_name}
			except:
				pass
        except:
		print 'Names not found'
                pass

	### Get Sample Status from _20158_01_Table for QA HiSeq2000 sequencing results for samples
	print '\nGetting Sample Status from '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'

	# Map spreadsheet version -> relevant column headers
        versions = {    "01":['Sample name Scilife',"Total reads per sample","Passed=P/ not passed=NP*",'Sample name from customer'],
                        "02":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads",'Sample name (customer)'],
                        "03":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads",'Sample name (customer)']}

        # Load google document, retrying with known title-separator mistakes
	mistakes = ["_"," _"," ",""]
	found='FALSE'
	for m in mistakes:
		feed    = bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe + m + '20158', False)
        	if len(feed.entry) != 0:
			try:
				ssheet  = feed.entry[0].title.text
				version = ssheet.split(str(m+'20158_'))[1].split(' ')[0].split('_')[0]
				content, ws_key, ss_key = get_google_document(ssheet,"Sheet1",credentials_file)
				found='TRUE'
				break
                	except:
				pass
	if found=='TRUE':
		print 'Google document found!'
	else:
		print 'Google document NOT found!'
		logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20158_XXX'))

	# Get status etc from loaded document
	try:
		dummy, P_NP_colindex 			= get_column(content,versions[version][2])
		dummy, No_reads_sequenced_colindex 	= get_column(content,versions[version][1])
		dummy, customer_names_colindex          = get_column(content,versions[version][3])
        	row_ind, scilife_names_colindex 	= get_column(content,versions[version][0])
		info={}
                for j,row in enumerate(content):
			if ( j > row_ind ):
				try:
                                        sci_name=str(row[scilife_names_colindex]).strip()
					cust_name=str(row[customer_names_colindex]).strip()
                                        no_reads=str(row[No_reads_sequenced_colindex]).strip()
                                        if sci_name[-1]=='F':
                                                status='P'
                                        else:
                                                status	=str(row[P_NP_colindex]).strip()
                                        info[sci_name]	=[status,no_reads,cust_name]
				except:
					pass
		# Samples whose stripped names collide are flagged 'inconsistent'.
		scilife_names 	= strip_scilife_name(info.keys())
		duplicates	= find_duplicates(scilife_names.values())
		for key in scilife_names:
			striped_scilife_name 	= scilife_names[key]
			status			= info[key][0]
			m_reads			= info[key][1]
			cust_name		= info[key][2]
			if striped_scilife_name in duplicates:
                                        status 	= 'inconsistent'
					m_reads	= 'inconsistent'
			try:
                		if obj['samples'].has_key(striped_scilife_name):
                        		obj['samples'][striped_scilife_name]['status']            = status
                        	        obj['samples'][striped_scilife_name]['m_reads_sequenced'] = m_reads
				else:
					obj['samples'][striped_scilife_name]=	{'customer_name': cust_name, 
										'scilife_name':striped_scilife_name,
										'status':status,
										'm_reads_sequenced':m_reads}
			except:
				pass
        except:
		print 'Status and M reads sequenced not found in '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
                pass


	### Get _id for sample_run_metrics and bcbb names -- use couchdb views instead.... To be fixed...
	print '\nGetting _id for sample_run_metrics'

	info	= find_samp_from_view(samp_db,project_name)

	if len(info.keys())>0:
		print 'sample_run_metrics found on couchdb for project '+ project_name
	else:
		print 'no sample_run_metrics found on couchdb for project '+ project_name
		logging.warning(str('CouchDB: No sample_run_metrics found for project '+ project_name))
        for key in info:
        	scilife_name = strip_scilife_name([info[key][1]])[info[key][1]]
                if obj['samples'].has_key(scilife_name):
        		if obj['samples'][scilife_name].has_key("sample_run_metrics"):
                		obj['samples'][scilife_name]["sample_run_metrics"][info[key][0]]=key
                        else:
                              	obj['samples'][scilife_name]["sample_run_metrics"] = {info[key][0]:key}
	return obj
Beispiel #23
0
 def set_description(self, description):
     """Set the description after unicode normalization."""
     self.description = _to_unicode(description)
Beispiel #24
0
 def set_name(self, name):
     """Set the name, coerced to str and then normalized to unicode."""
     coerced = str(name)
     self.name = _to_unicode(coerced)
def get_proj_inf(project_name_swe,qc,credentials_file,config_file):
	"""Assemble a 'ProjectSummary' document (dict) for CouchDB.

	Gathers, best-effort, for the given project:
	- ordered #M reads / uppnex id etc. from the Genomics Project list,
	- customer vs. Scilife sample names from the *_20132* spreadsheet,
	- sample status and read counts from the *_20158* spreadsheet,
	- SampleQCMetrics ids and bcbb names from the qc mapping.

	NOTE(review): the broad try/except blocks below deliberately make each
	lookup best-effort; failures fall through with a printed message.
	"""
	logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',filename='ProjectSummary.log',level=logging.INFO)#,datefmt='%Y-%m-%d'
	project_name	 = _replace_ascii(_to_unicode(project_name_swe))
	# Document key is derived deterministically from the project name.
	key  		 = hashlib.md5(project_name).hexdigest()
	print key
        obj={   'Application':'',
		'Customer_reference':'',
		'Min_M_reads_per_sample_ordered':'',
		'No_of_samples':'',
                'Entity_type': 'ProjectSummary',
                'Uppnex_id': '',
		'Samples': {},
                'Project_id': project_name, 
                'Entity_version': 0.1,
                '_id': key}

	logging.info(str('Handling proj '+project_name+' '+ key))

	### Get minimal #M reads and uppnexid from Genomics Project list
	print '\nGetting minimal #M reads and uppnexid from Genomics Project list for project ' + project_name_swe
	config = cl.load_config(config_file)
	p = pm.ProjectMetaData(project_name,config)
	if p.project_name==None:
		p = pm.ProjectMetaData(project_name_swe,config)
	if p.project_name==None:
		print project_name+' not found in genomics project list'
		logging.warning(str('Google Document Genomics Project list: '+project_name+' not found'))
	else:
                obj['Min_M_reads_per_sample_ordered'] = float(p.min_reads_per_sample) 
                obj['Uppnex_id']                      = p.uppnex_id
                obj['No_of_samples']                  = int(p.no_samples)
		obj['Application']		      = p.application
		obj['Customer_reference']	      = p.customer_reference


	### Get customer and Scilife Sample name from _20132_0X_Table for Sample Summary and Reception Control
	print '\nTrying to find Scilife Sample names from '+project_name_swe+'_20132_0X_Table for Sample Summary and Reception Control'

	# Map spreadsheet version -> (worksheet name, scilife-name column header)
       	versions = { 	"01":["Data",'Sample name Scilife (Index included)'],
			"02":["Sheet1",'Sample name Scilife'],
			"04":["Reception control",'Complete sample name'],
			"05":["Reception control",'SciLifeLab ID']}

	# Load google document
	client	= make_client(credentials_file)
	feed 	= bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe+'_20132', False) #FIXME: handle spreadsheet naming mistakes
	if len(feed.entry) == 0:
    		ssheet=None
		logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20132_XXX'))
		print "Could not find spreadsheet" 
	else:
    		ssheet	= feed.entry[0].title.text
  		version	= ssheet.split('_20132_')[1].split(' ')[0].split('_')[0]
		wsheet 	= versions[version][0]
		header 	= versions[version][1]
		content, ws_key, ss_key = get_google_document(ssheet, wsheet, credentials_file)

	# Get Scilife Sample names
	try:    
	   	dummy, customer_names_colindex 	= get_column(content,'Sample name from customer')
		row_ind, scilife_names_colindex = get_column(content, header)
		info={}
                for j,row in enumerate(content):
			if (j > row_ind):
				try:
                                        cust_name = str(row[customer_names_colindex]).strip()
                                        sci_name  = str(row[scilife_names_colindex]).strip().replace('-','_')
                                        if cust_name != '':
                                                info[sci_name] = cust_name
				except:
					pass
		print 'Names found'
		for scilife_name in info:
			try:
				obj['Samples'][scilife_name] = {'customer_name': info[scilife_name], 'scilife_name':scilife_name}
			except:
				pass
        except:
		print 'Names not found'
                pass

	### Get Sample Status from _20158_01_Table for QA HiSeq2000 sequencing results for samples
	print '\nGetting Sample Status from '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'

	# Map spreadsheet version -> relevant column headers
        versions = {    "01":['Sample name Scilife',"Total reads per sample","Passed=P/ not passed=NP*"],
                        "02":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads"],
                        "03":["Sample name (SciLifeLab)","Total number of reads (Millions)","Based on total number of reads"]}
        # Load google document, retrying with known title-separator mistakes
	mistakes = ["_"," _"," ",""]
	found='FALSE'
	for m in mistakes:
		feed    = bcbio.google.spreadsheet.get_spreadsheets_feed(client,project_name_swe + m + '20158', False)
        	if len(feed.entry) == 0:
                	ssheet=None
                	print "Could not find spreadsheet"
		else:
			ssheet  = feed.entry[0].title.text
			version = ssheet.split(str(m+'20158_'))[1].split(' ')[0].split('_')[0]
	        	wsheet = "Sheet1"
			try:
				content, ws_key, ss_key = get_google_document(ssheet,wsheet,credentials_file)
				found='TRUE'
				break
                	except:
				pass
	if found=='TRUE':
		print 'Google document found!'
	else:
		print 'Google document NOT found!'
		logging.warning("Google Document %s: Could not find spreadsheet" % str(project_name_swe+'_20158_XXX'))
	# Get status etc from loaded document
	try:
		dummy, P_NP_colindex 			= get_column(content,versions[version][2])
		dummy, No_reads_sequenced_colindex 	= get_column(content,versions[version][1])
        	row_ind, scilife_names_colindex 	= get_column(content,versions[version][0])
		info={}
                for j,row in enumerate(content):
			if ( j > row_ind ):
				try:
					sci_name=str(row[scilife_names_colindex]).strip()
					no_reads=str(row[No_reads_sequenced_colindex]).strip()
					if sci_name[-1]=='F':
						status='P' 
					else:
						status=str(row[P_NP_colindex]).strip()
					info[sci_name]=[status,no_reads]
				except:
					pass
		print info
		# Samples whose stripped names collide are flagged 'inconsistent'.
		scilife_names 	= strip_scilife_name_prep(info.keys())
		duplicates	= find_duplicates(scilife_names.values())
		for key in scilife_names:
			striped_scilife_name = scilife_names[key]
			try:
				if striped_scilife_name in duplicates:
					obj['Samples'][striped_scilife_name] = {'status':'inconsistent','M_reads_sequenced':'inconsistent'}

                		elif obj['Samples'].has_key(striped_scilife_name):
                        		obj['Samples'][striped_scilife_name]['status']            = info[key][0]
                        	        obj['Samples'][striped_scilife_name]['M_reads_sequenced'] = info[key][1]
			except:
				pass
        except:
		print 'Status and M reads sequenced not found in '+project_name_swe+'_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
                pass


	### Get _id for SampleQCMetrics and bcbb names  
	#	use couchdb views instead.... To be fixed...
	print '\nGetting _id for SampleQCMetrics'
	info={}

        for key in qc:
                SampQC = qc.get(key)
                if SampQC.has_key("entity_type"):
                        if (SampQC["entity_type"] == "SampleQCMetrics") & SampQC.has_key("sample_prj"):
                                if SampQC["sample_prj"] == project_name:
                                        info[SampQC["_id"]]=[str(SampQC["name"]).strip(),SampQC["barcode_name"]]

        for key in info:
                scilife_name=strip_scilife_name_prep([info[key][1]])[info[key][1]]
                if obj['Samples'].has_key(scilife_name):
                        if obj['Samples'][scilife_name].has_key("SampleQCMetrics"):
                                obj['Samples'][scilife_name]["SampleQCMetrics"].append(key)
                        else:
                                obj['Samples'][scilife_name]["SampleQCMetrics"] = [key]
                        if obj['Samples'][scilife_name].has_key("bcbb_names"):
                                obj['Samples'][scilife_name]["bcbb_names"].append(info[key][0])
                        else:
                                obj['Samples'][scilife_name]["bcbb_names"] = [info[key][0]]

	return obj
Beispiel #26
0
 def set_barcode_id(self, barcode_id):
     """Set the barcode id after unicode normalization."""
     self.barcode_id = _to_unicode(barcode_id)
def get_proj_inf(project_name_swe, samp_db, proj_db, CREDENTIALS_FILE, config):
    """Assemble a 'project_summary' couchdb document for *project_name_swe*.

    Data is collected best-effort from several sources:
      - the Genomics Project list (``pmeta.ProjectMetaData``),
      - Google spreadsheet 20132 (sample names),
      - Google spreadsheet 20158 (sample status / M reads sequenced),
      - Google spreadsheet 20135 (library prep / average fragment size),
      - the samples couchdb *samp_db* (sample_run_metrics ids).

    Parameters:
        project_name_swe -- project name, possibly containing non-ascii chars
        samp_db          -- couchdb samples database handle
        proj_db          -- couchdb projects database handle
        CREDENTIALS_FILE -- path to the Google API credentials file
        config           -- config object handed to ProjectMetaData

    Returns the assembled dict.  Its '_id' is the existing project document
    id found in *proj_db*, or a fresh uuid4 hex when the project is new.

    NOTE(review): the broad ``try/except`` blocks make each spreadsheet
    section best-effort on purpose; a failure in one section leaves the
    fields it would have filled at their defaults.
    """

    # Normalise the (possibly unicode) project name to ascii and reuse an
    # existing couchdb id when the project is already known.
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex

    logger.info("Handling proj %s %s" % (project_name, key))

    # Skeleton of the project_summary document; sections below fill it in.
    obj = {
        "application": "",
        "customer_reference": "",
        "min_m_reads_per_sample_ordered": "",
        "no_of_samples": "",
        "entity_type": "project_summary",
        "uppnex_id": "",
        "samples": {},
        "project_name": project_name,
        "project_id": "",
        "_id": key,
    }

    ### Get minimal #M reads and uppnexid from Genomics Project list
    logger.debug("Getting minimal #M reads and uppnexid from Genomics Project list for project %s" % project_name_swe)

    # Look the project up under its ascii name first, then the raw name.
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning("Google Document Genomics Project list: %s not found" % project_name)
    else:
        if p.min_reads_per_sample.strip() != "":
            obj["min_m_reads_per_sample_ordered"] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != "":
            obj["no_of_samples"] = int(p.no_samples)
        obj["uppnex_id"] = p.uppnex_id
        obj["application"] = p.application
        obj["customer_reference"] = p.customer_reference
        obj["project_id"] = "P" + p.project_id

        ### 20132 -- sample names spreadsheet
    logger.debug("Trying to find Scilife Sample names from table 20132")

    # Per-template-version [worksheet name, scilife-name column header].
    versions = {
        "01": ["Data", "Sample name Scilife (Index included)"],
        "02": ["Sheet1", "Sample name Scilife"],
        "04": ["Reception control", "Complete sample name"],
        "05": ["Reception control", "SciLifeLab ID"],
    }

    # Load google document
    client = make_client(CREDENTIALS_FILE)
    feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client, project_name_swe + "_20132", False)
    if len(feed.entry) == 0:
        ssheet = None
        logger.warning("Could not find spreadsheet 20132 for %s" % project_name_swe)
    else:
        ssheet = feed.entry[0].title.text
        # Template version is parsed out of the spreadsheet title; assumes
        # the title contains "_20132_<version>".
        version = ssheet.split("_20132_")[1].split(" ")[0].split("_")[0]
        wsheet = versions[version][0]
        header = versions[version][1]
        content, ws_key, ss_key = get_google_document(ssheet, wsheet, CREDENTIALS_FILE)
        logger.debug("Document found")
        logger.debug(ssheet)

    # Get Scilife Sample names
    # NOTE(review): if the spreadsheet was not found above, `content` and
    # `header` are unbound here; the resulting NameError is swallowed by
    # the outer except below.
    try:
        dummy, customer_names_colindex = get_column(content, "Sample name from customer")
        row_ind, scilife_names_colindex = get_column(content, header)
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    cust_name = str(row[customer_names_colindex]).strip()
                    sci_name = str(row[scilife_names_colindex]).strip()
                    if cust_name != "":
                        info[sci_name] = cust_name
                except:
                    pass

        logger.debug("Names found")
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' in the prep string flags a failed incoming QC.
            incoming_QC_status = "F" if "F" in prep else "P"
            try:
                obj["samples"][scilife_name] = {
                    "customer_name": cust_name,
                    "scilife_name": scilife_name,
                    "incoming_QC_status": incoming_QC_status,
                }
            except:
                pass
    except:
        logger.debug("Names not found")
        pass

        ### 20158 -- sample status spreadsheet
    logger.debug("Getting Sample Status from table 20158")

    # Per-template-version column headers:
    # [scilife name, total reads, pass/fail, customer name].
    versions = {
        "01": [
            "Sample name Scilife",
            "Total reads per sample",
            "Passed=P/ not passed=NP*",
            "Sample name from customer",
        ],
        "02": [
            "Sample name (SciLifeLab)",
            "Total number of reads (Millions)",
            "Based on total number of reads after mapping and duplicate removal",
            "Sample name (customer)",
        ],
        "03": [
            "Sample name (SciLifeLab)",
            "Total number of reads (Millions)",
            "Based on total number of reads after mapping and duplicate removal",
            "Sample name (customer)",
        ],
    }

    # Load google document; try the known title-separator typos in turn.
    mistakes = ["_", " _", " ", ""]
    found = False
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client, project_name_swe + m + "20158", False)
        if len(feed.entry) != 0:
            try:
                ssheet = feed.entry[0].title.text
                version = ssheet.split(str(m + "20158_"))[1].split(" ")[0].split("_")[0]
                content, ws_key, ss_key = get_google_document(ssheet, "Sheet1", CREDENTIALS_FILE)
                found = True
                break
            except:
                pass
    if found:
        logger.debug("Google document found")
        logger.debug(ssheet)
    else:
        logger.warning("Could not find spreadsheet 20158 for %s" % project_name_swe)

        # Get status etc from loaded document.
        # NOTE(review): if 20158 was not found, `content`/`version` still
        # hold the 20132 values from above, so the try below operates on
        # the wrong sheet and is expected to fail into the except.
    try:
        dummy, P_NP_colindex = get_column(content, versions[version][2])
        dummy, No_reads_sequenced_colindex = get_column(content, versions[version][1])
        dummy, customer_names_colindex = get_column(content, versions[version][3])
        row_ind, scilife_names_colindex = get_column(content, versions[version][0])
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    sci_name = str(row[scilife_names_colindex]).strip()
                    cust_name = str(row[customer_names_colindex]).strip()
                    no_reads = str(row[No_reads_sequenced_colindex]).strip()
                    status = str(row[P_NP_colindex]).strip()
                    info[sci_name] = [status, no_reads, cust_name]
                except:
                    pass
        scilife_names, preps = strip_scilife_name(info.keys())
        # Samples whose stripped names collide get flagged 'inconsistent'.
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            status = "inconsistent" if striped_scilife_name in duplicates else info[key][0]
            m_reads = "inconsistent" if striped_scilife_name in duplicates else info[key][1]
            cust_name = info[key][2]
            prep = preps[key]
            incoming_QC_status = "F" if "F" in prep else "P"
            if obj["samples"].has_key(striped_scilife_name):
                obj["samples"][striped_scilife_name]["status"] = status
                obj["samples"][striped_scilife_name]["m_reads_sequenced"] = m_reads
            else:
                obj["samples"][striped_scilife_name] = {
                    "customer_name": cust_name,
                    "scilife_name": striped_scilife_name,
                    "status": status,
                    "m_reads_sequenced": m_reads,
                    "incoming_QC_status": incoming_QC_status,
                }

    except:
        pass

    ### Get _id for sample_run_metrics
    logger.debug("Getting _id for sample_run_metrics")
    info = find_samp_from_view(samp_db, project_name)

    if len(info.keys()) > 0:
        logger.debug("sample_run_metrics found on couchdb for project %s" % project_name)
    else:
        logger.warning("No sample_run_metrics found for project %s" % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Prep defaults to 'A' when stripping the 'F' (failed) marker
        # leaves nothing.
        prep = "A" if preps[sci_name_raw].replace("F", "") == "" else preps[sci_name_raw].replace("F", "")

        # File the run-metrics doc id under samples.<name>.library_prep.<prep>.
        if obj["samples"].has_key(scilife_name):
            if obj["samples"][scilife_name].has_key("library_prep"):
                if obj["samples"][scilife_name]["library_prep"].has_key(prep):
                    obj["samples"][scilife_name]["library_prep"][prep]["sample_run_metrics"][info[key][0]] = key
                else:
                    obj["samples"][scilife_name]["library_prep"][prep] = {"sample_run_metrics": {info[key][0]: key}}
            else:
                obj["samples"][scilife_name]["library_prep"] = {prep: {"sample_run_metrics": {info[key][0]: key}}}

                ### 20135 -- library prep spreadsheet
    logger.debug("Getting average read length from table 20135")

    # Per-template-version column headers.
    versions = {
        "04": ["SciLifeLab ID", "Prep version (A, B etc)", "Average size (bp)"],
        "05": ["SciLifeLab ID", "Prep version (A, B etc)", "Average size (bp)"],
        "06": ["SciLifeLab ID", "Prep version (A, B etc)", "Average size (bp)"],
    }
    mistakes = ["_", "_ ", " _", " ", ""]
    found = False

    # NOTE(review): unlike the 20158 loop this one has no break, so the
    # last matching title wins.
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(client, project_name_swe + m + "20135", False)
        if len(feed.entry) != 0:
            ssheet = feed.entry[0].title.text
            version = ssheet.split("20135")[1].replace("_", " ").lstrip(" ").split(" ")[0]
            content, ws_key, ss_key = get_google_document(ssheet, "Library QC", CREDENTIALS_FILE)
            found = True

    if found:
        logger.debug("Google document found")
        logger.debug(ssheet)
    else:
        logger.debug("Google document not found")

    # Get average read length from loaded document
    try:
        dummy, Finished_library_col = get_column(content, "Finished library ")
        dummy, Av_sice_bp_colindex = get_column(content, versions[version][2], Finished_library_col)
        row_ind, scilife_names_colindex = get_column(content, versions[version][0])
        row_ind, prep_colindex = get_column(content, versions[version][1])
        info = {}
        for j, row in enumerate(content):
            if j > row_ind:
                try:
                    sci_name = str(row[scilife_names_colindex]).strip()
                    Av_sice = str(row[Av_sice_bp_colindex]).strip()
                    prep = str(row[prep_colindex]).strip()
                    info[sci_name] = [Av_sice, prep]
                except:
                    pass
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            Av_sice = info[key][0]
            # Prefer the sheet's explicit prep column over the one parsed
            # from the sample name.
            if info[key][1].strip() != "":
                prep = info[key][1]  # TODO: verify this choice (original note: "Kontrollera!")
            elif preps[key].strip() != "":
                prep = preps[key]
                prep = "A" if preps[key].replace("F", "") == "" else preps[key].replace("F", "")
            try:
                if obj["samples"][striped_scilife_name].has_key("library_prep"):
                    obj["samples"][striped_scilife_name]["library_prep"][prep]["average_size_bp"] = Av_sice
                else:
                    obj["samples"][striped_scilife_name]["library_prep"] = {prep: {"average_size_bp": Av_sice}}
            except:
                pass
    except:
        pass
    return obj
Beispiel #28
0
 def set_barcode_type(self, barcode_type):
     """Record the barcode type, normalised to a unicode string."""
     normalised = _to_unicode(barcode_type)
     self.barcode_type = normalised
Beispiel #29
0
 def set_barcode_sequence(self, barcode_sequence):
     """Record the barcode sequence, normalised to a unicode string."""
     normalised = _to_unicode(barcode_sequence)
     self.barcode_sequence = normalised
Beispiel #30
0
 def set_barcode_type(self, barcode_type):
     """Record the barcode type, normalised to a unicode string."""
     normalised = _to_unicode(barcode_type)
     self.barcode_type = normalised
Beispiel #31
0
 def set_genome_build(self, genome_build):
     """Record the genome build identifier as a unicode string."""
     build = _to_unicode(genome_build)
     self.genome_build = build
Beispiel #32
0
 def set_project(self, project):
     """Resolve *project* through get_project_name and store the result."""
     unicode_project = _to_unicode(project)
     self.project = get_project_name(unicode_project)
Beispiel #33
0
 def set_analysis(self, analysis):
     """Record the analysis name, normalised to a unicode string."""
     normalised = _to_unicode(analysis)
     self.analysis = normalised
Beispiel #34
0
def get_proj_inf(WS_projects, project_name_swe, samp_db, proj_db, client,
                 config):
    """Assemble a 'project_summary' couchdb document for *project_name_swe*.

    Data sources: the Genomics Project list (``pmeta.ProjectMetaData``),
    Google spreadsheets 20132/20158/20135 (via the get_20132_info /
    get_20158_info / get_20135_info helpers), the Work Set projects dict
    *WS_projects*, and the samples couchdb *samp_db* (sample_run_metrics
    ids).  Returns the assembled dict; '_id' is the existing project doc
    id from *proj_db*, or a fresh uuid4 hex.

    NOTE(review): spreadsheet sections are best-effort on purpose -- the
    broad try/excepts leave fields at their defaults on any failure.
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    key = find_proj_from_view(proj_db, project_name)
    if not key: key = uuid4().hex

    logger.info('Handling proj %s %s' % (project_name, key))

    # Skeleton of the project_summary document; sections below fill it in.
    obj = {
        'application': '',
        'customer_reference': '',
        'min_m_reads_per_sample_ordered': '',
        'no_of_samples': '',
        'entity_type': 'project_summary',
        'uppnex_id': '',
        'samples': {},
        'project_name': project_name,
        'project_id': '',
        '_id': key
    }

    ### Genomics Project list: ordered reads, uppnex id, application, etc.
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning('Google Document Genomics Project list: %s not found' %
                       project_name)
    else:
        if p.min_reads_per_sample.strip() != '':
            obj['min_m_reads_per_sample_ordered'] = float(
                p.min_reads_per_sample)
        if p.no_samples.strip() != '':
            obj['no_of_samples'] = int(p.no_samples)
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference
        obj['project_id'] = 'P' + p.project_id

    info = get_20132_info(client, project_name_swe)
    ### 20132 -- sample names; maps scilife name -> customer name
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' in the prep string flags a failed incoming QC.
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            try:
                obj['samples'][scilife_name] = {
                    'customer_name': cust_name,
                    'scilife_name': scilife_name,
                    'incoming_QC_status': incoming_QC_status
                }
            except:
                pass
    except:
        pass

    ### 20158 -- sample status and M reads sequenced
    info = get_20158_info(client, project_name_swe)
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        # Samples whose stripped names collide get flagged 'inconsistent'.
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            status = 'inconsistent' if striped_scilife_name in duplicates else info[
                key][0]
            m_reads = 'inconsistent' if striped_scilife_name in duplicates else info[
                key][1]
            prep = preps[key]
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            if obj['samples'].has_key(striped_scilife_name):
                obj['samples'][striped_scilife_name]['status'] = status
                obj['samples'][striped_scilife_name][
                    'm_reads_sequenced'] = m_reads
            else:
                obj['samples'][striped_scilife_name] = {
                    'scilife_name': striped_scilife_name,
                    'status': status,
                    'm_reads_sequenced': m_reads,
                    'incoming_QC_status': incoming_QC_status
                }
    except:
        pass

    ### Get _id for sample_run_metrics from the samples couchdb
    info = find_samp_from_view(samp_db, project_name)
    if len(info.keys()) > 0:
        logger.debug('sample_run_metrics found on couchdb for project %s' %
                     project_name)
    else:
        logger.warning('No sample_run_metrics found for project %s' %
                       project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Prep defaults to 'A' when stripping the 'F' marker leaves nothing.
        prep = 'A' if preps[sci_name_raw].replace(
            'F', '') == '' else preps[sci_name_raw].replace('F', '')

        # File the run-metrics id under samples.<name>.library_prep.<prep>.
        if obj['samples'].has_key(scilife_name):
            if obj['samples'][scilife_name].has_key("library_prep"):
                if obj['samples'][scilife_name]["library_prep"].has_key(prep):
                    obj['samples'][scilife_name]["library_prep"][prep][
                        "sample_run_metrics"][info[key][0]] = key
                else:
                    obj['samples'][scilife_name]["library_prep"][prep] = {
                        "sample_run_metrics": {
                            info[key][0]: key
                        }
                    }
            else:
                obj['samples'][scilife_name]["library_prep"] = {
                    prep: {
                        "sample_run_metrics": {
                            info[key][0]: key
                        }
                    }
                }

    ### 20135 -- library prep: average size and prep status
    # NOTE(review): when the project is not in WS_projects, the couchdb
    # view result above is passed into get_20135_info as the seed dict --
    # confirm this is intended rather than an accidental reuse of `info`.
    if WS_projects.has_key(project_name):
        logger.debug('project run on Work Set')
        info = WS_projects[project_name]
    info = get_20135_info(client, project_name_swe, info)
    scilife_names, preps = strip_scilife_name(info.keys())
    for key in scilife_names:
        striped_scilife_name = scilife_names[key]
        for prep in info[key]:
            try:
                Av_sice = int(float(info[key][prep][0]))
                prep_status = info[key][prep][1]
                if obj['samples'][striped_scilife_name].has_key(
                        "library_prep"):
                    if obj['samples'][striped_scilife_name][
                            "library_prep"].has_key(prep):
                        obj['samples'][striped_scilife_name]["library_prep"][
                            prep]["average_size_bp"] = Av_sice
                        obj['samples'][striped_scilife_name]["library_prep"][
                            prep]["prep_status"] = prep_status
                    else:
                        obj['samples'][striped_scilife_name]["library_prep"][
                            prep] = {
                                "average_size_bp": Av_sice,
                                "prep_status": prep_status
                            }
                else:
                    obj['samples'][striped_scilife_name]["library_prep"] = {
                        prep: {
                            "average_size_bp": Av_sice,
                            "prep_status": prep_status
                        }
                    }
            except:
                pass

    return obj
Beispiel #35
0
 def set_barcode_full_name(self, barcode_full_name):
     """Record the barcode's full name, normalised to a unicode string."""
     normalised = _to_unicode(barcode_full_name)
     self.barcode_full_name = normalised
Beispiel #36
0
 def set_name(self, name):
     """Normalise *name* via get_sample_name and store it."""
     unicode_name = _to_unicode(name)
     self.name = get_sample_name(unicode_name)
Beispiel #37
0
 def set_full_name(self, name):
     """Record the full sample name, normalised to a unicode string."""
     normalised = _to_unicode(name)
     self.full_name = normalised
Beispiel #38
0
def get_proj_inf(project_name_swe, samp_db, proj_db, credentials_file,
                 config_file):
    """Assemble a 'project_summary' couchdb document (legacy variant).

    Gathers data from the Genomics Project list, Google spreadsheets
    20132 (sample names) and 20158 (sample status), and the samples
    couchdb (*samp_db*) for sample_run_metrics ids.  Returns the
    assembled dict; '_id' is the existing project doc id from *proj_db*
    or a fresh uuid4 hex.

    NOTE(review): legacy style throughout -- print statements (Python 2),
    'TRUE'/'FALSE' strings used as flags, ``== None`` comparisons, and
    broad try/excepts that make each section best-effort.
    """
    # Side effect: configures root logging to a hard-coded log file
    # ('proj_coucdb.log' -- note the typo in the filename).
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        filename='proj_coucdb.log',
                        level=logging.INFO)
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex

    logging.info(str('Handling proj ' + project_name + ' ' + key))
    print key

    # Skeleton of the project_summary document; sections below fill it in.
    # NOTE(review): unlike newer variants, 'project_id' holds the project
    # name here and there is no separate 'project_name' field.
    obj = {
        'application': '',
        'customer_reference': '',
        'min_m_reads_per_sample_ordered': '',
        'no_of_samples': '',
        'entity_type': 'project_summary',
        'uppnex_id': '',
        'samples': {},
        'project_id': project_name,
        '_id': key
    }

    ### Get minimal #M reads and uppnexid from Genomics Project list
    print '\nGetting minimal #M reads and uppnexid from Genomics Project list for project ' + project_name_swe

    config = cl.load_config(config_file)
    # Look the project up under its ascii name first, then the raw name.
    p = pm.ProjectMetaData(project_name, config)
    if p.project_name == None:
        p = pm.ProjectMetaData(project_name_swe, config)
    if p.project_name == None:
        print project_name + ' not found in genomics project list'
        logging.warning(
            str('Google Document Genomics Project list: ' + project_name +
                ' not found'))
    else:
        if p.min_reads_per_sample.strip() != '':
            obj['min_m_reads_per_sample_ordered'] = float(
                p.min_reads_per_sample)
        if p.no_samples.strip() != '':
            obj['no_of_samples'] = int(p.no_samples)
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference

    ### Get costumer and Scilife Sample name from _20132_0X_Table for Sample Summary and Reception Control
    print '\nTrying to find Scilife Sample names from ' + project_name_swe + '_20132_0X_Table for Sample Summary and Reception Control'

    # Per-template-version [worksheet name, scilife-name column header].
    versions = {
        "01": ["Data", 'Sample name Scilife (Index included)'],
        "02": ["Sheet1", 'Sample name Scilife'],
        "04": ["Reception control", 'Complete sample name'],
        "05": ["Reception control", 'SciLifeLab ID']
    }

    # Load google document
    client = make_client(credentials_file)
    feed = bcbio.google.spreadsheet.get_spreadsheets_feed(
        client, project_name_swe + '_20132', False)  # FIXME: handle title typo variants (original note: "FIXA: Hantera mistakes")
    if len(feed.entry) == 0:
        ssheet = None
        logging.warning("Google Document %s: Could not find spreadsheet" %
                        str(project_name_swe + '_20132_XXX'))
        print "Could not find spreadsheet"
    else:
        ssheet = feed.entry[0].title.text
        # Template version parsed from the spreadsheet title; assumes the
        # title contains "_20132_<version>".
        version = ssheet.split('_20132_')[1].split(' ')[0].split('_')[0]
        wsheet = versions[version][0]
        header = versions[version][1]
        content, ws_key, ss_key = get_google_document(ssheet, wsheet,
                                                      credentials_file)

    # Get Scilife Sample names
    # NOTE(review): if the spreadsheet was not found above, `content` and
    # `header` are unbound here; the NameError is swallowed by the outer
    # except below.
    try:
        dummy, customer_names_colindex = get_column(
            content, 'Sample name from customer')
        row_ind, scilife_names_colindex = get_column(content, header)
        info = {}
        for j, row in enumerate(content):
            if (j > row_ind):
                try:
                    cust_name = str(row[customer_names_colindex]).strip()
                    # Dashes are normalised to underscores in sample names.
                    sci_name = str(
                        row[scilife_names_colindex]).strip().replace('-', '_')
                    if cust_name != '':
                        info[sci_name] = cust_name
                except:
                    pass
        print 'Names found'
        for scilife_name in info:
            try:
                obj['samples'][scilife_name] = {
                    'customer_name': info[scilife_name],
                    'scilife_name': scilife_name
                }
            except:
                pass
    except:
        print 'Names not found'
        pass

    ### Get Sample Status from _20158_01_Table for QA HiSeq2000 sequencing results for samples
    print '\nGetting Sample Status from ' + project_name_swe + '_20158_0X_Table for QA HiSeq2000 sequencing results for samples'

    # Per-template-version column headers:
    # [scilife name, total reads, pass/fail, customer name].
    versions = {
        "01": [
            'Sample name Scilife', "Total reads per sample",
            "Passed=P/ not passed=NP*", 'Sample name from customer'
        ],
        "02": [
            "Sample name (SciLifeLab)", "Total number of reads (Millions)",
            "Based on total number of reads", 'Sample name (customer)'
        ],
        "03": [
            "Sample name (SciLifeLab)", "Total number of reads (Millions)",
            "Based on total number of reads", 'Sample name (customer)'
        ]
    }

    # Load google document; try the known title-separator typos in turn.
    mistakes = ["_", " _", " ", ""]
    found = 'FALSE'
    for m in mistakes:
        feed = bcbio.google.spreadsheet.get_spreadsheets_feed(
            client, project_name_swe + m + '20158', False)
        if len(feed.entry) != 0:
            try:
                ssheet = feed.entry[0].title.text
                version = ssheet.split(
                    str(m + '20158_'))[1].split(' ')[0].split('_')[0]
                content, ws_key, ss_key = get_google_document(
                    ssheet, "Sheet1", credentials_file)
                found = 'TRUE'
                break
            except:
                pass
    if found == 'TRUE':
        print 'Google document found!'
    else:
        print 'Google document NOT found!'
        logging.warning("Google Document %s: Could not find spreadsheet" %
                        str(project_name_swe + '_20158_XXX'))

    # Get status etc from loaded document.
    # NOTE(review): if 20158 was not found, `content`/`version` still hold
    # the 20132 values, so this try is expected to fail into the except.
    try:
        dummy, P_NP_colindex = get_column(content, versions[version][2])
        dummy, No_reads_sequenced_colindex = get_column(
            content, versions[version][1])
        dummy, customer_names_colindex = get_column(content,
                                                    versions[version][3])
        row_ind, scilife_names_colindex = get_column(content,
                                                     versions[version][0])
        info = {}
        for j, row in enumerate(content):
            if (j > row_ind):
                try:
                    sci_name = str(row[scilife_names_colindex]).strip()
                    cust_name = str(row[customer_names_colindex]).strip()
                    no_reads = str(row[No_reads_sequenced_colindex]).strip()
                    # Names ending in 'F' are forced to status 'P' here --
                    # presumably failed-prep samples; verify against the
                    # newer variants, which treat 'F' as failed QC instead.
                    if sci_name[-1] == 'F':
                        status = 'P'
                    else:
                        status = str(row[P_NP_colindex]).strip()
                    info[sci_name] = [status, no_reads, cust_name]
                except:
                    pass
        # NOTE(review): assumes this file's strip_scilife_name returns a
        # single dict here (newer variants unpack a (names, preps) pair).
        scilife_names = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            status = info[key][0]
            m_reads = info[key][1]
            cust_name = info[key][2]
            # Colliding stripped names are flagged 'inconsistent'.
            if striped_scilife_name in duplicates:
                status = 'inconsistent'
                m_reads = 'inconsistent'
            try:
                if obj['samples'].has_key(striped_scilife_name):
                    obj['samples'][striped_scilife_name]['status'] = status
                    obj['samples'][striped_scilife_name][
                        'm_reads_sequenced'] = m_reads
                else:
                    obj['samples'][striped_scilife_name] = {
                        'customer_name': cust_name,
                        'scilife_name': striped_scilife_name,
                        'status': status,
                        'm_reads_sequenced': m_reads
                    }
            except:
                pass
    except:
        print 'Status and M reads sequenced not found in ' + project_name_swe + '_20158_0X_Table for QA HiSeq2000 sequencing results for samples'
        pass

    ### Get _id for sample_run_metrics and bcbb names -- use couchdb views instead.... To be fixed...
    print '\nGetting _id for sample_run_metrics'

    info = find_samp_from_view(samp_db, project_name)

    if len(info.keys()) > 0:
        print 'sample_run_metrics found on couchdb for project ' + project_name
    else:
        print 'no sample_run_metrics found on couchdb for project ' + project_name
        logging.warning(
            str('CouchDB: No sample_run_metrics found for project ' +
                project_name))
    for key in info:
        scilife_name = strip_scilife_name([info[key][1]])[info[key][1]]
        # File run-metrics ids flat under samples.<name>.sample_run_metrics
        # (no library_prep level in this legacy variant).
        if obj['samples'].has_key(scilife_name):
            if obj['samples'][scilife_name].has_key("sample_run_metrics"):
                obj['samples'][scilife_name]["sample_run_metrics"][info[key]
                                                                   [0]] = key
            else:
                obj['samples'][scilife_name]["sample_run_metrics"] = {
                    info[key][0]: key
                }
    return obj
Beispiel #39
0
 def set_name(self, name):
     """Normalise *name* via get_sample_name and store it."""
     unicode_name = _to_unicode(name)
     self.name = get_sample_name(unicode_name)
def get_proj_inf(WS_projects, project_name_swe, samp_db, proj_db, client, config):
    """Assemble a 'project_summary' couchdb document for *project_name_swe*.

    Data sources: the Genomics Project list (``pmeta.ProjectMetaData``),
    Google spreadsheets 20132/20158/20135 (via the get_20132_info /
    get_20158_info / get_20135_info helpers), the Work Set projects dict
    *WS_projects*, and the samples couchdb *samp_db* (sample_run_metrics
    ids).  Returns the assembled dict; '_id' is the existing project doc
    id from *proj_db*, or a fresh uuid4 hex.

    NOTE(review): spreadsheet sections are best-effort on purpose -- the
    broad try/excepts leave fields at their defaults on any failure.
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex

    logger.info("Handling proj %s %s" % (project_name, key))

    # Skeleton of the project_summary document; sections below fill it in.
    obj = {
        "application": "",
        "customer_reference": "",
        "min_m_reads_per_sample_ordered": "",
        "no_of_samples": "",
        "entity_type": "project_summary",
        "uppnex_id": "",
        "samples": {},
        "project_name": project_name,
        "project_id": "",
        "_id": key,
    }

    ### Genomics Project list: ordered reads, uppnex id, application, etc.
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning("Google Document Genomics Project list: %s not found" % project_name)
    else:
        if p.min_reads_per_sample.strip() != "":
            obj["min_m_reads_per_sample_ordered"] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != "":
            # Fall back to the raw string when it is not a clean integer.
            try:
                obj["no_of_samples"] = int(p.no_samples)
            except:
                obj["no_of_samples"] = p.no_samples
                pass
        obj["uppnex_id"] = p.uppnex_id
        obj["application"] = p.application
        obj["customer_reference"] = p.customer_reference
        obj["project_id"] = "P" + p.project_id

        ### 20132 -- sample names; maps scilife name -> customer name
    try:
        info = get_20132_info(client, project_name_swe)
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' in the prep string flags a failed incoming QC.
            incoming_QC_status = "F" if "F" in prep else "P"
            try:
                obj["samples"][scilife_name] = {
                    "customer_name": cust_name,
                    "scilife_name": scilife_name,
                    "incoming_QC_status": incoming_QC_status,
                }
            except:
                pass
    except:
        pass

    ### 20158 -- sample status and M reads sequenced
    try:
        info = get_20158_info(client, project_name_swe)
        scilife_names, preps = strip_scilife_name(info.keys())
        # Samples whose stripped names collide get flagged 'inconsistent'.
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            status = "inconsistent" if striped_scilife_name in duplicates else info[key][0]
            m_reads = "inconsistent" if striped_scilife_name in duplicates else info[key][1]
            prep = preps[key]
            incoming_QC_status = "F" if "F" in prep else "P"
            if obj["samples"].has_key(striped_scilife_name):
                obj["samples"][striped_scilife_name]["status"] = status
                obj["samples"][striped_scilife_name]["m_reads_sequenced"] = m_reads
            else:
                obj["samples"][striped_scilife_name] = {
                    "scilife_name": striped_scilife_name,
                    "status": status,
                    "m_reads_sequenced": m_reads,
                    "incoming_QC_status": incoming_QC_status,
                }
    except:
        pass
    ### Get _id for sample_run_metrics from the samples couchdb
    info = find_samp_from_view(samp_db, project_name)
    if len(info.keys()) > 0:
        logger.debug("sample_run_metrics found on couchdb for project %s" % project_name)
    else:
        logger.warning("No sample_run_metrics found for project %s" % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # Prep defaults to 'A' when stripping the 'F' marker leaves nothing.
        prep = "A" if preps[sci_name_raw].replace("F", "") == "" else preps[sci_name_raw].replace("F", "")

        # File the run-metrics id under samples.<name>.library_prep.<prep>.
        if obj["samples"].has_key(scilife_name):
            if obj["samples"][scilife_name].has_key("library_prep"):
                if obj["samples"][scilife_name]["library_prep"].has_key(prep):
                    obj["samples"][scilife_name]["library_prep"][prep]["sample_run_metrics"][info[key][0]] = key
                else:
                    obj["samples"][scilife_name]["library_prep"][prep] = {"sample_run_metrics": {info[key][0]: key}}
            else:
                obj["samples"][scilife_name]["library_prep"] = {prep: {"sample_run_metrics": {info[key][0]: key}}}

    ### 20135 -- library prep: average size and prep status
    # NOTE(review): when the project is not in WS_projects, the couchdb
    # view result above is passed into get_20135_info as the seed dict --
    # confirm this is intended rather than an accidental reuse of `info`.
    if WS_projects.has_key(project_name):
        logger.debug("project run on Work Set")
        info = WS_projects[project_name]
    info = get_20135_info(client, project_name_swe, info)
    scilife_names, preps = strip_scilife_name(info.keys())
    for key in scilife_names:
        striped_scilife_name = scilife_names[key]
        for prep in info[key]:
            try:
                Av_sice = int(float(info[key][prep][0]))
                prep_status = info[key][prep][1]
                if obj["samples"][striped_scilife_name].has_key("library_prep"):
                    if obj["samples"][striped_scilife_name]["library_prep"].has_key(prep):
                        obj["samples"][striped_scilife_name]["library_prep"][prep]["average_size_bp"] = Av_sice
                        obj["samples"][striped_scilife_name]["library_prep"][prep]["prep_status"] = prep_status
                    else:
                        obj["samples"][striped_scilife_name]["library_prep"][prep] = {
                            "average_size_bp": Av_sice,
                            "prep_status": prep_status,
                        }
                else:
                    obj["samples"][striped_scilife_name]["library_prep"] = {
                        prep: {"average_size_bp": Av_sice, "prep_status": prep_status}
                    }
            except:
                pass

    return obj
Beispiel #41
0
 def set_full_name(self, name):
     """Record the full sample name, normalised to a unicode string."""
     normalised = _to_unicode(name)
     self.full_name = normalised
Beispiel #42
0
 def set_analysis(self, analysis):
     """Record the analysis name, normalised to a unicode string."""
     normalised = _to_unicode(analysis)
     self.analysis = normalised
Beispiel #43
0
 def set_name(self, name):
     """Store *name*, first stringified and then converted to unicode."""
     as_str = str(name)
     self.name = _to_unicode(as_str)
def get_proj_inf(WS_projects, project_name_swe, samp_db, proj_db, client, config):
    """Build a 'project_summary' document for one project.

    Aggregates, best-effort, per-sample data from the Genomics Project list,
    the 20132/20158/20135 google documents and the sample_run_metrics couchdb
    view into a single dict ready to be stored in the project database.

    :param WS_projects: mapping of project name -> Work Set info; when the
        project is present, its entry replaces the input passed to
        ``get_20135_info``
    :param project_name_swe: project name, possibly containing non-ascii chars
    :param samp_db: couchdb handle used for the sample_run_metrics view
    :param proj_db: couchdb handle used to look up an existing project ``_id``
    :param client: google docs client used by the ``get_201xx_info`` helpers
    :param config: configuration handed to ``pmeta.ProjectMetaData``
    :returns: the assembled project_summary dict
    """
    project_name = _replace_ascii(_to_unicode(project_name_swe))
    # Reuse the existing couchdb _id when the project is already stored,
    # otherwise mint a fresh one.
    key = find_proj_from_view(proj_db, project_name)
    if not key:
        key = uuid4().hex

    logger.info('Handling proj %s %s' % (project_name, key))

    obj = {'application': '',
           'customer_reference': '',
           'min_m_reads_per_sample_ordered': '',
           'no_of_samples': '',
           'entity_type': 'project_summary',
           'uppnex_id': '',
           'samples': {},
           'project_name': project_name,
           'project_id': '',
           '_id': key}

    ### Genomics Project list
    p = pmeta.ProjectMetaData(project_name, config)
    if p.project_name is None:
        # Fall back to the original (possibly non-ascii) spelling.
        p = pmeta.ProjectMetaData(project_name_swe, config)
    if p.project_name is None:
        logger.warning('Google Document Genomics Project list: %s not found' % project_name)
    else:
        if p.min_reads_per_sample.strip() != '':
            obj['min_m_reads_per_sample_ordered'] = float(p.min_reads_per_sample)
        if p.no_samples.strip() != '':
            obj['no_of_samples'] = int(p.no_samples)
        obj['uppnex_id'] = p.uppnex_id
        obj['application'] = p.application
        obj['customer_reference'] = p.customer_reference
        obj['project_id'] = 'P' + p.project_id

    ### 20132: customer name and incoming QC status per sample
    info = get_20132_info(client, project_name_swe)
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        for key in scilife_names:
            scilife_name = scilife_names[key]
            prep = preps[key]
            cust_name = info[key]
            # An 'F' anywhere in the prep string marks a failed incoming QC.
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            obj['samples'][scilife_name] = {'customer_name': cust_name,
                                            'scilife_name': scilife_name,
                                            'incoming_QC_status': incoming_QC_status}
    except Exception:
        # Best effort: a malformed 20132 sheet must not abort the summary.
        pass

    ### 20158: sequencing status and million reads sequenced per sample
    info = get_20158_info(client, project_name_swe)
    try:
        scilife_names, preps = strip_scilife_name(info.keys())
        duplicates = find_duplicates(scilife_names.values())
        for key in scilife_names:
            striped_scilife_name = scilife_names[key]
            # Conflicting rows for the same stripped name are flagged rather
            # than silently picking one of them.
            status = 'inconsistent' if striped_scilife_name in duplicates else info[key][0]
            m_reads = 'inconsistent' if striped_scilife_name in duplicates else info[key][1]
            prep = preps[key]
            incoming_QC_status = 'F' if 'F' in prep else 'P'
            if striped_scilife_name in obj['samples']:
                obj['samples'][striped_scilife_name]['status'] = status
                obj['samples'][striped_scilife_name]['m_reads_sequenced'] = m_reads
            else:
                obj['samples'][striped_scilife_name] = {'scilife_name': striped_scilife_name,
                                                        'status': status,
                                                        'm_reads_sequenced': m_reads,
                                                        'incoming_QC_status': incoming_QC_status}
    except Exception:
        pass

    ### Get _id for sample_run_metrics
    info = find_samp_from_view(samp_db, project_name)
    if len(info.keys()) > 0:
        logger.debug('sample_run_metrics found on couchdb for project %s' % project_name)
    else:
        logger.warning('No sample_run_metrics found for project %s' % project_name)
    for key in info:
        sci_name_raw = info[key][1]
        scilife_name, preps = strip_scilife_name([sci_name_raw])
        scilife_name = scilife_name[sci_name_raw]
        # 'A' is the default prep; a prep string consisting only of 'F'
        # (failed) markers also maps to 'A'.
        prep = 'A' if preps[sci_name_raw].replace('F', '') == '' else preps[sci_name_raw].replace('F', '')
        if scilife_name in obj['samples']:
            prep_entry = obj['samples'][scilife_name].setdefault(
                'library_prep', {}).setdefault(prep, {'sample_run_metrics': {}})
            prep_entry['sample_run_metrics'][info[key][0]] = key

    ### 20135: average fragment size and prep status per library prep
    if project_name in WS_projects:
        logger.debug('project run on Work Set')
        info = WS_projects[project_name]
    info = get_20135_info(client, project_name_swe, info)
    scilife_names, preps = strip_scilife_name(info.keys())
    for key in scilife_names:
        striped_scilife_name = scilife_names[key]
        for prep in info[key]:
            try:
                Av_sice = int(float(info[key][prep][0]))
                prep_status = info[key][prep][1]
                # A KeyError for an unknown sample is deliberately swallowed:
                # only samples already collected above get prep details.
                prep_entry = obj['samples'][striped_scilife_name].setdefault(
                    'library_prep', {}).setdefault(prep, {})
                prep_entry['average_size_bp'] = Av_sice
                prep_entry['prep_status'] = prep_status
            except Exception:
                # Also skips rows with a missing or non-numeric average size.
                pass

    return obj
Beispiel #45
0
 def set_genome_build(self, genome_build):
     """Record the genome build identifier, coerced to unicode."""
     build = _to_unicode(genome_build)
     self.genome_build = build