import os
import sys
import yaml
from texttable import Texttable

# NOTE: m2r (reST image helper), summ (QC summary parser), ProjectMetaData,
# get_flowcell_info, fixProjName and the `options` object are used below but
# are assumed to be provided by the surrounding package.


def generate_report(proj_conf):
    d = {
        'project_id': proj_conf['id'],
        'infotable': "",
        'lanetable': "",
        'read1table': "",
        'read2table': "",
        'qcplots': "",
        'qc30plots': "",
        'errorrate': "",
        }

    ## General info table
    tab = Texttable()
    tab.add_row(["Project id", proj_conf['id']])
    tab.add_rows([["Run name:", proj_conf['flowcell']],
                  ["Uppnex project", ""]])
    d.update(infotable=tab.draw())

    ## Lane table
    tab = Texttable()
    tab.add_row(["Lane", "Sample(s)", "Conc. (pM)"])
    for l in proj_conf['lanes']:
        samples = []
        for mp in l['multiplex']:
            samples.append(mp['name'])
        tab.add_row([l['lane'], ", ".join(samples), ""])
    d.update(lanetable=tab.draw())

    ## qcplots
    byCycleDir = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'],
                              "Data", "reports", "ByCycle")
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "QScore_L%s.png" % (l['lane']))), width="100%"))
    d.update(qcplots="\n".join(res))

    ## qc30plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "NumGT30_L%s.png" % (l['lane']))), width="100%"))
    d.update(qc30plots="\n".join(res))

    ## error rate plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "ErrRate_L%s.png" % (l['lane']))), width="100%"))
    d.update(errorrate="\n".join(res))

    return d
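# For reference, a minimal sketch of the `proj_conf` dictionary the basic
# report generator above reads. The key names mirror the accesses in the
# function; the values here are hypothetical examples, not real project data.
example_proj_conf = {
    'id': 'J.Doe_11_01',                        # project identifier
    'flowcell': '110627_SN0123_0123_AB0ACCXX',  # run/flowcell directory name
    'archive_dir': '/srv/archive',              # root of the run archive
    'lanes': [
        {'lane': '1',
         'multiplex': [{'name': 'sample_1'}, {'name': 'sample_2'}]},
        ],
    }
# d = generate_report(example_proj_conf) would then return a dict of
# reStructuredText fragments (tables and image directives) keyed by placeholder.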
def generate_report(proj_conf):
    d = {
        'runname': proj_conf['run'],
        'project_id': proj_conf['id'],
        'samplenames': ' '.join(proj_conf['samples']),
        'latex_opt': "",
        'uppnex': "",
        'mapping': "",
        'dup_rem': "",
        'read_count': "",
        'quantifyer': "",
        'gene_body_cov': "",
        'FPKM_heatmap': "",
        'FPKM_PCAplot': "",
        'Mapping_statistics': "",
        'Read_Distribution': "",
        'rRNA_table': "",
        }

    ## Latex option (no of floats per page)
    floats_per_page = '.. raw:: latex\n\n \\setcounter{totalnumber}{8}'
    d['latex_opt'] = floats_per_page

    ## Metadata fetched from the 'Genomics project list' on Google Docs
    try:
        proj_data = ProjectMetaData(proj_conf['id'], proj_conf['config'])
        uppnex_proj = proj_data.uppnex_id
    except:
        uppnex_proj = "b201YYXX"
        print "No uppnex ID fetched"
    if not uppnex_proj:
        uppnex_proj = "b201YYXX"
        print "No uppnex ID fetched"
    d['uppnex'] = uppnex_proj

    ## RNA-seq tools fetched from config file post_process.yaml
    try:
        tools = proj_conf['config']['custom_algorithms']['RNA-seq analysis']
        d['mapping'] = os.path.join(tools['aligner'], tools['aligner_version'])
        d['dup_rem'] = os.path.join(tools['dup_remover'], tools['dup_remover_version'])
        d['read_count'] = os.path.join(tools['counts'], tools['counts_version'])
        d['quantifyer'] = os.path.join(tools['quantifyer'], tools['quantifyer_version'])
    except:
        print "Could not fetch RNA-seq tools from config file post_process.yaml"
        d['mapping'] = "X"
        d['dup_rem'] = "X"
        d['read_count'] = "X"
        d['quantifyer'] = "X"

    ## Mapping Statistics
    tab = Texttable()
    tab.set_cols_dtype(['t', 't', 't', 't'])
    tab.add_row(['Sample', 'tot_#_read_pairs', '%_uniquely_mapped_reads',
                 '%_uniquely_mapped_reads_left_after_dup_rem'])
    try:
        # Preferred source: one stat file per sample from the tophat output.
        for sample_name in proj_conf['samples']:
            f = open('tophat_out_' + sample_name + '/stat_' + sample_name, 'r')
            data = f.readlines()
            tab.add_row([sample_name, data[1].split()[1],
                         data[2].split()[1], data[3].split()[1]])
            f.close()
        d['Mapping_statistics'] = tab.draw()
    except:
        # Fall back to a single combined 'stat' file with one column per sample.
        try:
            f = open('stat', 'r')
            data = f.readlines()
            D = dict(zip(data[0].split(),
                         zip(data[1].split(), data[2].split(), data[3].split())))
            for sample_name in proj_conf['samples']:
                if D.has_key(sample_name):
                    tab.add_row([sample_name, D[sample_name][0],
                                 D[sample_name][1], D[sample_name][2]])
                else:
                    print 'Could not find ' + sample_name + ' in stat'
            d['Mapping_statistics'] = tab.draw()
            f.close()
        except:
            print "Could not make Mapping Statistics table"

    ## Read Distribution
    try:
        tab = Texttable()
        # Hand-rolled JSON mirroring the table rows, appended to Ever_rd.json.
        json = open('Ever_rd.json', 'a')
        print >> json, '{'
        Groups = ["Sample:", "CDS Exons:", "5'UTR Exons:", "3'UTR Exons:",
                  "Intronic region:", "TSS up 1kb:", "TES down 1kb:"]
        tab.set_cols_dtype(['t', 't', 't', 't', 't', 't', 't', 't'])
        tab.add_row(["Sample", "CDS Exon", "5'UTR Exon", "3'UTR Exon", "Intron",
                     "TSS up 1kb", "TES down 1kb", "mRNA frac"])
        for i in range(len(proj_conf['samples'])):
            sample_name = proj_conf['samples'][i]
            print >> json, sample_name + ': {'
            row = [sample_name]
            Reads_counts = []
            try:
                f = open('RSeQC_rd_' + sample_name + '.err', 'r')
            except:
                f = open('Ever_rd_' + sample_name + '.err', 'r')
            for line in f:
                Group = line.split('\t')[0]
                if Group in Groups:
                    if Group == "TES down 1kb:":
                        print >> json, '"' + Group + '"' + ':' + str(line.split('\t')[3].strip())
                    else:
                        print >> json, '"' + Group + '"' + ':' + str(line.split('\t')[3].strip()) + ','
                    row.append(str(line.split('\t')[3].strip()) + ' ')
                    Reads_counts.append(float(line.split('\t')[2].strip()))
            # Total fragment count, taken from whichever .err file exists.
            if os.path.exists('RSeQC_rd_' + sample_name + '.err'):
                t = os.popen("grep 'Total Fragments' 'RSeQC_rd_" + sample_name
                             + ".err'|sed 's/Total Fragments //g'")
            else:
                t = os.popen("grep 'Total Fragments' 'Ever_rd_" + sample_name
                             + ".err'|sed 's/Total Fragments //g'")
            tot = float(t.readline())
            # mRNA fraction = (CDS + 5'UTR + 3'UTR exon reads) / total
            frac = (Reads_counts[0] + Reads_counts[1] + Reads_counts[2]) / tot
            row.append(str(round(frac, 2)))
            tab.add_row(row)
            f.close()
            if i == (len(proj_conf['samples']) - 1):
                print >> json, '}'
            else:
                print >> json, '},'
        print >> json, '}'
        json.close()
        d['Read_Distribution'] = tab.draw()
    except:
        print "Could not make Read Distribution table"

    ## FPKM_PCAplot, FPKM_heatmap
    if os.path.exists("FPKM_PCAplot.pdf") and os.path.exists("FPKM_heatmap.pdf"):
        d['FPKM_PCAplot'] = m2r.image("FPKM_PCAplot.pdf", width="100%")
        d['FPKM_heatmap'] = m2r.image("FPKM_heatmap.pdf", width="100%")
    else:
        print "Could not make FPKM PCAplot and FPKM heatmap"

    ## rRNA_table
    try:
        tab = Texttable()
        tab.set_cols_dtype(['t', 't'])
        tab.add_row(["Sample", "rRNA"])
        f = open('rRNA.quantification', 'r')
        D = {}
        for line in f:
            D[str(line.split('\t')[0].strip())] = str(line.split('\t')[1].strip())
        for sample_name in proj_conf['samples']:
            if D.has_key(sample_name):
                tab.add_row([sample_name, D[sample_name]])
        d['rRNA_table'] = tab.draw()
        f.close()
    except:
        print "Could not generate rRNA table"

    return d
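# A sketch of the inputs the RNA-seq report variant above expects; the key
# names mirror the lookups in the function, while the tool names and values
# here are hypothetical examples (post_process.yaml supplies the real ones).
# The function also reads per-sample files from the current working directory:
# tophat_out_<sample>/stat_<sample>, RSeQC_rd_<sample>.err (or
# Ever_rd_<sample>.err) and rRNA.quantification.
example_rnaseq_conf = {
    'run': '110627_SN0123_0123_AB0ACCXX',
    'id': 'J.Doe_11_01',
    'samples': ['sample_1', 'sample_2'],
    'config': {
        'custom_algorithms': {
            'RNA-seq analysis': {
                'aligner': 'tophat', 'aligner_version': '1.4.1',
                'dup_remover': 'picard', 'dup_remover_version': '1.29',
                'counts': 'htseq', 'counts_version': '0.5.1',
                'quantifyer': 'cufflinks', 'quantifyer_version': '1.2.1',
                },
            },
        },
    }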
def generate_report(proj_conf):

    #######
    ### Metadata fetched from the 'Genomics project list' on Google Docs ###
    #######
    uppnex_proj = ''
    min_reads_per_sample = ''
    try:
        proj_data = ProjectMetaData(proj_conf['id'], proj_conf['config'])
        uppnex_proj = proj_data.uppnex_id
        project_id = proj_data.project_id
        queue_date = proj_data.queue_date
        no_samples = proj_data.no_samples
        lanes_plates = proj_data.lanes_plates
        min_reads_per_sample = proj_data.min_reads_per_sample
        customer_reference = proj_data.customer_reference
        application = proj_data.application
        no_finished_samples = proj_data.no_finished_samples
    except:
        print("WARNING: Could not fetch meta data from Google Docs")

    d = {
        'project_id': proj_conf['id'],
        'latex_opt': "",
        'summary': "",
        'infotable': "",
        'lanetable': "",
        'read1table': "",
        'read2table': "",
        'qcplots': "",
        'qc30plots': "",
        'errorrate': "",
        'yieldtable': "",
        }

    ## Latex option (no of floats per page)
    floats_per_page = '.. raw:: latex\n\n \\setcounter{totalnumber}{8}'
    d.update(latex_opt=floats_per_page)

    ## General info table
    tab = Texttable()
    if not uppnex_proj or len(uppnex_proj) < 4 or uppnex_proj[0:4] != 'b201':
        uppnex_proj = "b201YXXX"
        print "WARNING: Could not find UPPNEX project"

    run_name_comp = proj_conf['flowcell'].split('_')
    simple_run_name = run_name_comp[0] + run_name_comp[3][0]
    proj_level_dir = fixProjName(proj_conf['id'])
    instr_id = run_name_comp[1]
    fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    tab.add_row(["Run name:", proj_conf['flowcell']])
    del_base = "/proj/"
    proj_id = proj_conf['id']
    try:
        if len(customer_reference) > 1:
            proj_id += ' (' + customer_reference + ')'
    except:
        pass
    tab.add_rows([["Project id:", proj_id],
                  ["Date:", fc_date],
                  ["Instrument ID:", instr_id],
                  ["Flow cell ID:", fc_name],
                  ["Uppnex project:", uppnex_proj],
                  ["Delivery directory:", del_base + uppnex_proj + "/INBOX/"
                   + proj_level_dir + "/" + proj_conf['flowcell']]])
    d.update(infotable=tab.draw())

    ## Lane table
    tab = Texttable()
    tab.add_row(["Lane", "Sample(s)"])
    for l in proj_conf['lanes']:
        main_proj = l['description'].split(',')[1].strip()
        samples = []
        if l.has_key('multiplex'):
            for mp in l['multiplex']:
                if mp.has_key('sample_prj'):
                    if mp['sample_prj'] == proj_conf['id']:
                        samples.append(mp['name'])
            tab.add_row([l['lane'], ", ".join(samples)])
        else:
            tab.add_row([l['lane'], "Non-multiplexed lane"])
    d.update(lanetable=tab.draw())

    tab_r1 = Texttable()
    tab_r2 = Texttable()
    tab_r1.set_cols_width([2, 12, 12, 12, 12, 12, 12, 30])
    tab_r2.set_cols_width([2, 12, 12, 12, 12, 12, 12, 30])
    tab_r1.add_row(["Lane", "Clu. dens. #/mm2", "% PF clusters",
                    "Clu. PF #/mm2", "% phas/prephas", "% aln PhiX",
                    "% error rate", "Comment"])
    tab_r2.add_row(["Lane", "Clu. dens. #/mm2", "% PF clusters",
                    "Clu. PF #/mm2", "% phas/prephas", "% aln PhiX",
                    "% error rate", "Comment"])

    # These should be moved to a cfg file (+ perhaps provide an
    # alternative for v1.5 FC).
    if (options.v1_5_fc):
        min_clupf = 300
    else:
        min_clupf = 475
    max_phas = 0.4
    max_prephas = 1.0  # 0.5
    max_mean_err = 2

    statspath = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'],
                             "Data", "reports", "Summary")
    stats = summ.getQCstats(statspath)

    # Check quality criteria and add comments
    comm_r1 = ''
    comm_r2 = ''
    ok_r1 = True
    ok_r2 = True
    ok_cludens_r1 = True
    ok_cludens_r2 = True
    ok_err_rate = True
    ok_err_r1 = True
    ok_err_r2 = True

    for l in proj_conf['lanes']:
        # Cluster densities
        clu_dens_r1 = stats['raw_cluster_dens']['read1'][l['lane']]
        clu_dens_r2 = stats['raw_cluster_dens']['read2'][l['lane']]
        clu_dens_sd_r1 = stats['raw_cluster_dens_sd']['read1'][l['lane']]
        clu_dens_sd_r2 = stats['raw_cluster_dens_sd']['read2'][l['lane']]
        clu_dens_string_r1 = str(clu_dens_r1) + '+/-' + str(clu_dens_sd_r1)
        clu_dens_string_r2 = str(clu_dens_r2) + '+/-' + str(clu_dens_sd_r2)

        # Cluster PF densities
        clu_dens_pf_r1 = stats['pf_cluster_dens']['read1'][l['lane']]
        clu_dens_pf_r2 = stats['pf_cluster_dens']['read2'][l['lane']]
        clu_dens_pf_sd_r1 = stats['pf_cluster_dens_sd']['read1'][l['lane']]
        clu_dens_pf_sd_r2 = stats['pf_cluster_dens_sd']['read2'][l['lane']]
        clu_dens_pf_string_r1 = str(clu_dens_pf_r1) + '+/-' + str(clu_dens_pf_sd_r1)
        clu_dens_pf_string_r2 = str(clu_dens_pf_r2) + '+/-' + str(clu_dens_pf_sd_r2)

        # % PF clusters
        prc_pf_r1 = stats['prc_pf']['read1'][l['lane']]
        prc_pf_r2 = stats['prc_pf']['read2'][l['lane']]
        prc_pf_sd_r1 = stats['prc_pf_sd']['read1'][l['lane']]
        prc_pf_sd_r2 = stats['prc_pf_sd']['read2'][l['lane']]
        prc_pf_string_r1 = str(prc_pf_r1) + '+/-' + str(prc_pf_sd_r1)
        prc_pf_string_r2 = str(prc_pf_r2) + '+/-' + str(prc_pf_sd_r2)

        # % phasing and prephasing
        phas_r1 = stats['phasing']['read1'][l['lane']]
        phas_r2 = stats['phasing']['read2'][l['lane']]
        prephas_r1 = stats['prephasing']['read1'][l['lane']]
        prephas_r2 = stats['prephasing']['read2'][l['lane']]
        phas_string_r1 = str(phas_r1) + '/' + str(prephas_r1)
        phas_string_r2 = str(phas_r2) + '/' + str(prephas_r2)

        # % aligned
        aln_r1 = stats['prc_aligned']['read1'][l['lane']]
        aln_r2 = stats['prc_aligned']['read2'][l['lane']]
        aln_sd_r1 = stats['prc_aligned_sd']['read1'][l['lane']]
        aln_sd_r2 = stats['prc_aligned_sd']['read2'][l['lane']]
        aln_string_r1 = str(aln_r1) + '+/-' + str(aln_sd_r1)
        aln_string_r2 = str(aln_r2) + '+/-' + str(aln_sd_r2)

        # error rate
        err_r1 = stats['error_rate']['read1'][l['lane']]
        err_r2 = stats['error_rate']['read2'][l['lane']]
        err_sd_r1 = stats['error_rate_sd']['read1'][l['lane']]
        err_sd_r2 = stats['error_rate_sd']['read2'][l['lane']]
        err_str_r1 = str(err_r1) + '+/-' + str(err_sd_r1)
        err_str_r2 = str(err_r2) + '+/-' + str(err_sd_r2)

        comm_r1 = ""
        comm_r2 = ""

        # check criteria
        if float(clu_dens_pf_r1[:-1]) < min_clupf:
            ok_r1 = False
            ok_cludens_r1 = False
            comm_r1 += "Low cluster density. "
        if float(clu_dens_pf_r2[:-1]) < min_clupf:
            ok_r2 = False
            ok_cludens_r2 = False
            comm_r2 += "Low cluster density. "
        avg_error_rate = (float(err_r1) + float(err_r2)) / 2
        if avg_error_rate > max_mean_err:
            ok_err_rate = False
        if float(err_r1) > max_mean_err:
            comm_r1 += "High error rate. "
            ok_err_r1 = False
        if float(err_r2) > max_mean_err:
            comm_r2 += "High error rate. "
            ok_err_r2 = False
        if comm_r1 == "":
            comm_r1 = "OK"
        if comm_r2 == "":
            comm_r2 = "OK"

        tab_r1.add_row([l['lane'], clu_dens_string_r1, prc_pf_string_r1,
                        clu_dens_pf_string_r1, phas_string_r1, aln_string_r1,
                        err_str_r1, comm_r1])
        tab_r2.add_row([l['lane'], clu_dens_string_r2, prc_pf_string_r2,
                        clu_dens_pf_string_r2, phas_string_r2, aln_string_r2,
                        err_str_r2, comm_r2])

    # Reinitialize comments for the summary (which will potentially cover
    # several lanes).
    comm_r1 = ""
    comm_r2 = ""

    # if not ok_cludens_r1: comm_r1 += "Low cluster density. "
    # if not ok_cludens_r2: comm_r2 += "Low cluster density. "

    if not ok_err_rate:
        if not ok_err_r1:
            ok_r1 = False
            comm_r1 += "High error rate. "
        if not ok_err_r2:
            ok_r2 = False
            comm_r2 += "High error rate. "

    if (ok_r1 and ok_r2):
        comm_r1 = comm_r2 = "OK"
        d.update(summary="Successful run in terms of error rate. ")
    else:
        if (ok_r1):
            comm_r1 = "OK"
            d.update(summary="Read 2 did not pass quality criteria: " + comm_r2)
        elif (ok_r2):
            comm_r2 = "OK"
            d.update(summary="Read 1 did not pass quality criteria: " + comm_r1)
        else:
            d.update(summary="Did not pass quality criteria. Read 1: "
                     + comm_r1 + " Read 2: " + comm_r2)

    d.update(read1table=tab_r1.draw())
    d.update(read2table=tab_r2.draw())

    ## qcplots
    byCycleDir = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'],
                              "Data", "reports", "ByCycle")
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "QScore_L%s.png" % (l['lane']))), width="100%"))
    d.update(qcplots="\n".join(res))

    ## qc30plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "NumGT30_L%s.png" % (l['lane']))), width="100%"))
    d.update(qc30plots="\n".join(res))

    ## error rate plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "ErrRate_L%s.png" % (l['lane']))), width="100%"))
    d.update(errorrate="\n".join(res))

    ## Sequence yield table
    target_yield_per_lane = 143000000.0
    if (options.v1_5_fc):
        target_yield_per_lane = 60000000.0
    tab = Texttable()
    tab.add_row(['Lane', 'Sample', 'Number of sequences',
                 'Million sequences ordered', 'Comment'])

    run_info_yaml = os.path.join(proj_conf['archive_dir'],
                                 proj_conf['flowcell'], "run_info.yaml")
    if not os.path.exists(run_info_yaml):
        print("WARNING: could not find required run_info.yaml configuration "
              "file at '%s'" % run_info_yaml)
        return
    with open(run_info_yaml) as in_handle:
        run_info = yaml.load(in_handle)

    fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    low_yield = False
    bc_multiplier = 0.75  # Should move to cfg file

    ok_samples = []
    low_samples = []

    for l in proj_conf['lanes']:
        bc_file_name = os.path.join(
            proj_conf['analysis_dir'], proj_conf['flowcell'],
            '_'.join([l['lane'], fc_date, fc_name, "nophix_barcode"]),
            '_'.join([l['lane'], fc_date, fc_name, "nophix_bc.metrics"]))
        try:
            bc_file = open(bc_file_name)
        except:
            sys.exit("Could not find bc metrics file " + bc_file_name)
        bc_count = {}
        for line in bc_file:
            c = line.strip().split()
            bc_count[c[0]] = c[1] + ' (~' + str(int(round(float(c[1]) / 1000000))) + " million)"
        no_samples = len(bc_count)
        if no_samples == 0:
            print("WARNING: did not find a BC metrics file... "
                  "Skipping lane %s for %s" % (l['lane'], proj_conf['id']))
            continue

        target_yield_per_sample = ''
        try:
            min_reads_per_sample = round(float(str(min_reads_per_sample)))
            target_yield_per_sample = min_reads_per_sample * 1000000
        except ValueError:
            min_reads_per_sample = ''
            target_yield_per_sample = bc_multiplier * target_yield_per_lane / no_samples

        sample_name = {}
        is_multiplexed = True
        is_rerun = False
        # Check here for each sample if it belongs to the project
        for entry in run_info:
            if entry['lane'] == l['lane']:
                projs = set()
                if entry.has_key('multiplex'):
                    for sample in entry['multiplex']:
                        if sample.has_key('sample_prj'):
                            projs.add(sample['sample_prj'])
                            if sample['sample_prj'].strip() == proj_conf['id']:
                                sample_name[sample['barcode_id']] = sample['name']
                else:
                    is_multiplexed = False
                if len(projs) > 1:
                    is_rerun = True

        samp_count = {}
        for k in bc_count.keys():
            if not k.isdigit():
                pass
            else:
                if sample_name.has_key(int(k)):
                    samp_count[sample_name[int(k)]] = bc_count[k]

        for k in sorted(samp_count.keys()):
            comment = ''
            if int(samp_count[k].split('(')[0]) < target_yield_per_sample:
                comment = 'Low. '
                low_yield = True
                low_samples.append(k)
            else:
                ok_samples.append(k)
            if is_rerun:
                comment += '(rerun lane)'
            tab.add_row([l['lane'], k, samp_count[k], min_reads_per_sample, comment])

        if is_multiplexed:
            comment = ''
            try:
                if int(bc_count['unmatched'].split('(')[0]) > target_yield_per_sample:
                    comment = 'High.'
                if is_rerun:
                    comment += '(rerun lane)'
                tab.add_row([l['lane'], 'unmatched', bc_count['unmatched'],
                             min_reads_per_sample, comment])
            except:
                print('WARNING: insufficient or no barcode metrics for lane')
        else:
            comment = ''
            for k in bc_count.keys():
                if int(bc_count[k].split('(')[0]) < bc_multiplier * target_yield_per_lane:
                    comment = 'Low.'
                tab.add_row([l['lane'], "Non-multiplexed lane", bc_count[k],
                             min_reads_per_sample, comment])

    delivery_type = "Final delivery. "
    if low_yield:
        delivery_type = "Partial delivery. "
        fail_comm = ("Samples " + ", ".join(low_samples)
                     + " yielded fewer sequences than expected. These will be "
                     "re-run unless this was already a re-run and the total "
                     "yield is now sufficient. ")
    else:
        fail_comm = ""

    if low_yield:
        if len(ok_samples) > 0:
            ok_comm = ("Samples " + ", ".join(ok_samples)
                       + " yielded the expected number of sequences or more. ")
        else:
            ok_comm = ""
    else:
        ok_comm = "All samples yielded the expected number of sequences or more. "

    comm = d['summary'] + fail_comm + ok_comm
    d.update(summary=comm)
    d.update(yieldtable=tab.draw())
    return d
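# The dict returned by generate_report() maps template placeholders to
# reStructuredText fragments (Texttable output and image directives). A
# minimal sketch of turning it into a report file, assuming a plain
# %(name)s-style template; the pipeline's real template mechanism may differ.
def write_report_sketch(d, template_path, out_path):
    # The template is assumed to contain placeholders such as %(infotable)s,
    # %(read1table)s and %(summary)s matching the keys of the returned dict.
    with open(template_path) as f:
        template = f.read()
    with open(out_path, 'w') as out:
        out.write(template % d)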
def generate_report(proj_conf):

    #######
    ### Metadata fetched from the 'Genomics project list' on Google Docs ###
    #######
    uppnex_proj = ''
    min_reads_per_sample = ''
    try:
        proj_data = ProjectMetaData(proj_conf['id'], proj_conf['config'])
        uppnex_proj = proj_data.uppnex_id
        project_id = proj_data.project_id
        queue_date = proj_data.queue_date
        no_samples = proj_data.no_samples
        lanes_plates = proj_data.lanes_plates
        min_reads_per_sample = proj_data.min_reads_per_sample
        customer_reference = proj_data.customer_reference
        application = proj_data.application
        no_finished_samples = proj_data.no_finished_samples
    except:
        print("WARNING: Could not fetch meta data from Google Docs")

    d = {
        'project_id': proj_conf['id'],
        'latex_opt': "",
        'summary': "",
        'infotable': "",
        'lanetable': "",
        'read1table': "",
        'read2table': "",
        'qcplots': "",
        'qc30plots': "",
        'errorrate': "",
        'yieldtable': "",
        'qualscale': proj_conf['qual_scale'],
        }

    ## Latex option (no of floats per page)
    floats_per_page = '.. raw:: latex\n\n \\setcounter{totalnumber}{8}'
    d.update(latex_opt=floats_per_page)

    ## General info table
    tab = Texttable()
    if not uppnex_proj or len(uppnex_proj) < 4 or uppnex_proj[0:4] != 'b201':
        uppnex_proj = "b201YXXX"
        print "WARNING: Could not find UPPNEX project"

    run_name_comp = proj_conf['flowcell'].split('_')
    simple_run_name = run_name_comp[0] + "_" + run_name_comp[3]
    proj_level_dir = fixProjName(proj_conf['id'])
    instr_id = run_name_comp[1]
    fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    tab.add_row(["Run name:", proj_conf['flowcell']])
    del_base = "/proj/"
    proj_id = proj_conf['id']
    try:
        if len(customer_reference) > 1:
            proj_id += ' (' + customer_reference + ')'
    except:
        pass
    if len(proj_id) > 30:
        print "Project ID + customer reference too long: ", proj_id
    tab.add_rows([["Project id:", proj_id],
                  ["Date:", fc_date],
                  ["Instrument ID:", instr_id],
                  ["Flow cell ID:", fc_name],
                  ["Uppnex project:", uppnex_proj],
                  ["Delivery directory:", del_base + uppnex_proj + "/INBOX/"
                   + proj_level_dir + "/" + simple_run_name]])
    d.update(infotable=tab.draw())

    ## Lane table
    tab = Texttable()
    tab.add_row(["Lane", "Sample(s)"])
    for l in proj_conf['lanes']:
        main_proj = l['description'].split(',')[1].strip()
        samples = []
        if 'multiplex' in l:
            for mp in l['multiplex']:
                if 'sample_prj' in mp:
                    if mp['sample_prj'] == proj_conf['id']:
                        samples.append(mp['name'])
            tab.add_row([l['lane'], ", ".join(samples)])
        else:
            tab.add_row([l['lane'], "Non-multiplexed lane"])
    d.update(lanetable=tab.draw())

    tab_r1 = Texttable()
    tab_r2 = Texttable()
    tab_r1.set_cols_width([2, 12, 12, 12, 12, 12, 12, 30])
    tab_r2.set_cols_width([2, 12, 12, 12, 12, 12, 12, 30])
    tab_r1.add_row(["Lane", "Clu. dens. #/mm2", "% PF clusters",
                    "Clu. PF #/mm2", "% phas/prephas", "% aln PhiX",
                    "% error rate", "Comment"])
    tab_r2.add_row(["Lane", "Clu. dens. #/mm2", "% PF clusters",
                    "Clu. PF #/mm2", "% phas/prephas", "% aln PhiX",
                    "% error rate", "Comment"])

    # These should be moved to a cfg file (+ perhaps provide an
    # alternative for v1.5 FC).
    if (options.v1_5_fc):
        min_clupf = 300
    else:
        min_clupf = 475
    max_phas = 0.4
    max_prephas = 1.0  # 0.5
    max_mean_err = 2

    statspath = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'],
                             "Data", "reports", "Summary")
    stats = summ.getQCstats(statspath)

    # Check quality criteria and add comments
    comm_r1 = ''
    comm_r2 = ''
    ok_r1 = True
    ok_r2 = True
    ok_cludens_r1 = True
    ok_cludens_r2 = True
    ok_err_rate = True
    ok_err_r1 = True
    ok_err_r2 = True

    for l in proj_conf['lanes']:
        # Cluster densities
        clu_dens_r1 = stats['raw_cluster_dens']['read1'][l['lane']]
        clu_dens_r2 = stats['raw_cluster_dens']['read2'][l['lane']]
        clu_dens_sd_r1 = stats['raw_cluster_dens_sd']['read1'][l['lane']]
        clu_dens_sd_r2 = stats['raw_cluster_dens_sd']['read2'][l['lane']]
        clu_dens_string_r1 = str(clu_dens_r1) + '+/-' + str(clu_dens_sd_r1)
        clu_dens_string_r2 = str(clu_dens_r2) + '+/-' + str(clu_dens_sd_r2)

        # Cluster PF densities
        clu_dens_pf_r1 = stats['pf_cluster_dens']['read1'][l['lane']]
        clu_dens_pf_r2 = stats['pf_cluster_dens']['read2'][l['lane']]
        clu_dens_pf_sd_r1 = stats['pf_cluster_dens_sd']['read1'][l['lane']]
        clu_dens_pf_sd_r2 = stats['pf_cluster_dens_sd']['read2'][l['lane']]
        clu_dens_pf_string_r1 = str(clu_dens_pf_r1) + '+/-' + str(clu_dens_pf_sd_r1)
        clu_dens_pf_string_r2 = str(clu_dens_pf_r2) + '+/-' + str(clu_dens_pf_sd_r2)

        # % PF clusters
        prc_pf_r1 = stats['prc_pf']['read1'][l['lane']]
        prc_pf_r2 = stats['prc_pf']['read2'][l['lane']]
        prc_pf_sd_r1 = stats['prc_pf_sd']['read1'][l['lane']]
        prc_pf_sd_r2 = stats['prc_pf_sd']['read2'][l['lane']]
        prc_pf_string_r1 = str(prc_pf_r1) + '+/-' + str(prc_pf_sd_r1)
        prc_pf_string_r2 = str(prc_pf_r2) + '+/-' + str(prc_pf_sd_r2)

        # % phasing and prephasing
        phas_r1 = stats['phasing']['read1'][l['lane']]
        phas_r2 = stats['phasing']['read2'][l['lane']]
        prephas_r1 = stats['prephasing']['read1'][l['lane']]
        prephas_r2 = stats['prephasing']['read2'][l['lane']]
        phas_string_r1 = str(phas_r1) + '/' + str(prephas_r1)
        phas_string_r2 = str(phas_r2) + '/' + str(prephas_r2)

        # % aligned
        aln_r1 = stats['prc_aligned']['read1'][l['lane']]
        aln_r2 = stats['prc_aligned']['read2'][l['lane']]
        aln_sd_r1 = stats['prc_aligned_sd']['read1'][l['lane']]
        aln_sd_r2 = stats['prc_aligned_sd']['read2'][l['lane']]
        aln_string_r1 = str(aln_r1) + '+/-' + str(aln_sd_r1)
        aln_string_r2 = str(aln_r2) + '+/-' + str(aln_sd_r2)

        # error rate
        err_r1 = stats['error_rate']['read1'][l['lane']]
        err_r2 = stats['error_rate']['read2'][l['lane']]
        err_sd_r1 = stats['error_rate_sd']['read1'][l['lane']]
        err_sd_r2 = stats['error_rate_sd']['read2'][l['lane']]
        err_str_r1 = str(err_r1) + '+/-' + str(err_sd_r1)
        err_str_r2 = str(err_r2) + '+/-' + str(err_sd_r2)

        comm_r1 = ""
        comm_r2 = ""

        # check criteria
        if float(clu_dens_pf_r1[:-1]) < min_clupf:
            ok_r1 = False
            ok_cludens_r1 = False
            comm_r1 += "Low cluster density. "
        if float(clu_dens_pf_r2[:-1]) < min_clupf:
            ok_r2 = False
            ok_cludens_r2 = False
            comm_r2 += "Low cluster density. "
        avg_error_rate = (float(err_r1) + float(err_r2)) / 2
        if avg_error_rate > max_mean_err:
            ok_err_rate = False
        if float(err_r1) > max_mean_err:
            comm_r1 += "High error rate. "
            ok_err_r1 = False
        if float(err_r2) > max_mean_err:
            comm_r2 += "High error rate. "
            ok_err_r2 = False
        if comm_r1 == "":
            comm_r1 = "OK"
        if comm_r2 == "":
            comm_r2 = "OK"

        tab_r1.add_row([l['lane'], clu_dens_string_r1, prc_pf_string_r1,
                        clu_dens_pf_string_r1, phas_string_r1, aln_string_r1,
                        err_str_r1, comm_r1])
        tab_r2.add_row([l['lane'], clu_dens_string_r2, prc_pf_string_r2,
                        clu_dens_pf_string_r2, phas_string_r2, aln_string_r2,
                        err_str_r2, comm_r2])

    # Reinitialize comments for the summary (which will potentially cover
    # several lanes).
    comm_r1 = ""
    comm_r2 = ""

    if not ok_cludens_r1:
        comm_r1 += "Low cluster density. "
    if not ok_cludens_r2:
        comm_r2 += "Low cluster density. "
    if not ok_err_rate:
        if not ok_err_r1:
            ok_r1 = False
            comm_r1 += "High error rate. "
        if not ok_err_r2:
            ok_r2 = False
            comm_r2 += "High error rate. "

    if (ok_r1 and ok_r2):
        comm_r1 = comm_r2 = "OK"
        d.update(summary="Successful run in terms of error rate. ")
    else:
        if (ok_r1):
            comm_r1 = "OK"
            d.update(summary="Read 2 did not pass quality criteria: " + comm_r2)
        elif (ok_r2):
            comm_r2 = "OK"
            d.update(summary="Read 1 did not pass quality criteria: " + comm_r1)
        else:
            d.update(summary="Did not pass quality criteria. Read 1: "
                     + comm_r1 + " Read 2: " + comm_r2)

    d.update(read1table=tab_r1.draw())
    d.update(read2table=tab_r2.draw())

    ## qcplots
    byCycleDir = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'],
                              "Data", "reports", "ByCycle")
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "QScore_L%s.png" % (l['lane']))), width="100%"))
    d.update(qcplots="\n".join(res))

    ## qc30plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "NumGT30_L%s.png" % (l['lane']))), width="100%"))
    d.update(qc30plots="\n".join(res))

    ## error rate plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(
            byCycleDir, "ErrRate_L%s.png" % (l['lane']))), width="100%"))
    d.update(errorrate="\n".join(res))

    ## Sequence yield table
    target_yield_per_lane = 143000000.0
    if (options.v1_5_fc):
        target_yield_per_lane = 60000000.0
    tab = Texttable()
    tab.add_row(['Lane', 'Sample', 'Number of sequences',
                 'Million sequences ordered', 'Comment'])

    run_info_yaml = os.path.join(proj_conf['archive_dir'],
                                 proj_conf['flowcell'], "run_info.yaml")
    if not os.path.exists(run_info_yaml):
        print("WARNING: could not find required run_info.yaml configuration "
              "file at '%s'" % run_info_yaml)
        return
    with open(run_info_yaml) as in_handle:
        run_info = yaml.load(in_handle)

    fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    low_yield = False
    bc_multiplier = 0.75  # Should move to cfg file

    ok_samples = []
    low_samples = []

    for l in proj_conf['lanes']:
        # The bc metrics file name convention has varied; try both suffixes.
        bc_file_name_prefix = os.path.join(
            proj_conf['analysis_dir'], proj_conf['flowcell'],
            '_'.join([l['lane'], fc_date, fc_name, "nophix_barcode"]),
            '_'.join([l['lane'], fc_date, fc_name, "nophix"]))
        bc_file = bc_file_name_prefix + ".bc_metrics"
        if not os.path.exists(bc_file):
            bc_file = bc_file_name_prefix + "_bc.metrics"
        try:
            bc_file = open(bc_file)
        except:
            sys.exit("Could not find bc metrics file " + bc_file)
        bc_count = {}
        for line in bc_file:
            c = line.strip().split()
            bc_count[c[0]] = c[1] + ' (~' + str(int(round(float(c[1]) / 1000000))) + " million)"
        # Don't count the 'unmatched' entry as a sample.
        no_samples = len(bc_count) - 1
        if no_samples == 0:
            print("WARNING: did not find a BC metrics file... "
                  "Skipping lane %s for %s" % (l['lane'], proj_conf['id']))
            continue

        target_yield_per_sample = ''
        try:
            min_reads_per_sample = round(float(str(min_reads_per_sample)))
            target_yield_per_sample = min_reads_per_sample * 1000000
        except ValueError:
            min_reads_per_sample = ''
            target_yield_per_sample = bc_multiplier * target_yield_per_lane / no_samples

        sample_name = {}
        is_multiplexed = True
        is_rerun = False
        # Check here for each sample if it belongs to the project
        for entry in run_info:
            if entry['lane'] == l['lane']:
                projs = set()
                if 'multiplex' in entry:
                    for sample in entry['multiplex']:
                        if 'sample_prj' in sample:
                            projs.add(sample['sample_prj'])
                            if sample['sample_prj'].strip() == proj_conf['id']:
                                sample_name[sample['barcode_id']] = sample['name']
                else:
                    is_multiplexed = False
                if len(projs) > 1:
                    is_rerun = True

        samp_count = {}
        for k in bc_count.keys():
            if not k.isdigit():
                pass
            else:
                if int(k) in sample_name:
                    samp_count[sample_name[int(k)]] = bc_count[k]

        print "DEBUG: Target yield per sample = ", target_yield_per_sample
        print "DEBUG: Min reads per sample = ", min_reads_per_sample
        print "DEBUG: No samples: ", no_samples

        for k in sorted(samp_count.keys()):
            comment = ''
            if int(samp_count[k].split('(')[0]) < target_yield_per_sample:
                comment = 'Low. '
                low_yield = True
                low_samples.append(k)
            else:
                ok_samples.append(k)
            if is_rerun:
                comment += '(rerun lane)'
            tab.add_row([l['lane'], k, samp_count[k], min_reads_per_sample, comment])

        if is_multiplexed:
            comment = ''
            try:
                if int(bc_count['unmatched'].split('(')[0]) > target_yield_per_sample:
                    comment = 'High.'
                if is_rerun:
                    comment += '(rerun lane)'
                tab.add_row([l['lane'], 'unmatched', bc_count['unmatched'],
                             min_reads_per_sample, comment])
            except:
                print('WARNING: insufficient or no barcode metrics for lane')
        else:
            comment = ''
            for k in bc_count.keys():
                if int(bc_count[k].split('(')[0]) < bc_multiplier * target_yield_per_lane:
                    comment = 'Low.'
                tab.add_row([l['lane'], "Non-multiplexed lane", bc_count[k],
                             min_reads_per_sample, comment])

    delivery_type = "Final delivery. "
    if low_yield:
        delivery_type = "Partial delivery. "
        fail_comm = ("Samples " + ", ".join(low_samples)
                     + " yielded fewer sequences than expected. These will be "
                     "re-run unless this was already a re-run and the total "
                     "yield is now sufficient. ")
    else:
        fail_comm = ""

    if low_yield:
        if len(ok_samples) > 0:
            ok_comm = ("Samples " + ", ".join(ok_samples)
                       + " yielded the expected number of sequences or more. ")
        else:
            ok_comm = ""
    else:
        ok_comm = "All samples yielded the expected number of sequences or more. "

    comm = d['summary'] + fail_comm + ok_comm
    d.update(summary=comm)
    d.update(yieldtable=tab.draw())
    return d
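# The bc metrics files parsed above are assumed to be plain whitespace-
# separated "<barcode_id> <read_count>" lines (plus an 'unmatched' row), e.g.
#
#   1   14210608
#   2   13607532
#   unmatched   901244
#
# A small self-contained illustration of the same count-formatting logic:
def format_bc_counts(lines):
    bc_count = {}
    for line in lines:
        c = line.strip().split()
        # Keep the raw count plus a human-readable rounded-million suffix.
        bc_count[c[0]] = c[1] + ' (~' + str(int(round(float(c[1]) / 1000000))) + " million)"
    return bc_count

# format_bc_counts(["1 14210608", "unmatched 901244"]) returns
# {'1': '14210608 (~14 million)', 'unmatched': '901244 (~1 million)'}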