Example #1
def main(run_name, gdocs_spreadsheet, encoded_credentials_file, run_info_yaml, analysis_dir, archive_dir, gdocs_worksheet, gdocs_projects_folder, append, split_on_project):

    log.info("Processing run: %s" % run_name)
    
    # If not supplied, assume that the configuration file is named run_info.yaml and resides in the archive dir
    if not run_info_yaml:
        run_info_yaml = os.path.join(archive_dir,"run_info.yaml")
        log.info("No configuration file supplied, assuming it is '%s'" % run_info_yaml)
        
    if not os.path.exists(run_info_yaml):
        log.warn("Could not find required run_info.yaml configuration file at '%s'" % run_info_yaml)
        return
    with open(run_info_yaml) as in_handle:
        run_info = {'details': yaml.load(in_handle)}

    # Get the Google Docs credentials
    gdocs_credentials = ""
    if not os.path.exists(encoded_credentials_file):
        log.warn("The Google Docs credentials file could not be found. No demultiplex data was written")
        return
    with open(encoded_credentials_file) as fh:
        gdocs_credentials = fh.read().strip()
    

    fc_name, fc_date = get_flowcell_info(run_name)
    
    # Get the barcode statistics
    bc_metrics = get_bc_stats(fc_date,fc_name,analysis_dir,run_info)
    
    # Write the report
    write_run_report_to_gdocs(fc_date,fc_name,bc_metrics,gdocs_spreadsheet,gdocs_credentials,gdocs_worksheet,append,split_on_project)
    
    # Write the bc project summary report
    if gdocs_projects_folder:
        write_project_report_to_gdocs(fc_date,fc_name,bc_metrics,gdocs_credentials,gdocs_projects_folder)
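
A minimal invocation sketch for this entry point; every argument value below is a placeholder, not taken from the original script (which presumably collects them from the command line):

main(run_name="110106_SN0022_0067_B8042AAXX",        # placeholder run folder name
     gdocs_spreadsheet="Demultiplex results",        # title of the target GDocs file
     encoded_credentials_file="/path/to/gdocs_credentials",
     run_info_yaml=None,                             # falls back to <archive_dir>/run_info.yaml
     analysis_dir="/path/to/analysis",
     archive_dir="/path/to/archive",
     gdocs_worksheet=None,
     gdocs_projects_folder=None,                     # skip the per-project report
     append=False,
     split_on_project=False)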
Example #2
def create_bc_report_on_gdocs(fc_date, fc_name, work_dir, run_info, config):
    """Get the barcode read distribution for a run and upload to google docs"""
    
    encoded_credentials = get_credentials(config)
    if not encoded_credentials:
        log.warn("Could not find Google Docs account credentials. No demultiplex report was written")
        return
    
    # Get the required parameters from the post_process.yaml configuration file
    gdocs = config.get("gdocs_upload",None)
    if not gdocs:
        log.info("No GDocs upload section specified in config file, will not upload demultiplex data")
        return
    
    # Get the GDocs demultiplex result file title
    gdocs_spreadsheet = gdocs.get("gdocs_dmplx_file",None)
    if not gdocs_spreadsheet:
        log.warn("Could not find Google Docs demultiplex results file title in config. No demultiplex counts were written to Google Docs")
        return
    
    # Get the barcode statistics. Get a deep copy of the run_info since we will modify it
    bc_metrics = get_bc_stats(fc_date,fc_name,work_dir,copy.deepcopy(run_info))
    
    # Upload the data
    write_run_report_to_gdocs(fc_date,fc_name,bc_metrics,gdocs_spreadsheet,encoded_credentials)
    
    # Get the projects parent folder
    projects_folder = gdocs.get("gdocs_projects_folder",None)
    
    # Write the bc project summary report
    if projects_folder:
        write_project_report_to_gdocs(fc_date,fc_name,bc_metrics,encoded_credentials,projects_folder)
Example #3
def main(flowcell_id, archive_dir, analysis_dir):
    print " ".join([flowcell_id, archive_dir, analysis_dir])
    fp = os.path.join(archive_dir, flowcell_id, "run_info.yaml")
    with open(fp) as in_handle:
        run_info = yaml.load(in_handle)
    project_ids = dict()
    for lane in run_info:
        (l, id) = [x.strip() for x in lane['description'].split(",")]
        if project_ids.has_key(id):
            project_ids[id].append(lane)
        else:
            project_ids[id] = [lane]

    sphinx_defs = []
    for k in project_ids.keys():
        lanes = [x['lane'] for x in project_ids[k]]
        log.info("saw project %s in lanes %s" %( k, ", ".join(lanes)))
        sphinx_defs.append("('%s', '%s_delivery.tex', 'Delivery note', u'Scilife', 'manual'),\n"  % (k, k))
        projectfile = "%s.mako" % (k)
        fp = open(projectfile, "w")
        fp.write(TEMPLATE)
        fp.close()
        mylookup = TemplateLookup(directories=['./'])
        tmpl = Template(filename=projectfile, lookup=mylookup)
        proj_conf = {
            'id' : k,
            'lanes' : project_ids[k],
            'archive_dir' : archive_dir, 
            'analysis_dir' : analysis_dir,
            'flowcell' : flowcell_id,
            }
        d = generate_report(proj_conf)
        rstfile = "%s.rst" % (k)
        fp = open(rstfile, "w")
        fp.write(tmpl.render(**d))
        fp.close()

    sphinxconf = os.path.join(os.getcwd(), "conf.py")
    if not os.path.exists(sphinxconf):
        log.warn("no sphinx configuration file conf.py found: you have to edit conf.py yourself!")
    else:
        with open(sphinxconf) as fp:
            lines = fp.readlines()
        sdout = []
        modify_conf = False
        for sd in sphinx_defs:
            if sd not in lines:
                sdout.append(sd)
                modify_conf = True
        if modify_conf:
            i = lines.index("latex_documents = [\n")
            newconf = lines[:i+3] + sdout + lines[i+3:]
            fp = open("conf.py", "w")
            fp.write("".join(newconf))
            fp.close()
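
For reference, a sketch of the conf.py region this patches. The exact layout is an assumption: the lines[:i+3] splice keeps "latex_documents = [" plus the next two lines (the default entry as generated by sphinx-quickstart), so the new tuples land immediately after it:

# Assumed shape of the patched conf.py section (illustrative only):
latex_documents = [
  ('index', 'project.tex', u'Project Documentation',
   u'Author', 'manual'),
  ('J.Doe_10_01', 'J.Doe_10_01_delivery.tex', 'Delivery note', u'Scilife', 'manual'),
]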
Example #4
def _handle_data(src, tgt, f=shutil.copyfile):
    if src is None:
        return
    if os.path.exists(tgt):
        log.warn("%s already exists: not doing anything!" %(tgt))
        return
    if options.dry_run:
        print "DRY_RUN: %s file %s to %s" % (f.__name__, src, tgt)
    else:
        log.info("%s file %s to %s" % (f.__name__, src, tgt))
        f(src, tgt)
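
A usage sketch; it assumes a module-level options object with a dry_run attribute (the function reads it as a global), and the paths are placeholders:

import shutil

_handle_data("run_info.yaml", "/archive/run/run_info.yaml")             # copy (the default)
_handle_data("Summary.xml", "/archive/run/Summary.xml", f=shutil.move)  # move instead of copy
_handle_data(None, "/archive/run/anything")                             # no source: silently skipped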
Example #5
def get_spreadsheet(ssheet_title,encoded_credentials):
    """Connect to Google docs and get a spreadsheet"""
    
    # Convert the spreadsheet title to unicode
    ssheet_title = _to_unicode(ssheet_title)
    
    # Create a client class which will make HTTP requests with Google Docs server.
    client = bcbio.google.spreadsheet.get_client()
    bcbio.google.connection.authenticate(client,encoded_credentials)
    
    # Locate the spreadsheet
    ssheet = bcbio.google.spreadsheet.get_spreadsheet(client,ssheet_title)
    
    # Check that we got a result back
    if not ssheet:
        log.warn("No document with specified title '%s' found in GoogleDocs repository" % ssheet_title)
        return (None,None)
    
    log.info("Found spreadsheet matching the supplied title: '%s'" % (ssheet.title.text))
    
    return (client,ssheet)
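
A usage sketch; encoded_credentials would be the encoded account login read from file, as in Example #1:

client, ssheet = get_spreadsheet("Demultiplex results", encoded_credentials)
if ssheet is not None:
    # An authenticated client and the matching spreadsheet are both available here.
    log.info("Will write to spreadsheet '%s'" % ssheet.title.text)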
Example #6
File: bc_metrics.py  Project: hussius/bcbb
def create_bc_report_on_gdocs(fc_date, fc_name, work_dir, run_info, config):
    """Get the barcode read distribution for a run and upload to google docs"""

    # Get the required parameters from the post_process.yaml configuration file
    gdocs = config.get("gdocs_upload", None)
    if not gdocs:
        log.info("No GDocs upload section specified in config file, will not upload demultiplex data")
        return

    # Get the GDocs demultiplex result file title
    gdocs_spreadsheet = gdocs.get("gdocs_dmplx_file", None)
    if not gdocs_spreadsheet:
        log.warn(
            "Could not find Google Docs demultiplex results file title in config. No demultiplex counts were written to Google Docs"
        )
        return

    # Get the account credentials
    encoded_credentials = ""
    encoded_credentials_file = gdocs.get("gdocs_credentials", None)
    if not encoded_credentials_file:
        log.warn("Could not find Google Docs account credentials. No demultiplex report was written")
        return
    # Check if the credentials file exists
    if not os.path.exists(encoded_credentials_file):
        log.warn("The Google Docs credentials file could not be found. No demultiplex data was written")
        return
    with open(encoded_credentials_file) as fh:
        encoded_credentials = fh.read().strip()

    # Get the barcode statistics. Get a deep copy of the run_info since we will modify it
    bc_metrics = get_bc_stats(fc_date, fc_name, work_dir, copy.deepcopy(run_info))

    # Upload the data
    write_run_report_to_gdocs(fc_date, fc_name, bc_metrics, gdocs_spreadsheet, encoded_credentials)

    # Get the projects parent folder
    projects_folder = gdocs.get("gdocs_projects_folder", None)

    # Write the bc project summary report
    if projects_folder:
        write_project_report_to_gdocs(fc_date, fc_name, bc_metrics, encoded_credentials, projects_folder)
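
Pieced together from the config.get() calls above, the gdocs_upload section of the parsed post_process.yaml would deserialize to a dictionary like this; the key names come from the code, the values are placeholders:

config = {
    "gdocs_upload": {
        "gdocs_credentials": "/path/to/encoded_credentials_file",
        "gdocs_dmplx_file": "Demultiplex results",
        "gdocs_projects_folder": "Projects",  # optional; enables the per-project reports
    }
}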
Example #7
def generate_report(proj_conf):
    
    #######
    ### Metadata fetched from the 'Genomics project list' on Google Docs
    ###
    proj_data = ProjectMetaData(proj_conf['id'], proj_conf['config'])
    uppnex_proj = proj_data.uppnex_id
    project_id = proj_data.project_id
    queue_date = proj_data.queue_date
    no_samples = proj_data.no_samples
    lanes_plates = proj_data.lanes_plates
    min_reads_per_sample = proj_data.min_reads_per_sample
    customer_reference = proj_data.customer_reference
    application = proj_data.application
    no_finished_samples = proj_data.no_finished_samples
    
    d = { 
        'project_id' : proj_conf['id'],
        'latex_opt' : "",
        'summary' : "",
        'infotable' : "",
        'lanetable' : "",
        'read1table': "",
        'read2table': "",
        'qcplots': "",
        'qc30plots': "",
        'errorrate': "",
        'yieldtable': "",
        }

    ## Latex option (no of floats per page)
    floats_per_page = '.. raw:: latex\n\n   \\setcounter{totalnumber}{8}'
    d.update(latex_opt = floats_per_page)

    ## General info table
    tab = Texttable()
    if not uppnex_proj or len(uppnex_proj) < 4 or uppnex_proj[0:4] != 'b201':
        uppnex_proj = "b201YXXX"
    
    run_name_comp = proj_conf['flowcell'].split('_')
    simple_run_name = run_name_comp[0] + run_name_comp[3][0]
    instr_id = run_name_comp[1]
    fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    tab.add_row(["Run name:", proj_conf['flowcell']])
    tab.add_rows([["Project id:", proj_conf['id']], 
                  ["Date:", fc_date],
                  ["Instrument ID:", instr_id],
                  ["Flow cell ID:", fc_name],
                  ["Uppnex project:", uppnex_proj],
                  ["Delivery directory:", "/bubo/proj/" + uppnex_proj + "/INBOX/20" + simple_run_name + "_hiseq2000"]])
    d.update(infotable=tab.draw())
    
    ## Lane table
    tab = Texttable()
    tab.add_row(["Lane", "Sample(s)"])
    for l in proj_conf['lanes']:
        main_proj = l['description'].split(',')[1].strip()
        is_main_proj = (main_proj == proj_conf['id'])
        samples = []
        if l.has_key('multiplex'):
            for mp in l['multiplex']:
                if mp.has_key('description'):
                    if mp['description'] == proj_conf['id']:
                        samples.append(mp['name'])
                elif is_main_proj:
                    samples.append(mp['name'])
            tab.add_row([l['lane'], ", ".join(samples)])
        else:
            tab.add_row([l['lane'], "Non-multiplexed lane"])
    d.update(lanetable=tab.draw())
    
    tab_r1 = Texttable()
    tab_r2 = Texttable()
    tab_r1.set_cols_width([2,12,12,12,12,12,12,30])
    tab_r2.set_cols_width([2,12,12,12,12,12,12,30])
    tab_r1.add_row(["Lane", "Clu. dens. #/mm2","% PF clusters","Clu. PF #/mm2", "% phas/prephas", "% aln PhiX", "% error rate", "Comment"])
    tab_r2.add_row(["Lane", "Clu. dens. #/mm2","% PF clusters","Clu. PF #/mm2", "% phas/prephas", "% aln PhiX", "% error rate", "Comment"])

    # These should be moved to a cfg file. ( + perhaps provide an alternative for v1.5 FC )
    if (options.v1_5_fc): min_clupf = 300 
    else: min_clupf = 475
    max_phas = 0.4
    max_prephas = 1.0 # 0.5
    max_mean_err = 2

    statspath = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'], "Data", "reports", "Summary")
    stats = summ.getQCstats(statspath)

    # Check quality criteria and add comments
    comm_r1 = ''
    comm_r2 = ''
    ok_r1 = True
    ok_r2 = True
    ok_cludens_r1 = True
    ok_cludens_r2 = True
    ok_phasing_r1 = True
    ok_phasing_r2 = True
    ok_prephasing_r1 = True
    ok_prephasing_r2 = True
    ok_err_rate = True 
    ok_err_r1 = True
    ok_err_r2 = True

    for l in proj_conf['lanes']:

        # Cluster densities
        clu_dens_r1 =  stats['raw_cluster_dens']['read1'][l['lane']]
        clu_dens_r2 =  stats['raw_cluster_dens']['read2'][l['lane']]
        clu_dens_sd_r1 =  stats['raw_cluster_dens_sd']['read1'][l['lane']]
        clu_dens_sd_r2 =  stats['raw_cluster_dens_sd']['read2'][l['lane']]
        clu_dens_string_r1 = str(clu_dens_r1) + '+/-' + str(clu_dens_sd_r1) 
        clu_dens_string_r2 = str(clu_dens_r2) + '+/-' + str(clu_dens_sd_r2) 

        # Cluster PF densities
        clu_dens_pf_r1 =  stats['pf_cluster_dens']['read1'][l['lane']]
        clu_dens_pf_r2 =  stats['pf_cluster_dens']['read2'][l['lane']]
        clu_dens_pf_sd_r1 =  stats['pf_cluster_dens_sd']['read1'][l['lane']]
        clu_dens_pf_sd_r2 =  stats['pf_cluster_dens_sd']['read2'][l['lane']]
        clu_dens_pf_string_r1 = str(clu_dens_pf_r1) + '+/-' + str(clu_dens_pf_sd_r1)
        clu_dens_pf_string_r2 = str(clu_dens_pf_r2) + '+/-' + str(clu_dens_pf_sd_r2)

        # % PF clusters
        prc_pf_r1 =  stats['prc_pf']['read1'][l['lane']]
        prc_pf_r2 =  stats['prc_pf']['read2'][l['lane']]
        prc_pf_sd_r1 =  stats['prc_pf_sd']['read1'][l['lane']]
        prc_pf_sd_r2 =  stats['prc_pf_sd']['read2'][l['lane']]
        prc_pf_string_r1 = str(prc_pf_r1) + '+/-' + str(prc_pf_sd_r1)
        prc_pf_string_r2 = str(prc_pf_r2) + '+/-' + str(prc_pf_sd_r2)

        # % phasing and prephasing
        phas_r1 = stats['phasing']['read1'][l['lane']]
        phas_r2 = stats['phasing']['read2'][l['lane']]
        prephas_r1 = stats['prephasing']['read1'][l['lane']]
        prephas_r2 = stats['prephasing']['read2'][l['lane']]
        phas_string_r1 = str(phas_r1) + '/' + str(prephas_r1)
        phas_string_r2 = str(phas_r2) + '/' + str(prephas_r2)

        # % aligned
        aln_r1 = stats['prc_aligned']['read1'][l['lane']]
        aln_r2 = stats['prc_aligned']['read2'][l['lane']]
        aln_sd_r1 = stats['prc_aligned_sd']['read1'][l['lane']]
        aln_sd_r2 = stats['prc_aligned_sd']['read2'][l['lane']]
        aln_string_r1 = str(aln_r1) + '+/-' + str(aln_sd_r1)
        aln_string_r2 = str(aln_r2) + '+/-' + str(aln_sd_r2)

        # error rate
        err_r1 = stats['error_rate']['read1'][l['lane']]
        err_r2 = stats['error_rate']['read2'][l['lane']]
        err_sd_r1 = stats['error_rate_sd']['read1'][l['lane']]
        err_sd_r2 = stats['error_rate_sd']['read2'][l['lane']]
        err_str_r1 = str(err_r1) + '+/-' + str(err_sd_r1)
        err_str_r2 = str(err_r2) + '+/-' + str(err_sd_r2)
        
        comm_r1 = ""
        comm_r2 = ""

        # check criteria
        if float(clu_dens_pf_r1[:-1]) < min_clupf: 
            ok_r1 = False
            ok_cludens_r1 = False
            comm_r1 += "Low cluster density. "
        if float(clu_dens_pf_r2[:-1]) < min_clupf: 
            ok_r2 = False
            ok_cludens_r2 = False
            comm_r2 += "Low cluster density. "
        if float(phas_r1) > max_phas: 
            ok_r1 = False
            ok_phasing_r1 = False
            comm_r1 += "High phasing. "
        if float(phas_r2) > max_phas: 
            ok_r2 = False
            ok_phasing_r2 = False
            comm_r2 += "High phasing. "
        if float(prephas_r1) > max_prephas: 
            ok_r1 = False
            ok_prephasing_r1 = False
            comm_r1 += "High prephasing. "
        if float(prephas_r2) > max_prephas: 
            ok_r2 = False
            ok_prephasing_r2 = False
            comm_r2 += "High prephasing. "
        avg_error_rate = (float(err_r1) + float(err_r2))/2
        if avg_error_rate > max_mean_err:
            ok_err_rate = False
        if float(err_r1) > max_mean_err:
            #ok_r1 = False
            comm_r1 += "High error rate. "
            ok_err_r1 = False
        if float(err_r2) > max_mean_err:
            #ok_r2 = False
            comm_r2 += "High error rate. "
            ok_err_r2 = False

        if comm_r1 == "": comm_r1 = "OK"        
        if comm_r2 == "": comm_r2 = "OK"

        tab_r1.add_row([l['lane'], clu_dens_string_r1, prc_pf_string_r1, clu_dens_pf_string_r1, phas_string_r1, aln_string_r1, err_str_r1, comm_r1])
        tab_r2.add_row([l['lane'], clu_dens_string_r2, prc_pf_string_r2, clu_dens_pf_string_r2, phas_string_r2, aln_string_r2, err_str_r2, comm_r2])

    # Reinitialize comments for the summary. (Which will be for several lanes, potentially)
    comm_r1 = ""
    comm_r2 = ""
 
    if not ok_cludens_r1: comm_r1 += "Low cluster density. " 
    if not ok_cludens_r2: comm_r2 += "Low cluster density. " 
    if not ok_phasing_r1: comm_r1 += "High phasing. " 
    if not ok_phasing_r2: comm_r2 += "High phasing. " 
    if not ok_prephasing_r1: comm_r1 += "High prephasing. " 
    if not ok_prephasing_r2: comm_r2 += "High prephasing. " 
    if not ok_err_rate:
        if not ok_err_r1: 
            ok_r1 = False
            comm_r1 += "High error rate. "
        if not ok_err_r2: 
            ok_r2 = False
            comm_r2 += "High error rate. "

    if (ok_r1 and ok_r2): 
        comm_r1 = comm_r2 = "OK"
        d.update(summary = "Successful run according to QC criteria. ")
    else:  
        if (ok_r1): 
            comm_r1 = "OK"
            d.update (summary = "Read 2 did not pass quality criteria: " + comm_r2)
        elif (ok_r2):
            comm_r2 = "OK"
            d.update (summary = "Read 1 did not pass quality criteria: " + comm_r1)
        else:
            d.update (summary = "Did not pass quality criteria. Read 1: " + comm_r1 + " Read 2: " + comm_r2)


    d.update(read1table=tab_r1.draw())
    d.update(read2table=tab_r2.draw())
        
    ## qcplots
    byCycleDir = os.path.join(proj_conf['archive_dir'], proj_conf['flowcell'], "Data", "reports", "ByCycle")
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(byCycleDir, "QScore_L%s.png" % (l['lane']))), width="100%"))
    d.update(qcplots= "\n".join(res))

    ## qc30plots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(byCycleDir, "NumGT30_L%s.png" % (l['lane']))), width="100%"))
    d.update(qc30plots= "\n".join(res))

    ## qcplots
    res = []
    for l in proj_conf['lanes']:
        res.append(m2r.image(os.path.relpath(os.path.join(byCycleDir, "ErrRate_L%s.png" % (l['lane']))), width="100%"))
    d.update(errorrate= "\n".join(res))

    ## Sequence yield table
    target_yield_per_lane = 143000000.0
    if (options.v1_5_fc):  target_yield_per_lane = 60000000.0
    tab = Texttable()
    tab.add_row(['Lane','Sample','Number of sequences','Comment'])
    
    run_info_yaml = os.path.join(proj_conf['archive_dir'],proj_conf['flowcell'],"run_info.yaml")

    if not os.path.exists(run_info_yaml):
        log.warn("Could not find required run_info.yaml configuration file at '%s'" % run_info_yaml)
        return

    #with open(run_info_yaml) as in_handle:
    #    run_info = {'details': yaml.load(in_handle)}

    with open(run_info_yaml) as in_handle:
        run_info = yaml.load(in_handle)

    # fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    # bc_yield = bc_metrics.get_bc_stats(fc_date,fc_name,proj_conf['analysis_dir'], run_info)
   
    fc_name, fc_date = get_flowcell_info(proj_conf['flowcell'])
    low_yield = False
    
    bc_multiplier = 0.75 # Should move to cfg file

    ok_samples = []
    low_samples = []

    for l in proj_conf['lanes']:
        bc_file_name = os.path.join(proj_conf['analysis_dir'], proj_conf['flowcell'], '_'.join([l['lane'], fc_date, fc_name, "barcode"]), '_'.join([l['lane'], fc_date, fc_name, "bc.metrics"]))
        try:
            bc_file = open(bc_file_name)
        except IOError:
            sys.exit("Could not find bc metrics file " + bc_file_name)
        bc_count = {}
        for line in bc_file:
            c = line.strip().split()
            bc_count[c[0]] = c[1] + ' (~' + str(int(round(float(c[1]) / 1000000))) + " million)"
        no_samples = len(bc_count)
        if no_samples == 0:
            log.warn("Did not find a BC metrics file... Skipping lane %s for %s" %(l['lane'], proj_conf['id']))
            continue
        target_yield_per_sample = bc_multiplier * target_yield_per_lane / no_samples
        sample_name = {}
        is_multiplexed = True
        is_rerun = False
        # Check here for each sample if it belongs to the project
        for entry in run_info:
            if entry['lane'] == l['lane']:
                is_main_proj = False       
                if entry['description'].split(',')[1].strip() == proj_conf['id']:
                    is_main_proj = True
                if entry.has_key('multiplex'):
                    for sample in entry['multiplex']:
                        if sample.has_key('description'):
                            if is_main_proj: 
                                log.info('Rerun lane: skipping sample ' + sample['name'] + ' in lane ' + l['lane'] + ' which does not belong to the current project')
                                is_rerun=True
                            else:
                                if sample['description'].strip() == proj_conf['id']:
                                    sample_name[sample['barcode_id']]=sample['name']
                                is_rerun = True
                        elif is_main_proj: 
                            sample_name[sample['barcode_id']]=sample['name']
                else: is_multiplexed = False
        samp_count = {}

        for k in bc_count.keys():
            if k.isdigit() and sample_name.has_key(int(k)):
                samp_count[sample_name[int(k)]] = bc_count[k]

        for k in sorted(samp_count.keys()):
            comment = ''
            if int(samp_count[k].split('(')[0]) < target_yield_per_sample: 
                comment = 'Low. '
                low_yield = True
                low_samples.append(k)
            else: ok_samples.append(k)
            if is_rerun: comment += '(rerun lane)'
            tab.add_row([l['lane'], k, samp_count[k], comment])
        
        if is_multiplexed:
            comment = ''
            try:
                if int(bc_count['unmatched'].split('(')[0]) > target_yield_per_sample: comment = 'High.'
                if is_rerun: comment += '(rerun lane)'
                tab.add_row([l['lane'], 'unmatched', bc_count['unmatched'], comment])
            except (KeyError, ValueError):
                log.warning('Insufficient or no barcode metrics for lane')
        else:
            comment = ''
            for k in bc_count.keys():
                if int(bc_count[k].split('(')[0]) < bc_multiplier * target_yield_per_lane: comment = 'Low.'
                tab.add_row([l['lane'], "Non-multiplexed lane", bc_count[k], comment])

    # if low_yield:
    #    comm = d['summary'] +  " Some samples had low yields."
    #    d.update(summary = comm)
    delivery_type = "Final delivery. "
    if low_yield:
        delivery_type = "Partial delivery. "
        fail_comm = "Samples " + ", ".join(low_samples) + " yielded fewer sequences than expected. These will be re-run unless this was already a re-run and the total yield is now sufficient. "
    else: fail_comm = ""

    if low_yield: 
        if len(ok_samples)>0: ok_comm = "Samples " + ", ".join(ok_samples) + " yielded the expected number of sequences or more. "
        else: ok_comm = ""
    else: ok_comm = "All samples yielded the expected number of sequences or more. "

    #comm = delivery_type + d['summary'] + fail_comm + ok_comm
    comm = d['summary'] + fail_comm + ok_comm
    d.update(summary = comm)

    d.update(yieldtable=tab.draw())
    return d
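
The yield-table loop above implies that each bc.metrics file holds whitespace-separated <barcode_id> <read_count> pairs, one per line, plus an 'unmatched' row. A standalone sketch of that parsing, under the same assumption:

# Assumed bc.metrics layout (inferred from the parsing loop, not verified):
#   1          1402331
#   2          987613
#   unmatched  51423
bc_file_name = "1_110106_B8042AAXX_bc.metrics"  # placeholder path
bc_count = {}
with open(bc_file_name) as bc_file:
    for line in bc_file:
        barcode_id, count = line.strip().split()[:2]
        bc_count[barcode_id] = "%s (~%s million)" % (count, int(round(float(count) / 1e6)))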
Example #8
def main(flowcell_id, archive_dir, analysis_dir, config_file):
    print " ".join([flowcell_id, archive_dir, analysis_dir])
    fp = os.path.join(archive_dir, flowcell_id, "run_info.yaml")
    with open(fp) as in_handle:
        run_info = yaml.load(in_handle)
    if config_file:
        with open(config_file) as in_handle:
            config = yaml.load(in_handle)
    else:
        config = {}
    project_ids = dict()
    for lane in run_info:
        (l, id) = [x.strip() for x in lane['description'].split(",")]
        if project_ids.has_key(id):
            if not lane in project_ids[id]: project_ids[id].append(lane)
        else:
            project_ids[id] = [lane]
        # Check here if project is a "sub project" of the lane
        if not lane.has_key('multiplex'): continue
        for s in lane['multiplex']:
            if s.has_key('description'):
                if project_ids.has_key(s['description']):
                    if lane not in project_ids[s['description']]: project_ids[s['description']].append(lane)
                else:
                    project_ids[s['description']] = [lane]
                                                                                             
    sphinx_defs = []
    for k in project_ids.keys():
        lanes = [x['lane'] for x in project_ids[k]]
        proj_file_tag = k + "_" + get_flowcell_info(flowcell_id)[1] + get_flowcell_info(flowcell_id)[0][0]
        log.info("saw project %s in lanes %s" %( k, ", ".join(lanes)))
        sphinx_defs.append("('%s', '%s_delivery.tex', 'Raw data delivery note', u'SciLifeLab Stockholm', 'howto'),\n"  % (proj_file_tag, proj_file_tag))
        projectfile = "%s.mako" % (proj_file_tag) 
        fp = open(projectfile, "w")
        fp.write(TEMPLATE)
        fp.close()
        mylookup = TemplateLookup(directories=['./'])
        tmpl = Template(filename=projectfile, lookup=mylookup)
        proj_conf = {
            'id' : k,
            'lanes' : project_ids[k],
            'archive_dir' : archive_dir, 
            'analysis_dir' : analysis_dir,
            'flowcell' : flowcell_id,
            'config' : config,
            }
        d = generate_report(proj_conf)
        rstfile = "%s.rst" % (proj_file_tag)
        fp = open(rstfile, "w")
        fp.write(tmpl.render(**d))
        fp.close()

    sphinxconf = os.path.join(os.getcwd(), "conf.py")
    if not os.path.exists(sphinxconf):
        log.warn("no sphinx configuration file conf.py found: you have to edit conf.py yourself!")
    else:
        with open(sphinxconf) as fp:
            lines = fp.readlines()
        sdout = []
        modify_conf = False
        for sd in sphinx_defs:
            if sd not in lines:
                sdout.append(sd)
                modify_conf = True
        if modify_conf:
            i = lines.index("latex_documents = [\n")
            newconf = lines[:i+3] + sdout + lines[i+3:]
            fp = open("conf.py", "w")
            fp.write("".join(newconf))
            fp.close()
Example #9
def _make_dir(dir, label):
    if not os.path.exists(dir):
        os.makedirs(dir)
        log.info("Creating %s directory %s" % (label, dir))
    else:
        log.warn("%s already exists: not creating new directory" % (dir))