Esempio n. 1
0
def _load_block_datasets(dirs, BASECALLER_RESULTS):
    """Return the parsed datasets_basecaller.json of every block that has a usable one.

    A block is skipped (and logged) when its file cannot be read or parsed, or
    when the parsed content is not a dict holding both the 'datasets' and
    'read_groups' keys that the merge relies on.
    """
    block_datasets_json = []
    for block_dir in dirs:
        current_datasets_path = os.path.join(block_dir, BASECALLER_RESULTS, 'datasets_basecaller.json')
        try:
            # 'with' guarantees the handle is closed even when json.load raises
            with open(current_datasets_path, 'r') as f:
                current_datasets_json = json.load(f)
        except Exception:
            printtime("ERROR: skipped %s" % current_datasets_path)
            continue

        # Validate every block, not just the first one: a malformed block would
        # otherwise raise KeyError in the middle of the merge.
        if not (isinstance(current_datasets_json, dict)
                and 'datasets' in current_datasets_json
                and 'read_groups' in current_datasets_json):
            printtime("ERROR: skipped %s (missing 'datasets' or 'read_groups')" % current_datasets_path)
            continue

        block_datasets_json.append(current_datasets_json)
    return block_datasets_json


def _combine_datasets_json(block_datasets_json):
    """Sum per-dataset and per-read-group counters of all blocks into a copy of the first block.

    Datasets are matched across blocks by list position, read groups by key.
    A block that lacks a given dataset position or read group contributes
    nothing for it. A read group's 'filtered' flag starts as True unless its
    name contains 'nomatch', and is AND-ed with each block's 'filtered' value
    (a block without that value counts as True).
    """
    combined = copy.deepcopy(block_datasets_json[0])

    for dataset_idx, dataset in enumerate(combined['datasets']):
        read_count = 0
        for block in block_datasets_json:
            block_datasets = block['datasets']
            # Guard against blocks that list fewer datasets than the template block
            if dataset_idx < len(block_datasets):
                read_count += block_datasets[dataset_idx].get("read_count", 0)
        dataset['read_count'] = read_count

    for read_group, merged in combined['read_groups'].items():
        merged['Q20_bases'] = 0
        merged['total_bases'] = 0
        merged['read_count'] = 0
        merged['filtered'] = 'nomatch' not in read_group
        for block in block_datasets_json:
            block_group = block['read_groups'].get(read_group, {})
            merged['Q20_bases'] += block_group.get("Q20_bases", 0)
            merged['total_bases'] += block_group.get("total_bases", 0)
            merged['read_count'] += block_group.get("read_count", 0)
            merged['filtered'] &= block_group.get("filtered", True)

    return combined


def _write_composite_return_code(dirs, BASECALLER_RESULTS):
    """Write composite_return_code.txt when exactly 96 block directories were given.

    The value written is 96 minus the number of blocks whose blockstatus.txt
    contains the text 'Basecaller=0'. An already existing file is left untouched.
    """
    expected_blocks = 96
    if len(dirs) != expected_blocks:
        return

    composite_return_code = expected_blocks
    for subdir in dirs:
        blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
        if not os.path.exists(blockstatus_return_code_file):
            continue
        with open(blockstatus_return_code_file, 'r') as f:
            if 'Basecaller=0' in f.read():
                composite_return_code -= 1

    composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
    if os.path.exists(composite_return_code_file):
        printtime("DEBUG: skip generation of %s" % composite_return_code_file)
        return

    printtime("DEBUG: create %s" % composite_return_code_file)
    # 0o002 is the octal spelling accepted by both Python 2.6+ and Python 3.
    # The umask change is process-wide and stays in effect afterwards, exactly
    # as before.
    os.umask(0o002)
    with open(composite_return_code_file, 'a') as f:
        f.write(str(composite_return_code))


def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller outputs into composite files under BASECALLER_RESULTS.

    Steps, in order: merge datasets_basecaller.json, merge the per-dataset and
    overall ionstats_basecaller.json, write the composite return code, merge
    TF metrics, merge BaseCaller.json, then draw the composite plots and the
    wells beadogram. Most steps log their failure and let the following steps
    run; the read-length/quality plot calls are not guarded and propagate
    their exceptions.

    Args:
        dirs: sequence of block directories to merge.
        BASECALLER_RESULTS: basecaller results directory; used both as the
            sub-directory inside each block and as the output directory.
        SIGPROC_RESULTS: passed to wells_beadogram.generate_wells_beadogram.
        flows: number of flows, used to size the x axis of the plots; if it
            cannot be converted with int(), 400 is used as the axis maximum.
        floworder: passed to TFPipeline.mergeBlocks.

    Returns:
        None. Returns early (after logging) when no block provides a valid
        datasets_basecaller.json.
    """

    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################

    block_datasets_json = _load_block_datasets(dirs, BASECALLER_RESULTS)
    if not block_datasets_json:
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    combined_datasets_json = _combine_datasets_json(block_datasets_json)

    try:
        with open(os.path.join(BASECALLER_RESULTS, 'datasets_basecaller.json'), "w") as f:
            json.dump(combined_datasets_json, f, indent=4)
    except Exception:
        printtime("ERROR; Failed to write merged datasets_basecaller.json")
        traceback.print_exc()

    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            ionstats_name = dataset['file_prefix'] + '.ionstats_basecaller.json'
            composite_filename = os.path.join(BASECALLER_RESULTS, ionstats_name)
            barcode_filename_list = [
                filename
                for filename in (os.path.join(block_dir, BASECALLER_RESULTS, ionstats_name) for block_dir in dirs)
                if os.path.exists(filename)
            ]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
                os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, ''))
    except Exception:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        _write_composite_return_code(dirs, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except Exception:
        printtime("ERROR: Merging TFMapper metrics failed")
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            # NOTE(review): this joins BASECALLER_RESULTS/<block>, whereas the
            # rest of this function uses <block>/BASECALLER_RESULTS. Kept as is
            # because the intended on-disk layout is not visible here - confirm.
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        printtime("Merging BaseCaller.json files failed")
        traceback.print_exc()

    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        # flows missing or not numeric: fall back to a fixed axis maximum
        graph_max_x = 400

    composite_ionstats = os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json')

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
                graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
Esempio n. 2
0
def post_basecalling(BASECALLER_RESULTS,expName,resultsName,flows):
    """Generate ionstats files and plots for every dataset in datasets_basecaller.json.

    For each dataset whose unmapped BAM exists under BASECALLER_RESULTS, an
    ionstats_basecaller.json and a read-length sparkline are produced. The
    per-dataset ionstats files are then reduced into a single
    ionstats_basecaller.json, from which the legacy stats files and the
    read-length and quality histograms are generated.

    Args:
        BASECALLER_RESULTS: directory holding datasets_basecaller.json and the
            BAM files; all outputs are written there.
        expName: not used by this function.
        resultsName: not used by this function.
        flows: number of flows, used to size the x axis of the plots; if it
            cannot be converted with int(), 400 is used as the axis maximum.

    Raises:
        Exception: if datasets_basecaller.json does not exist or cannot be
            read/parsed.
    """

    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json")

    if not os.path.exists(datasets_basecaller_path):
        printtime("ERROR: %s does not exist" % datasets_basecaller_path)
        raise Exception("ERROR: %s does not exist" % datasets_basecaller_path)

    try:
        # 'with' closes the handle even when json.load raises
        with open(datasets_basecaller_path,'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        # 'except Exception' lets KeyboardInterrupt/SystemExit propagate
        # instead of being reported as a parse problem
        printtime("ERROR: problem parsing %s" % datasets_basecaller_path)
        raise Exception("ERROR: problem parsing %s" % datasets_basecaller_path)

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        # flows missing or not numeric: fall back to a fixed axis maximum
        graph_max_x = 400

    quality_file_list = []
    for dataset in datasets_basecaller["datasets"]:
        basecaller_bam = os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])
        if not os.path.exists(basecaller_bam):
            continue

        dataset_ionstats = os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')

        # Call ionstats utility to generate alignment-independent metrics for current unmapped BAM
        ionstats.generate_ionstats_basecaller(
                basecaller_bam,
                dataset_ionstats,
                graph_max_x)

        # Plot read length sparkline
        ionstats_plots.read_length_sparkline(
                dataset_ionstats,
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

        quality_file_list.append(dataset_ionstats)

    composite_ionstats = os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json')

    # Merge ionstats_basecaller files from individual barcodes/dataset
    ionstats.reduce_stats(quality_file_list,composite_ionstats)

    # Generate legacy stats file: quality.summary
    ionstats.generate_legacy_basecaller_files(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS,''))

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
            composite_ionstats,
            os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))

    printtime("Finished basecaller post processing")