Example #1
0
            traceback.print_exc()

        if reference_selected:
            try:
                # Use ionstats alignment results to generate plots
                ionstats_plots.alignment_rate_plot2('ionstats_alignment.json','alignment_rate_plot.png', graph_max_x)
                ionstats_plots.base_error_plot('ionstats_alignment.json','base_error_plot.png', graph_max_x)
                ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q10.png', 'AQ10', 'red')
                ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
                ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q20.png', 'AQ20', 'green')
                ionstats_plots.old_aq_length_histogram('ionstats_alignment.json','Filtered_Alignments_Q47.png', 'AQ47', 'purple')
            except:
                traceback.print_exc()

        try:
            wells_beadogram.generate_wells_beadogram(env['BASECALLER_RESULTS'], env['SIGPROC_RESULTS'])
        except:
            printtime ("ERROR: Wells beadogram generation failed")
            traceback.print_exc()

        set_result_status('TF Processing')

        try:
            # TODO basecaller_results/datasets_tf.json might contain read_count : 0
            if os.path.exists(os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')):

                # input
                tf_basecaller_bam_filename = os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')
                tf_reference_filename = os.path.join("/results/referenceLibrary/TestFragment", env['tfKey'], "DefaultTFs.fasta")

                # These files will be created
Example #2
0
                    "ionstats_alignment.json",
                    "Filtered_Alignments_Q20.png",
                    "AQ20",
                    "green",
                )
                ionstats_plots.old_aq_length_histogram(
                    "ionstats_alignment.json",
                    "Filtered_Alignments_Q47.png",
                    "AQ47",
                    "purple",
                )
            except Exception:
                traceback.print_exc()

        try:
            wells_beadogram.generate_wells_beadogram(env["BASECALLER_RESULTS"],
                                                     env["SIGPROC_RESULTS"])
        except Exception:
            printtime("ERROR: Wells beadogram generation failed")
            traceback.print_exc()

        set_result_status("TF Processing")

        try:
            # TODO basecaller_results/datasets_tf.json might contain read_count : 0
            if os.path.exists(
                    os.path.join(env["BASECALLER_RESULTS"],
                                 "rawtf.basecaller.bam")):

                # input
                tf_basecaller_bam_filename = os.path.join(
                    env["BASECALLER_RESULTS"], "rawtf.basecaller.bam")
Example #3
0
def basecalling(
      SIGPROC_RESULTS,
      basecallerArgs,
      libKey,
      tfKey,
      runID,
      floworder,
      reverse_primer_dict,
      BASECALLER_RESULTS,
      barcodeId,
      barcodeSamples,
      barcodesplit_filter,
      DIR_BC_FILES,
      barcodeList_path,
      barcodeMask_path,
      libraryName,
      sample,
      site_name,
      notes,
      start_time,
      chipType,
      expName,
      resultsName,
      pgmName
      ):
    """Run the external BaseCaller and post-process its output.

    Steps:
      1. Generate datasets_pipeline.json (list of all active result files).
      2. Invoke the BaseCaller executable for this block.
      3. Move fully barcode-filtered BAM files into a bc_filtered directory.

    Returns early (skipping filtering) when '--calibration-training' is
    present in basecallerArgs.

    Raises:
        RuntimeError: if the BaseCaller executable exits with a non-zero
            return code (re-raised to the caller after logging).
    """

    if not os.path.exists(BASECALLER_RESULTS):
        os.mkdir(BASECALLER_RESULTS)


    ''' Step 1: Generate datasets_pipeline.json '''

    # datasets_pipeline.json contains the list of all active result files.
    # Tasks like post_basecalling, alignment, plugins, must process each
    # specified file and merge results.
    # Temporarily generated in BASECALLER_RESULTS directory from
    # barcodeList.txt.  Eventually will replace barcodeList.txt altogether.

    datasets_pipeline_path = os.path.join(BASECALLER_RESULTS, "datasets_pipeline.json")
    datasets_basecaller_path = os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json")

    try:
        generate_datasets_json(
            barcodeId,
            barcodeSamples,
            barcodeList_path,
            datasets_pipeline_path,
            runID,
            libraryName,
            sample,
            site_name,
            notes,
            chipType,
            expName,
            resultsName,
            pgmName
        )
    except Exception:
        printtime('ERROR: Generation of barcode_files.json unsuccessful')
        traceback.print_exc()


    ''' Step 2: Invoke BaseCaller '''

    # Derive this block's column/row offsets from the working directory
    # name (e.g. .../block_X1024_Y2048); fall back to (0, 0) when the
    # pattern does not match or the captures are not numeric.
    try:
        [(x, y)] = re.findall('block_X(.*)_Y(.*)', os.getcwd())
        block_col_offset = int(x) if x.isdigit() else 0
        block_row_offset = int(y) if y.isdigit() else 0
    except Exception:
        block_col_offset = 0
        block_row_offset = 0

    try:
        # 3' adapter details
        adapter = reverse_primer_dict['sequence']
        # TODO: provide barcode_filter via datasets.json

        cmd = basecaller_cmd(basecallerArgs,
                             SIGPROC_RESULTS,
                             libKey,
                             tfKey,
                             runID,
                             BASECALLER_RESULTS,
                             block_col_offset,
                             block_row_offset,
                             datasets_pipeline_path,
                             adapter,
                             barcodesplit_filter)

        printtime("DEBUG: Calling '%s':" % cmd)
        proc = subprocess.Popen(shlex.split(cmd.encode('utf8')), shell=False,
                                stderr=subprocess.PIPE, stdout=subprocess.PIPE)
        stdout_value, stderr_value = proc.communicate()
        ret = proc.returncode
        sys.stdout.write("%s" % stdout_value)
        sys.stderr.write("%s" % stderr_value)

        # Ion Reporter: append BaseCaller output to its log file.
        try:
            basecaller_log_path = os.path.join(BASECALLER_RESULTS, 'basecaller.log')
            with open(basecaller_log_path, 'a') as f:
                if stdout_value:
                    f.write(stdout_value)
                if stderr_value:
                    f.write(stderr_value)
        except IOError:
            traceback.print_exc()

        if ret != 0:
            printtime('ERROR: BaseCaller failed with exit code: %d' % ret)
            # The original code used a bare `raise` with no active
            # exception, which only worked by accident (it triggered
            # "RuntimeError: No active exception to re-raise").  Raise a
            # RuntimeError explicitly with a meaningful message instead.
            raise RuntimeError('BaseCaller failed with exit code: %d' % ret)

        # In calibration-training mode, ignore the rest of the operations.
        if '--calibration-training' in basecallerArgs:
            printtime('training mode: ignore filtering')
            return
    except Exception:
        printtime('ERROR: BaseCaller failed')
        traceback.print_exc()
        raise


    ''' Step 3: Apply barcode filtering: just move the filtered files to a different directory '''

    # This approach to barcode filtering needs rethinking. On proton,
    # filtering should happen after block merge.

    try:
        DIR_BC_FILTERED = os.path.join(BASECALLER_RESULTS, 'bc_filtered')
        if not os.path.exists(DIR_BC_FILTERED):
            os.mkdir(DIR_BC_FILTERED)

        with open(datasets_basecaller_path, 'r') as f:
            datasets_basecaller = json.load(f)

        for dataset in datasets_basecaller["datasets"]:

            # Keep the dataset if at least one of its read groups is not
            # marked as filtered; otherwise move its BAM out of the way.
            keep_dataset = any(
                not datasets_basecaller["read_groups"][rg_name].get('filtered', False)
                for rg_name in dataset["read_groups"])
            if keep_dataset:
                continue

            filtered_file = os.path.join(BASECALLER_RESULTS, dataset["basecaller_bam"])
            printtime("filter_barcodes: removing %s" % filtered_file)
            try:
                move(filtered_file, DIR_BC_FILTERED)
            except Exception:
                traceback.print_exc()

    except Exception:
        printtime("Barcode filtering failed")
        traceback.print_exc()


    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime("Wells beadogram generation failed")
        traceback.print_exc()


    printtime("Finished basecaller processing")
Example #4
0
                ionstats_plots.alignment_rate_plot2(
                    'ionstats_alignment.json', 'alignment_rate_plot.png', graph_max_x)
                ionstats_plots.base_error_plot('ionstats_alignment.json', 'base_error_plot.png', graph_max_x)
                ionstats_plots.old_aq_length_histogram(
                    'ionstats_alignment.json', 'Filtered_Alignments_Q10.png', 'AQ10', 'red')
                ionstats_plots.old_aq_length_histogram(
                    'ionstats_alignment.json', 'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
                ionstats_plots.old_aq_length_histogram(
                    'ionstats_alignment.json', 'Filtered_Alignments_Q20.png', 'AQ20', 'green')
                ionstats_plots.old_aq_length_histogram(
                    'ionstats_alignment.json', 'Filtered_Alignments_Q47.png', 'AQ47', 'purple')
            except:
                traceback.print_exc()

        try:
            wells_beadogram.generate_wells_beadogram(env['BASECALLER_RESULTS'], env['SIGPROC_RESULTS'])
        except:
            printtime("ERROR: Wells beadogram generation failed")
            traceback.print_exc()

        set_result_status('TF Processing')

        try:
            # TODO basecaller_results/datasets_tf.json might contain read_count : 0
            if os.path.exists(os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')):

                # input
                tf_basecaller_bam_filename = os.path.join(env['BASECALLER_RESULTS'], 'rawtf.basecaller.bam')
                tf_reference_filename = os.path.join(
                    "/results/referenceLibrary/TestFragment", env['tfKey'], "DefaultTFs.fasta")
Example #5
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller results into composite whole-chip output.

    Merges datasets_basecaller.json and the per-barcode
    ionstats_basecaller.json files across blocks, writes the composite
    return code, merges TF metrics and BaseCaller.json files, and renders
    the composite plots.

    Args:
        dirs: list of per-block result directories.
        BASECALLER_RESULTS: basecaller results directory (input and output).
        SIGPROC_RESULTS: signal-processing results directory.
        flows: number of flows; used to scale the plot x-axes.
        floworder: flow order string, forwarded to TF metric merging.
    """

    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################

    block_datasets_json = []
    combined_datasets_json = {}

    for dir in dirs:
        current_datasets_path = os.path.join(dir, BASECALLER_RESULTS, 'datasets_basecaller.json')
        try:
            with open(current_datasets_path, 'r') as f:
                block_datasets_json.append(json.load(f))
        except Exception:
            printtime("ERROR: skipped %s" % current_datasets_path)

    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    combined_datasets_json = copy.deepcopy(block_datasets_json[0])

    # Sum per-dataset read counts across all blocks.
    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += \
                current_datasets_json['datasets'][dataset_idx].get("read_count", 0)

    # Accumulate per-read-group stats.  A group ends up 'filtered' only if
    # every block filtered it; 'nomatch' groups are never marked filtered.
    # NOTE: the original used dict.iterkeys() (Python 2 only); plain dict
    # iteration is equivalent and works on both Python 2 and 3.
    for read_group in combined_datasets_json['read_groups']:
        combined_rg = combined_datasets_json['read_groups'][read_group]
        combined_rg['Q20_bases'] = 0
        combined_rg['total_bases'] = 0
        combined_rg['read_count'] = 0
        combined_rg['filtered'] = 'nomatch' not in read_group
        for current_datasets_json in block_datasets_json:
            block_rg = current_datasets_json['read_groups'].get(read_group, {})
            combined_rg['Q20_bases'] += block_rg.get("Q20_bases", 0)
            combined_rg['total_bases'] += block_rg.get("total_bases", 0)
            combined_rg['read_count'] += block_rg.get("read_count", 0)
            combined_rg['filtered'] &= block_rg.get("filtered", True)

    try:
        with open(os.path.join(BASECALLER_RESULTS, 'datasets_basecaller.json'), "w") as f:
            json.dump(combined_datasets_json, f, indent=4)
    except Exception:
        # (fixed typo: was "ERROR;")
        printtime("ERROR: Failed to write merged datasets_basecaller.json")
        traceback.print_exc()



    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            composite_filename = os.path.join(
                BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
            barcode_filename_list = [
                os.path.join(dir, BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')
                for dir in dirs]
            barcode_filename_list = [
                filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list, composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(
            composite_filename_list,
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, ''))
    except Exception:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()



    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # Only for full-chip runs with 96 blocks: start at 96 and subtract
        # one for each block whose blockstatus.txt reports Basecaller=0.
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:
                        if 'Basecaller=0' in f.read():
                            composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # 0o002 umask makes the file group-writable.  (Was the
                # Python-2-only literal 0002; 0o002 works on 2.6+ and 3.)
                os.umask(0o002)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()


    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS, dirs, floworder)
    except Exception:
        printtime("ERROR: Merging TFMapper metrics failed")


    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        printtime("Merging BaseCaller.json files failed")


    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except Exception:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.sparkline.png'),
            graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto.png'),
        graph_max_x)

    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'readLenHisto2.png'),
        graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS, 'quality_histogram.png'))


    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except Exception:
        printtime("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")