Example #1
0
                # Default options to produce smaller basecaller results
                prebasecallerArgs = env['prebasecallerArgs']

                if env['doBaseRecal'] == "panel_recal":
                    prebasecallerArgs = prebasecallerArgs + " --calibration-training=0"
                    prebasecallerArgs = prebasecallerArgs + " --calibration-panel /opt/ion/config/datasets_calibration.json"
                else:
                    if not "--calibration-training=" in prebasecallerArgs:
                        prebasecallerArgs = prebasecallerArgs + " --calibration-training=100000"
                    if not "--flow-signals-type" in prebasecallerArgs:
                        prebasecallerArgs = prebasecallerArgs + " --flow-signals-type scaled-residual"

                basecaller.basecalling(
                    env['SIGPROC_RESULTS'], prebasecallerArgs,
                    env['libraryKey'], env['tfKey'], env['runID'],
                    env['reverse_primer_dict'],
                    os.path.join(env['BASECALLER_RESULTS'], 'recalibration'),
                    env['barcodeId'], env['barcodeInfo'], env['library'],
                    env['notes'], env['site_name'], env['platform'],
                    env['instrumentName'], env['chipType'])

                basecaller_recalibration_datasets = blockprocessing.get_datasets_basecaller(
                    os.path.join(env['BASECALLER_RESULTS'], 'recalibration'))
                if env['doBaseRecal'] == "panel_recal":
                    basecaller_recalibration_datasets = basecaller_recalibration_datasets[
                        'IonControl']

                # file containing dimension info (offsets, rows, cols) and flow info for stratification
                try:
                    c = open(
                        os.path.join(env['BASECALLER_RESULTS'],
                                     "recalibration", 'BaseCaller.json'), 'r')
Example #2
0
        set_result_status('Base Calling')
        try:
            basecaller.basecalling(
                env['SIGPROC_RESULTS'],
                env['basecallerArgs'] + additional_basecallerArgs,
                env['libraryKey'],
                env['tfKey'],
                env['runID'],
                env['flowOrder'],
                env['reverse_primer_dict'],
                env['BASECALLER_RESULTS'],
                env['barcodeId'],
                env['barcodeSamples'],
                env['barcodesplit_filter'],
                env['DIR_BC_FILES'],
                os.path.join("barcodeList.txt"),
                os.path.join(env['BASECALLER_RESULTS'], "barcodeMask.bin"),
                env['libraryName'],
                env['sample'],
                env['site_name'],
                env['notes'],
                env['start_time'],
                env['chipType'],
                env['expName'],
                env['resultsName'],
                env['pgmName']
            )
            add_status("Basecaller", 0)
        except:
            traceback.print_exc()
Example #3
0
                    prebasecallerArgs = prebasecallerArgs + " --calibration-training=0"
                    prebasecallerArgs = prebasecallerArgs + " --calibration-panel /opt/ion/config/datasets_calibration.json"
                else:
                    if not "--calibration-training=" in prebasecallerArgs:
                        prebasecallerArgs = prebasecallerArgs + " --calibration-training=100000"
                    if not "--flow-signals-type" in prebasecallerArgs:
                        prebasecallerArgs = prebasecallerArgs + " --flow-signals-type scaled-residual"

                basecaller.basecalling(
                    env['SIGPROC_RESULTS'],
                    prebasecallerArgs,
                    env['libraryKey'],
                    env['tfKey'],
                    env['runID'],
                    env['reverse_primer_dict'],
                    os.path.join(env['BASECALLER_RESULTS'], 'recalibration'),
                    env['barcodeId'],
                    env['barcodeInfo'],
                    env['library'],
                    env['notes'],
                    env['site_name'],
                    env['platform'],
                    env['instrumentName'],
                    env['chipType'])

                basecaller_recalibration_datasets = blockprocessing.get_datasets_basecaller(os.path.join(env['BASECALLER_RESULTS'],'recalibration'))
                if env['doBaseRecal'] == "panel_recal":
                    basecaller_recalibration_datasets = basecaller_recalibration_datasets['IonControl']

                # file containing dimension info (offsets, rows, cols) and flow info for stratification
                try:
                    c = open(os.path.join(env['BASECALLER_RESULTS'], "recalibration", 'BaseCaller.json'),'r')
Example #4
0
        else:
            printtime(
                "DEBUG: Flow Space Recalibration is disabled, Reference: '%s'"
                % env['libraryName'])
            updated_basecallerArgs = env['basecallerArgs']

        set_result_status('Base Calling')
        try:
            basecaller.basecalling(
                env['SIGPROC_RESULTS'],
                env['basecallerArgs'] + additional_basecallerArgs,
                env['libraryKey'], env['tfKey'], env['runID'],
                env['flowOrder'], env['reverse_primer_dict'],
                env['BASECALLER_RESULTS'],
                env['barcodeId'], env['barcodeSamples'],
                env.get('barcodesplit_filter',
                        0), env.get('barcodesplit_filter_minreads',
                                    0), env['DIR_BC_FILES'],
                os.path.join("barcodeList.txt"),
                os.path.join(env['BASECALLER_RESULTS'], "barcodeMask.bin"),
                env['libraryName'], env['sample'], env['site_name'],
                env['notes'], env['start_time'], env['chipType'],
                env['expName'], env['resultsName'], env['pgmName'])
            add_status("Basecaller", 0)
        except:
            traceback.print_exc()
            add_status("Basecaller", 1)

        set_result_status('Post Basecalling')
        try:
            basecaller.post_basecalling(env['BASECALLER_RESULTS'],
Example #5
0
def base_recalib(SIGPROC_RESULTS, basecallerArgs, libKey, tfKey, runID,
                 floworder, reverse_primer_dict, BASECALLER_RESULTS, barcodeId,
                 barcodeSamples, barcodesplit_filter,
                 barcodesplit_filter_minreads, DIR_BC_FILES, barcodeList_path,
                 barcodeMask_path, libraryName, sample, site_name, notes,
                 start_time, chipType, expName, resultsName, pgmName,
                 tmap_version, dataset_name, chipflow_name):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Steps:
      1. Call basecaller.basecalling() with its output directory set to
         <BASECALLER_RESULTS>/recalibration.
      2. Load the two JSON files ``dataset_name`` and ``chipflow_name`` from
         that directory.
      3. For every entry of the "datasets" list that has a non-zero
         'read_count' and whose 'basecaller_bam' file exists, align the reads
         against ``libraryName`` and call HPtable() on the result.
      4. Call HPaggregation() on the recalibration directory.

    Arguments:
      SIGPROC_RESULTS ... pgmName -- forwarded positionally to
          basecaller.basecalling().  ``basecallerArgs`` gets
          " --calibration-training=2000000" and
          " --flow-signals-type scaled-residual" appended when the respective
          option is not already part of the string.
      tmap_version  -- not used by this function; kept in the signature so
          existing callers keep working.
      dataset_name  -- name of the JSON file (inside the recalibration
          directory) that holds the "datasets" list.
      chipflow_name -- name of the JSON file (inside the recalibration
          directory) whose "BaseCaller" section holds the block offsets,
          block sizes and 'num_flows'.

    Returns:
      The path <BASECALLER_RESULTS>/recalibration/flowQVtable.txt.
      NOTE(review): this function only builds that path; it is not passed to
      HPaggregation(), so confirm that the file is really produced there.

    Raises:
      Any exception raised along the way is logged with printtime() and
      re-raised to the caller.
    '''
    try:
        recalibration_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        # Produce smaller basecaller results, unless the caller already
        # picked values for these options.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"
        basecaller.basecalling(
            SIGPROC_RESULTS, basecallerArgs, libKey, tfKey, runID, floworder,
            reverse_primer_dict, recalibration_dir, barcodeId, barcodeSamples,
            barcodesplit_filter, barcodesplit_filter_minreads, DIR_BC_FILES,
            barcodeList_path, barcodeMask_path, libraryName, sample, site_name,
            notes, start_time, chipType, expName, resultsName, pgmName)

        # load datasets_basecaller.json; "with" closes the file even when
        # the JSON parser raises
        try:
            with open(os.path.join(recalibration_dir, dataset_name), 'r') as f:
                datasets_basecaller = json.load(f)
        except Exception:
            printtime("ERROR: load " + dataset_name)
            traceback.print_exc()
            raise

        # load the file with dimension (offsets, rows, cols) and flow info
        try:
            with open(os.path.join(recalibration_dir, chipflow_name), 'r') as c:
                chipflow = json.load(c)
        except Exception:
            printtime("ERROR: load " + chipflow_name)
            traceback.print_exc()
            raise

        # collect dimension and flow info
        block_info = chipflow["BaseCaller"]
        xMin = block_info['block_col_offset']
        xMax = block_info['block_col_size'] + xMin - 1
        yMin = block_info['block_row_offset']
        yMax = block_info['block_row_size'] + yMin - 1
        yCuts = 2
        xCuts = 2
        numFlows = block_info['num_flows']
        flowCuts = 2

        # One handler for the whole per-dataset stage: every failure below is
        # printed exactly once here before being re-raised.
        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue
                readsFile = os.path.join(recalibration_dir,
                                         dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]
                # add protection that readsFile might not exist
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recalibration_dir,
                                                     runname_prefix)
                # NOTE(review): os.makedirs raises OSError when the directory
                # already exists (e.g. a re-run into the same results
                # directory); confirm that aborting in that case is intended.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                               "samplelib.bam")
                alignment.align(libraryName,
                                readsFile,
                                align_full=False,
                                sam_parsed=False,
                                bidirectional=False,
                                mark_duplicates=False,
                                realign=False,
                                skip_sorting=True,
                                aligner_opts_extra="",
                                logfile=os.path.join(RECALIBRATION_RESULTS,
                                                     "alignmentQC_out.txt"),
                                output_dir=RECALIBRATION_RESULTS,
                                output_basename="samplelib")

                # HP table generation from the aligned sample BAM (the older
                # QVtable path is no longer used).
                HPtable(RECALIBRATION_RESULTS, sample_map_path, xMin, xMax,
                        xCuts, yMin, yMax, yCuts, numFlows, flowCuts)

            # path reported back to the caller
            qvtable = os.path.join(recalibration_dir, "flowQVtable.txt")
            try:
                HPaggregation(recalibration_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise

        except Exception:
            traceback.print_exc()
            raise

    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
Example #6
0
def _load_recalibration_json(directory, filename):
    '''Return the parsed content of the JSON file ``filename`` in ``directory``.

    The file is opened through a context manager so the handle is released
    even when parsing fails.  On any failure "ERROR: load <filename>" and the
    traceback are logged, then the exception is re-raised.
    '''
    try:
        with open(os.path.join(directory, filename), 'r') as handle:
            return json.load(handle)
    except Exception:
        printtime("ERROR: load " + filename)
        traceback.print_exc()
        raise


def base_recalib(
      SIGPROC_RESULTS,
      basecallerArgs,
      libKey,
      tfKey,
      runID,
      floworder,
      reverse_primer_dict,
      BASECALLER_RESULTS,
      barcodeId,
      barcodeSamples,
      barcodesplit_filter,
      barcodesplit_filter_minreads,
      DIR_BC_FILES,
      barcodeList_path,
      barcodeMask_path,
      libraryName,
      sample,
      site_name,
      notes,
      start_time,
      chipType,
      expName,
      resultsName,
      pgmName,
      tmap_version,
      dataset_name,
      chipflow_name
    ):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Calls basecaller.basecalling() with <BASECALLER_RESULTS>/recalibration as
    output directory, then reads ``dataset_name`` and ``chipflow_name`` (JSON
    files) from that directory.  Each entry of the "datasets" list with a
    non-zero 'read_count' and an existing 'basecaller_bam' file is aligned
    against ``libraryName`` and passed to HPtable(); afterwards
    HPaggregation() is called on the recalibration directory.

    Arguments:
      SIGPROC_RESULTS ... pgmName -- forwarded positionally to
          basecaller.basecalling().  " --calibration-training=2000000" and
          " --flow-signals-type scaled-residual" are appended to
          ``basecallerArgs`` when the option is not already in the string.
      tmap_version  -- not used here; kept for interface compatibility.
      dataset_name  -- JSON file name providing the "datasets" list.
      chipflow_name -- JSON file name providing the "BaseCaller" section
          (block offsets, block sizes, 'num_flows').

    Returns:
      The path <BASECALLER_RESULTS>/recalibration/flowQVtable.txt.
      NOTE(review): the path is only constructed here and is not handed to
      HPaggregation(); confirm the file is actually written by that call.

    Raises:
      Whatever exception occurred; it is logged via printtime() first.
    '''
    try:
        recal_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        # Produce smaller basecaller results (options the caller already set
        # are left untouched).
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"
        basecaller.basecalling(
            SIGPROC_RESULTS,
            basecallerArgs,
            libKey,
            tfKey,
            runID,
            floworder,
            reverse_primer_dict,
            recal_dir,
            barcodeId,
            barcodeSamples,
            barcodesplit_filter,
            barcodesplit_filter_minreads,
            DIR_BC_FILES,
            barcodeList_path,
            barcodeMask_path,
            libraryName,
            sample,
            site_name,
            notes,
            start_time,
            chipType,
            expName,
            resultsName,
            pgmName)

        # load datasets_basecaller.json, then the chip/flow description
        datasets_basecaller = _load_recalibration_json(recal_dir, dataset_name)
        chipflow = _load_recalibration_json(recal_dir, chipflow_name)

        # collect dimension and flow info
        dims = chipflow["BaseCaller"]
        xMin = dims['block_col_offset']
        xMax = dims['block_col_size'] + xMin - 1
        yMin = dims['block_row_offset']
        yMax = dims['block_row_size'] + yMin - 1
        yCuts = 2
        xCuts = 2
        numFlows = dims['num_flows']
        flowCuts = 2

        # Single handler for the per-dataset work and the aggregation, so a
        # failure is reported with one traceback instead of several copies.
        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue
                readsFile = os.path.join(recal_dir, dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]
                # add protection that readsFile might not exist
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recal_dir, runname_prefix)
                # NOTE(review): raises OSError if the directory is already
                # there (e.g. on a re-run); confirm this is the wanted
                # behaviour.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam")
                alignment.align(
                    libraryName,
                    readsFile,
                    align_full=False,
                    sam_parsed=False,
                    bidirectional=False,
                    mark_duplicates=False,
                    realign=False,
                    skip_sorting=True,
                    aligner_opts_extra="",
                    logfile=os.path.join(RECALIBRATION_RESULTS, "alignmentQC_out.txt"),
                    output_dir=RECALIBRATION_RESULTS,
                    output_basename="samplelib")

                # HP table generation from the aligned sample BAM; the former
                # QVtable step is disabled.
                HPtable(RECALIBRATION_RESULTS, sample_map_path, xMin, xMax,
                        xCuts, yMin, yMax, yCuts, numFlows, flowCuts)

            # path handed back to the caller
            qvtable = os.path.join(recal_dir, "flowQVtable.txt")
            try:
                HPaggregation(recal_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise

        except Exception:
            traceback.print_exc()
            raise

    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
Example #7
0
                        " --calibration-panel /opt/ion/config/datasets_calibration.json"
                    )
                else:
                    if not "--calibration-training=" in prebasecallerArgs:
                        prebasecallerArgs = (prebasecallerArgs +
                                             " --calibration-training=100000")

                basecaller.basecalling(
                    my_block_offset,
                    env["SIGPROC_RESULTS"],
                    prebasecallerArgs,
                    env["libraryKey"],
                    env["tfKey"],
                    env["runID"],
                    env["reverse_primer_dict"]["sequence"],
                    os.path.join(env["BASECALLER_RESULTS"], "recalibration"),
                    env["barcodeId"],
                    env["barcodeInfo"],
                    env["library"],
                    env["notes"],
                    env["site_name"],
                    env["platform"],
                    env["instrumentName"],
                    env["chipInfo"],
                )

                # Reuse phase estimates in main base calling task
                additional_basecallerArgs += " --phase-estimation-file " + os.path.join(
                    env["BASECALLER_RESULTS"], "recalibration",
                    "BaseCaller.json")

                #
Example #8
0
            sys.exit(1)

        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = yellow\n')
        f.write('sffread = grey\n')
        f.write('alignment = grey')
        f.close()

        basecaller.basecalling(
            env['SIGPROC_RESULTS'],
            env['previousReport'],
            env['basecallerArgs'],
            env['libraryKey'],
            env['tfKey'],
            env['runID'],
            env['flowOrder'],
            env['reverse_primer_dict'],
            env['BASECALLER_RESULTS'])

        # create analysis progress bar file
        f = open('progress.txt','w')
        f.write('wellfinding = green\n')
        f.write('signalprocessing = green\n')
        f.write('basecalling = green\n')
        f.write('sffread = yellow\n')
        f.write('alignment = grey')
        f.close()

        generate_beadsummary=True