Esempio n. 1
0
                            RECALIBRATION_RESULTS, dataset['file_prefix'])
                        blocks = []
                        basecaller_meta_information = None

                        if len(calibration_input_bams) > 0:
                            calibration_input_bams += ","
                        calibration_input_bams += calibration_bam_base + ".bam"

                        alignment.align(blocks,
                                        basecaller_bam,
                                        env['alignmentArgs'],
                                        env['ionstatsArgs'],
                                        referenceName,
                                        basecaller_meta_information,
                                        env['libraryKey'],
                                        graph_max_x,
                                        do_realign=False,
                                        do_ionstats=False,
                                        do_sorting=False,
                                        do_mark_duplicates=False,
                                        do_indexing=False,
                                        output_dir=RECALIBRATION_RESULTS,
                                        output_basename=dataset['file_prefix'])

                #
                # Part 3) Call Calibration executable to create models and update basecallerArgs
                # If we didn't generate any BAMs for calibration we don't do anything
                #

                # file containing chip dimension info (offsets, rows, cols) and flow info for stratification
                try:
Esempio n. 2
0
                                             dataset['basecaller_bam'])

                    printtime("DEBUG: Work starting on %s" % readsFile)
                    RECALIBRATION_RESULTS = os.path.join(
                        env['BASECALLER_RESULTS'], "recalibration",
                        dataset['file_prefix'])
                    os.makedirs(RECALIBRATION_RESULTS)
                    sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                                   "samplelib.bam")

                    alignment.align(referenceName,
                                    readsFile,
                                    bidirectional=False,
                                    mark_duplicates=False,
                                    realign=False,
                                    skip_sorting=True,
                                    aligner_opts_extra="",
                                    logfile=os.path.join(
                                        RECALIBRATION_RESULTS,
                                        "alignmentQC_out.txt"),
                                    output_dir=RECALIBRATION_RESULTS,
                                    output_basename="samplelib")

                    # Generate both hpTable and hpModel.
                    flow_space_recal.calibrate(RECALIBRATION_RESULTS,
                                               sample_map_path,
                                               env['recalibArgs'], chipflow)

                # merge step, calibrate collects the training data saved for each barcode,
                # calculate and generate hpTable and hpModel files for the whole dataset
                flow_space_recal.HPaggregation(
                    os.path.join(env['BASECALLER_RESULTS'], "recalibration"),
Esempio n. 3
0
                    if not referenceName:
                        continue

                    readsFile = os.path.join(env['BASECALLER_RESULTS'],'recalibration',dataset['basecaller_bam'])

                    printtime("DEBUG: Work starting on %s" % readsFile)
                    RECALIBRATION_RESULTS = os.path.join(env['BASECALLER_RESULTS'],"recalibration", dataset['file_prefix'])
                    os.makedirs(RECALIBRATION_RESULTS)
                    sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam")

                    alignment.align(
                        referenceName,
                        readsFile,
                        bidirectional=False,
                        mark_duplicates=False,
                        realign=False,
                        skip_sorting=True,
                        aligner_opts_extra="",
                        logfile=os.path.join(RECALIBRATION_RESULTS,"alignmentQC_out.txt"),
                        output_dir=RECALIBRATION_RESULTS,
                        output_basename="samplelib")

                    # Generate both hpTable and hpModel.
                    flow_space_recal.calibrate(
                        RECALIBRATION_RESULTS,
                        sample_map_path,
                        env['recalibArgs'],
                        chipflow)

                # merge step, calibrate collects the training data saved for each barcode,
                # calculate and generate hpTable and hpModel files for the whole dataset
Esempio n. 4
0
                    calibration_bam_base = os.path.join(RECALIBRATION_RESULTS,dataset['file_prefix'])
                    blocks=[]
                    basecaller_meta_information=None
                    
                    if len(calibration_input_bams)>0:
                        calibration_input_bams += ","
                    calibration_input_bams += calibration_bam_base + ".bam"

                    alignment.align(
                        blocks,
                        basecaller_bam,
                        env['alignmentArgs'],
                        env['ionstatsArgs'],
                        referenceName,
                        basecaller_meta_information,
                        env['libraryKey'],
                        graph_max_x,
                        do_realign=False,
                        do_ionstats=False,
                        do_sorting=False,
                        do_mark_duplicates=False,
                        do_indexing=False,
                        output_dir=RECALIBRATION_RESULTS,
                        output_basename=dataset['file_prefix'])

                # Do not call Calibration if we did not generate any aligned input BAMs but gracefully continue
                additional_basecallerArgs += " --phase-estimation-file " + os.path.join(env['BASECALLER_RESULTS'], "recalibration", "BaseCaller.json")
                if env['doBaseRecal'] == "panel_recal":
                    additional_basecallerArgs += " --calibration-panel /opt/ion/config/datasets_calibration.json"
                    
                if len(calibration_input_bams)>0:
                    # Call Calibration module to process aligned training BAM files
Esempio n. 5
0
def _load_recalibration_json(directory, file_name):
    '''Parse ``file_name`` inside ``directory`` as JSON and return the result.

    On any failure an "ERROR: load <file_name>" line and the traceback are
    emitted, then the original exception is re-raised unchanged.
    '''
    try:
        # "with" guarantees the handle is closed even when json.load() raises;
        # the previous open()/close() pair leaked it on a parse error.
        with open(os.path.join(directory, file_name), 'r') as handle:
            return json.load(handle)
    except Exception:
        printtime("ERROR: load " + file_name)
        traceback.print_exc()
        raise


def base_recalib(SIGPROC_RESULTS, basecallerArgs, libKey, tfKey, runID,
                 floworder, reverse_primer_dict, BASECALLER_RESULTS, barcodeId,
                 barcodeSamples, barcodesplit_filter,
                 barcodesplit_filter_minreads, DIR_BC_FILES, barcodeList_path,
                 barcodeMask_path, libraryName, sample, site_name, notes,
                 start_time, chipType, expName, resultsName, pgmName,
                 tmap_version, dataset_name, chipflow_name):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    Steps, in order:

    1. Append " --calibration-training=2000000" and
       " --flow-signals-type scaled-residual" to ``basecallerArgs`` when the
       corresponding option is not already present, then call
       ``basecaller.basecalling`` with ``<BASECALLER_RESULTS>/recalibration``
       passed as its results-directory argument.
    2. Load the JSON files ``dataset_name`` and ``chipflow_name`` from that
       recalibration directory.
    3. For every dataset entry with a non-zero ``read_count`` whose
       ``basecaller_bam`` file exists, create a per-dataset directory, align
       the reads with ``alignment.align`` (output basename "samplelib") and
       call ``HPtable`` on the resulting "samplelib.bam" path.
    4. Call ``HPaggregation`` on the recalibration directory.

    ``tmap_version`` is accepted for interface compatibility but is not used.

    Returns:
        The path ``<BASECALLER_RESULTS>/recalibration/flowQVtable.txt``.
        NOTE(review): no call visible in this function is handed that path,
        so whether the file actually exists afterwards depends on
        ``HPaggregation`` -- confirm before relying on it.

    Raises:
        Any exception raised by a step is logged and re-raised unchanged;
        nothing is swallowed.
    '''
    try:
        # Produce smaller basecaller results.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs += " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs += " --flow-signals-type scaled-residual"

        recalibration_dir = os.path.join(BASECALLER_RESULTS, "recalibration")

        basecaller.basecalling(
            SIGPROC_RESULTS, basecallerArgs, libKey, tfKey, runID, floworder,
            reverse_primer_dict, recalibration_dir, barcodeId, barcodeSamples,
            barcodesplit_filter, barcodesplit_filter_minreads, DIR_BC_FILES,
            barcodeList_path, barcodeMask_path, libraryName, sample, site_name,
            notes, start_time, chipType, expName, resultsName, pgmName)

        datasets_basecaller = _load_recalibration_json(recalibration_dir,
                                                       dataset_name)
        chipflow = _load_recalibration_json(recalibration_dir, chipflow_name)

        # Collect dimension and flow info; max values are inclusive bounds
        # derived from offset + size - 1.
        block_info = chipflow["BaseCaller"]
        xMin = block_info['block_col_offset']
        xMax = block_info['block_col_size'] + xMin - 1
        yMin = block_info['block_row_offset']
        yMax = block_info['block_row_size'] + yMin - 1
        yCuts = 2
        xCuts = 2
        numFlows = block_info['num_flows']
        flowCuts = 2

        # A single handler around the whole per-dataset stage prints the
        # traceback exactly once; the former nested handlers printed it twice
        # for alignment and HPtable failures.
        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue
                readsFile = os.path.join(recalibration_dir,
                                         dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]
                # Protect against a dataset whose BAM was never written.
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recalibration_dir,
                                                     runname_prefix)
                # NOTE(review): os.makedirs raises OSError when the directory
                # already exists, so a re-run over old results aborts here.
                # Left as is because tolerating stale directories may not be
                # wanted -- confirm.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS,
                                               "samplelib.bam")

                alignment.align(libraryName,
                                readsFile,
                                align_full=False,
                                sam_parsed=False,
                                bidirectional=False,
                                mark_duplicates=False,
                                realign=False,
                                skip_sorting=True,
                                aligner_opts_extra="",
                                logfile=os.path.join(RECALIBRATION_RESULTS,
                                                     "alignmentQC_out.txt"),
                                output_dir=RECALIBRATION_RESULTS,
                                output_basename="samplelib")

                HPtable(RECALIBRATION_RESULTS, sample_map_path, xMin, xMax,
                        xCuts, yMin, yMax, yCuts, numFlows, flowCuts)

            # Merge step over the whole recalibration directory.
            try:
                qvtable = os.path.join(recalibration_dir, "flowQVtable.txt")
                HPaggregation(recalibration_dir)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise

        except Exception:
            traceback.print_exc()
            raise

    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
Esempio n. 6
0
def base_recalib(
      SIGPROC_RESULTS,
      basecallerArgs,
      libKey,
      tfKey,
      runID,
      floworder,
      reverse_primer_dict,
      BASECALLER_RESULTS,
      barcodeId,
      barcodeSamples,
      barcodesplit_filter,
      barcodesplit_filter_minreads,
      DIR_BC_FILES,
      barcodeList_path,
      barcodeMask_path,
      libraryName,
      sample,
      site_name,
      notes,
      start_time,
      chipType,
      expName,
      resultsName,
      pgmName,
      tmap_version,
      dataset_name,
      chipflow_name
    ):
    '''Do flow space recalibration for all basecall files in a report hierarchy.

    The function first makes sure ``basecallerArgs`` contains the
    "--calibration-training=" and "--flow-signals-type" options (adding
    " --calibration-training=2000000" / " --flow-signals-type scaled-residual"
    when absent) and calls ``basecaller.basecalling`` with the directory
    ``<BASECALLER_RESULTS>/recalibration``. It then reads the JSON files
    ``dataset_name`` and ``chipflow_name`` from that directory, and for each
    dataset with a non-zero ``read_count`` and an existing ``basecaller_bam``
    file it creates a per-dataset directory, runs ``alignment.align`` and
    ``HPtable``. Finally ``HPaggregation`` is called on the recalibration
    directory.

    ``tmap_version`` is part of the signature but is not read.

    Returns the path ``<BASECALLER_RESULTS>/recalibration/flowQVtable.txt``.
    NOTE(review): that path is only computed here, never passed to a callee;
    confirm the file is really produced before consuming it.

    Every exception is logged and then re-raised to the caller unchanged.
    '''
    try:
        # Produce smaller basecaller results.
        if "--calibration-training=" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --calibration-training=2000000"
        if "--flow-signals-type" not in basecallerArgs:
            basecallerArgs = basecallerArgs + " --flow-signals-type scaled-residual"

        recal_root = os.path.join(BASECALLER_RESULTS, "recalibration")

        basecaller.basecalling(
                      SIGPROC_RESULTS,
                      basecallerArgs,
                      libKey,
                      tfKey,
                      runID,
                      floworder,
                      reverse_primer_dict,
                      recal_root,
                      barcodeId,
                      barcodeSamples,
                      barcodesplit_filter,
                      barcodesplit_filter_minreads,
                      DIR_BC_FILES,
                      barcodeList_path,
                      barcodeMask_path,
                      libraryName,
                      sample,
                      site_name,
                      notes,
                      start_time,
                      chipType,
                      expName,
                      resultsName,
                      pgmName)

        # Load datasets_basecaller.json. The context manager closes the file
        # even if parsing fails (the old open()/close() pair leaked it).
        try:
            with open(os.path.join(recal_root, dataset_name), 'r') as f:
                datasets_basecaller = json.load(f)
        except Exception:
            printtime("ERROR: load " + dataset_name)
            traceback.print_exc()
            raise

        # Load the chip/flow description the same way.
        try:
            with open(os.path.join(recal_root, chipflow_name), 'r') as c:
                chipflow = json.load(c)
        except Exception:
            printtime("ERROR: load " + chipflow_name)
            traceback.print_exc()
            raise

        # Collect dimension and flow info (max = offset + size - 1, inclusive).
        xMin = chipflow["BaseCaller"]['block_col_offset']
        xMax = chipflow["BaseCaller"]['block_col_size'] + xMin - 1
        yMin = chipflow["BaseCaller"]['block_row_offset']
        yMax = chipflow["BaseCaller"]['block_row_size'] + yMin - 1
        yCuts = 2
        xCuts = 2
        numFlows = chipflow["BaseCaller"]['num_flows']
        flowCuts = 2

        # One handler for the whole per-dataset stage: the traceback is
        # printed once instead of once per nested handler.
        try:
            for dataset in datasets_basecaller["datasets"]:
                if dataset['read_count'] == 0:
                    continue
                readsFile = os.path.join(recal_root, dataset['basecaller_bam'])
                runname_prefix = os.path.split(dataset['file_prefix'])[-1]
                # The reads file might not exist; skip such datasets.
                if not os.path.isfile(readsFile):
                    printtime("WARNING: missing file: %s" % readsFile)
                    continue

                printtime("DEBUG: Work starting on %s" % readsFile)
                RECALIBRATION_RESULTS = os.path.join(recal_root, runname_prefix)
                # NOTE(review): raises OSError if the directory is already
                # there (e.g. on a re-run); kept unchanged -- confirm whether
                # existing directories should be tolerated.
                os.makedirs(RECALIBRATION_RESULTS)
                sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam")

                alignment.align(
                    libraryName,
                    readsFile,
                    align_full=False,
                    sam_parsed=False,
                    bidirectional=False,
                    mark_duplicates=False,
                    realign=False,
                    skip_sorting=True,
                    aligner_opts_extra="",
                    logfile=os.path.join(RECALIBRATION_RESULTS, "alignmentQC_out.txt"),
                    output_dir=RECALIBRATION_RESULTS,
                    output_basename="samplelib")

                HPtable(RECALIBRATION_RESULTS, sample_map_path, xMin, xMax, xCuts,
                        yMin, yMax, yCuts, numFlows, flowCuts)

            # Aggregate the per-dataset results.
            try:
                qvtable = os.path.join(recal_root, "flowQVtable.txt")
                HPaggregation(recal_root)
            except Exception:
                printtime('ERROR: Flow QV aggregation failed')
                raise

        except Exception:
            traceback.print_exc()
            raise

    except Exception as err:
        printtime("WARNING: Recalibration is not performed: %s" % err)
        raise

    return qvtable
Esempio n. 7
0
                    printtime("DEBUG: Work starting on %s" % readsFile)
                    RECALIBRATION_RESULTS = os.path.join(env['BASECALLER_RESULTS'],"recalibration", dataset['file_prefix'])
                    os.makedirs(RECALIBRATION_RESULTS)
                    sample_map_path = os.path.join(RECALIBRATION_RESULTS, "samplelib.bam")
                    blocks=[]
                    basecaller_meta_information=None

                    alignment.align(
                        blocks,
                        env['alignmentArgs'],
                        env['ionstatsArgs'],
                        referenceName,
                        basecaller_meta_information,
                        env['libraryKey'],
                        graph_max_x,
                        readsFile,
                        do_realign=False,
                        do_ionstats=False,
                        do_sorting=False,
                        do_mark_duplicates=False,
                        do_indexing=False,
                        logfile=os.path.join(RECALIBRATION_RESULTS,"alignmentQC_out.txt"),
                        output_dir=RECALIBRATION_RESULTS,
                        output_basename="samplelib")

                    # Generate both hpTable and hpModel.
                    flow_space_recal.calibrate(
                        RECALIBRATION_RESULTS,
                        sample_map_path,
                        env['recalibArgs'],
                        chipflow)