Example #1
0
def mergeRawPeakSignals(dirs):
    """Merge per-block 'raw_peak_signal' files into a composite
    'raw_peak_signal' file in the current working directory.

    Blocks flagged by isbadblock() or missing the file are skipped with a
    log message; a merge failure is logged (with traceback) but not raised.
    """
    ###############################################
    # Merge raw_peak_signal files                 #
    ###############################################
    printtime("Merging raw_peak_signal files")

    try:
        raw_peak_signal_files = []
        for subdir in dirs:
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging raw_peak_signal files"):
                continue
            raw_peak_signal_file = os.path.join(subdir, 'raw_peak_signal')
            if os.path.exists(raw_peak_signal_file):
                raw_peak_signal_files.append(raw_peak_signal_file)
            else:
                printtime("ERROR: Merging raw_peak_signal files: skipped %s" %
                          raw_peak_signal_file)
        composite_raw_peak_signal_file = "raw_peak_signal"
        blockprocessing.merge_raw_key_signals(raw_peak_signal_files,
                                              composite_raw_peak_signal_file)
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # propagate; log the traceback instead of failing silently.
        traceback.print_exc()
        printtime("Merging raw_peak_signal files failed")

    printtime("Finished mergeRawPeakSignals")
Example #2
0
def merge_basecaller_json(dirs, BASECALLER_RESULTS):
    """Collect every block directory under BASECALLER_RESULTS that contains
    a BaseCaller.json and merge them via mergeBaseCallerJson.merge().

    Bad blocks and blocks without the file are skipped with a log message;
    a merge failure is logged with a traceback but not raised.
    """
    printtime("Merging BaseCaller.json files")

    try:
        blocks_with_json = []
        for block_dir in dirs:
            block_path = os.path.join(BASECALLER_RESULTS, block_dir)
            printtime("DEBUG: %s:" % block_path)
            if isbadblock(block_path, "Merging BaseCaller.json files"):
                continue
            json_path = os.path.join(block_path, "BaseCaller.json")
            if not os.path.exists(json_path):
                printtime(
                    "ERROR: Merging BaseCaller.json files: skipped %s" % json_path
                )
                continue
            # merge() expects block directories, not the json file paths.
            blocks_with_json.append(block_path)

        mergeBaseCallerJson.merge(blocks_with_json, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Example #3
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS):
    """Merge per-block basecaller outputs: datasets_basecaller.json, a
    composite return code (full 96-block runs only), and BaseCaller.json.

    Every stage is best-effort: failures are logged (with traceback) and
    processing continues, so one bad block does not abort the composite.
    """
    merge_datasets_basecaller_json(dirs, BASECALLER_RESULTS)

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # Only a full 96-block run gets a composite return code: start at
        # 96 and subtract one per block whose blockstatus.txt reports a
        # successful basecall ('Basecaller=0').
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):
                    with open(blockstatus_return_code_file, 'r') as f:
                        if 'Basecaller=0' in f.read():
                            composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # Group-writable output; 0o002 is valid on py2.6+ and py3
                # (the original '0002' literal is py2-only).
                os.umask(0o002)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() expects block directories, not json file paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        # Narrowed from a bare 'except:'; keep the traceback for diagnosis.
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Example #4
0
def merge_basecaller_json(dirs, BASECALLER_RESULTS):
    """Merge per-block BaseCaller.json files into a composite in
    BASECALLER_RESULTS.

    Blocks flagged by isbadblock() or lacking a BaseCaller.json are skipped
    with a log message; a merge failure is logged with a traceback but not
    raised.
    """
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() expects block directories, not json file paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # propagate; keep the traceback for diagnosis.
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Example #5
0
def mergeRawPeakSignals(dirs):
    """Merge per-block 'raw_peak_signal' files into a composite
    'raw_peak_signal' file in the current working directory.

    Blocks flagged by isbadblock() or missing the file are skipped with a
    log message; a merge failure is logged (with traceback) but not raised.
    """
    ###############################################
    # Merge raw_peak_signal files                 #
    ###############################################
    printtime("Merging raw_peak_signal files")

    try:
        raw_peak_signal_files = []
        for subdir in dirs:
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging raw_peak_signal files"):
                continue
            raw_peak_signal_file = os.path.join(subdir, 'raw_peak_signal')
            if os.path.exists(raw_peak_signal_file):
                raw_peak_signal_files.append(raw_peak_signal_file)
            else:
                printtime("ERROR: Merging raw_peak_signal files: skipped %s" % raw_peak_signal_file)
        composite_raw_peak_signal_file = "raw_peak_signal"
        blockprocessing.merge_raw_key_signals(raw_peak_signal_files, composite_raw_peak_signal_file)
    except Exception:
        # Narrowed from a bare 'except:'; log the traceback instead of
        # swallowing the error silently.
        traceback.print_exc()
        printtime("Merging raw_peak_signal files failed")

    printtime("Finished mergeRawPeakSignals")
Example #6
0
def mergeBasecallerResults(dirs, QualityPath, merged_bead_mask_path, floworder, libsff, tfsff, BASECALLER_RESULTS):
    """Merge per-block basecaller outputs into composite run-level results.

    Steps, each best-effort (failures are logged, not raised):
      1. Sum / max / average the per-block quality.summary metrics and
         write the composite INI-style file to QualityPath.
      2. Merge TF metrics and generate TF plots (TFPipeline.mergeBlocks).
      3. Merge the per-block BaseCaller.json files.
      4. Merge per-block library and test-fragment SFF files with the
         external SFFProtonMerge tool.

    NOTE(review): merged_bead_mask_path is accepted but never used in this
    function — confirm whether it can be dropped at the call sites.
    """
    ############################################
    # Merge individual quality.summary files #
    ############################################
    printtime("Merging individual quality.summary files")

    config_out = ConfigParser.RawConfigParser()
    config_out.optionxform = str # don't convert to lowercase
    config_out.add_section('global')

    # Counter-style metrics: summed across blocks.
    numberkeys = ['Number of 50BP Reads',
                  'Number of 100BP Reads',
                  'Number of 150BP Reads',
                  'Number of Reads at Q0',
                  'Number of Bases at Q0',
                  'Number of 50BP Reads at Q0',
                  'Number of 100BP Reads at Q0',
                  'Number of 150BP Reads at Q0',
                  'Number of Reads at Q17',
                  'Number of Bases at Q17',
                  'Number of 50BP Reads at Q17',
                  'Number of 150BP Reads at Q17',
                  'Number of 100BP Reads at Q17',
                  'Number of Reads at Q20',
                  'Number of Bases at Q20',
                  'Number of 50BP Reads at Q20',
                  'Number of 100BP Reads at Q20',
                  'Number of 150BP Reads at Q20']

    # Maximum-style metrics: composite value is the max over blocks.
    maxkeys = ['Max Read Length at Q0',
               'Max Read Length at Q17',
               'Max Read Length at Q20']

    # Average-style metrics: accumulated as value/len(dirs) per block.
    meankeys = ['System SNR',
                'Mean Read Length at Q0',
                'Mean Read Length at Q17',
                'Mean Read Length at Q20']

    config_in = MyConfigParser()
    config_in.optionxform = str # don't convert to lowercase
    doinit = True  # True until the first summary file has seeded config_out
    for i,subdir in enumerate(dirs):
        if isbadblock(subdir, "Merging quality.summary"):
            continue
        summaryfile=os.path.join(BASECALLER_RESULTS, subdir, 'quality.summary')
        if os.path.exists(summaryfile):
            printtime("INFO: process %s" % summaryfile)
            config_in.read(summaryfile)
            for key in numberkeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                config_out.set('global', key, int(value_in) + int(value_out))
            for key in maxkeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                config_out.set('global', key, max(int(value_in),int(value_out)))
            for key in meankeys:
                value_in = config_in.get('global',key)
                if doinit:
                    value_out = 0
                else:
                    value_out = config_out.get('global', key)
                # NOTE(review): the divisor is len(dirs), which also counts
                # skipped/missing blocks — the resulting mean is biased low
                # whenever a block is absent; confirm this is intended.
                config_out.set('global', key, float(value_out)+float(value_in)/len(dirs))
            doinit = False
        else:
            printtime("ERROR: skipped %s" % summaryfile)

    # NOTE(review): 'wb' works only on Python 2 here; on Python 3
    # RawConfigParser.write expects a text-mode file.
    with open(QualityPath, 'wb') as configfile:
        config_out.write(configfile)

    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")

    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)

    except:
        printtime("ERROR: Merging TFMapper metrics failed")


    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() takes block *directories*, not the json paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")


    ########################################
    # Merge individual block SFF files     #
    ########################################
    printtime("Merging Library SFF files")
    try:
        # Build the SFFProtonMerge command line; each existing block dir is
        # appended as a positional argument.
        cmd = 'SFFProtonMerge'
        cmd = cmd + ' -i rawlib.sff'
        cmd = cmd + ' -o %s ' % libsff
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            if isbadblock(subdir, "Merging Library SFF files"):
                continue
            rawlibsff = os.path.join(subdir,'rawlib.sff')
            if os.path.exists(rawlibsff):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % rawlibsff)
        printtime("DEBUG: Calling '%s'" % cmd)
        # NOTE(review): shell=True with a concatenated command string —
        # paths containing spaces/metacharacters would break; presumed safe
        # for internal block directory names, but verify.
        subprocess.call(cmd,shell=True)
    except:
        printtime("SFFProtonMerge failed (library)")

    printtime("Merging Test Fragment SFF files")
    try:
        # Same merge for the test-fragment SFFs (rawtf.sff -> tfsff).
        cmd = 'SFFProtonMerge'
        cmd = cmd + ' -i rawtf.sff'
        cmd = cmd + ' -o %s ' % tfsff
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            if isbadblock(subdir, "Merging Test Fragment SFF files"):
                continue
            rawtfsff = os.path.join(subdir,'rawtf.sff')
            if os.path.exists(rawtfsff):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % rawtfsff)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("SFFProtonMerge failed (test fragments)")
Example #7
0
def mergeSigProcResults(dirs, SIGPROC_RESULTS, plot_title, exclusionMask=''):
    """Merge per-block signal-processing results.

    Writes the composite analysis return code (full 96-block runs only),
    merges the per-block bead mask files with the external BeadmaskMerge
    tool, and generates the composite bead density heatmap.

    All stages are best-effort: failures are logged (with traceback) and
    processing continues.
    """
    bfmaskPath = os.path.join(SIGPROC_RESULTS, 'analysis.bfmask.bin')
    bfmaskstatspath = os.path.join(SIGPROC_RESULTS, 'analysis.bfmask.stats')

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # Only a full 96-block run gets a composite return code: start at
        # 96 and subtract one per block whose blockstatus.txt reports a
        # successful analysis ('Analysis=0').
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:
                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):
                    with open(blockstatus_return_code_file, 'r') as f:
                        if 'Analysis=0' in f.read():
                            composite_return_code -= 1

            composite_return_code_file = os.path.join(SIGPROC_RESULTS, "analysis_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # Group-writable output; 0o002 is valid on py2.6+ and py3
                # (the original '0002' literal is py2-only).
                os.umask(0o002)
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        traceback.print_exc()

    ######################################################################
    # Merge individual block bead metrics files and generate bead stats  #
    ######################################################################
    printtime("Merging individual block bead metrics files")

    try:
        cmd = 'BeadmaskMerge -i analysis.bfmask.bin -o ' + bfmaskPath
        if exclusionMask:
            cmd += ' -e %s' % exclusionMask

        for subdir in dirs:
            subdir = os.path.join(SIGPROC_RESULTS, subdir)
            if isbadblock(subdir, "Merging individual block bead metrics files"):
                continue
            bfmaskbin = os.path.join(subdir, 'analysis.bfmask.bin')
            if os.path.exists(bfmaskbin):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % bfmaskbin)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd, shell=True)
    except Exception:
        # Narrowed from a bare 'except:' and now logs the traceback rather
        # than only a one-line message.
        traceback.print_exc()
        printtime("BeadmaskMerge failed")

    # NOTE: merging per-block analysis.bfmask.stats is not needed here —
    # BeadmaskMerge already generates analysis.bfmask.stats with the
    # exclusion mask applied.

    ########################################################
    #Make Bead Density Plots                               #
    ########################################################
    printtime("Make Bead Density Plots (composite report)")

    printtime("DEBUG: generate composite heatmap")
    if os.path.exists(bfmaskPath):
        try:
            beadDensityPlot.genHeatmap(bfmaskPath, bfmaskstatspath, "./", plot_title)
        except Exception:
            traceback.print_exc()
    else:
        printtime("Warning: no heatmap generated.")

    printtime("Finished mergeSigProcResults")
Example #8
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS, SIGPROC_RESULTS, flows, floworder):
    """Merge per-block basecaller outputs into composite run-level results.

    In order (each stage is best-effort; failures are logged, not raised):
      1. Merge per-block datasets_basecaller.json into a composite
         (read counts summed, read-group stats accumulated).
      2. Merge ionstats_basecaller.json — first across blocks per barcode,
         then across barcodes.
      3. Write a composite return code (full 96-block runs only).
      4. Merge TF metrics and the per-block BaseCaller.json files.
      5. Generate composite plots (sparklines, read-length and quality
         histograms, wells beadogram).

    Python 2 only: uses dict.iterkeys() and the octal literal 0002.
    """
    ########################################################
    # Merge datasets_basecaller.json                       #
    ########################################################
    
    block_datasets_json = []
    combined_datasets_json = {}
    
    # Load each block's datasets_basecaller.json; path layout is
    # <block dir>/<BASECALLER_RESULTS>/datasets_basecaller.json.
    # NOTE(review): loop variable 'dir' shadows the builtin.
    for dir in dirs:
        current_datasets_path = os.path.join(dir,BASECALLER_RESULTS,'datasets_basecaller.json')
        try:
            f = open(current_datasets_path,'r')
            block_datasets_json.append(json.load(f))
            f.close()
        except:
            printtime("ERROR: skipped %s" % current_datasets_path)
    
    # Abort early if no block produced a usable file.
    if (not block_datasets_json) or ('datasets' not in block_datasets_json[0]) or ('read_groups' not in block_datasets_json[0]):
        printtime("merge_basecaller_results: no block contained a valid datasets_basecaller.json, aborting")
        return

    # Seed the composite from the first block, then accumulate counts.
    combined_datasets_json = copy.deepcopy(block_datasets_json[0])
    
    # NOTE(review): indexing by dataset_idx assumes every block lists its
    # datasets in the same order as block 0 — confirm upstream guarantees.
    for dataset_idx in range(len(combined_datasets_json['datasets'])):
        combined_datasets_json['datasets'][dataset_idx]['read_count'] = 0
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['datasets'][dataset_idx]['read_count'] += current_datasets_json['datasets'][dataset_idx].get("read_count",0)
    
    # Accumulate per-read-group stats. 'filtered' starts True for every
    # group except 'nomatch' ones, then is AND-ed across blocks (a block
    # missing the group defaults to True, i.e. does not clear the flag).
    for read_group in combined_datasets_json['read_groups'].iterkeys():
        combined_datasets_json['read_groups'][read_group]['Q20_bases'] = 0;
        combined_datasets_json['read_groups'][read_group]['total_bases'] = 0;
        combined_datasets_json['read_groups'][read_group]['read_count'] = 0;
        combined_datasets_json['read_groups'][read_group]['filtered'] = True if 'nomatch' not in read_group else False
        for current_datasets_json in block_datasets_json:
            combined_datasets_json['read_groups'][read_group]['Q20_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("Q20_bases",0)
            combined_datasets_json['read_groups'][read_group]['total_bases'] += current_datasets_json['read_groups'].get(read_group,{}).get("total_bases",0)
            combined_datasets_json['read_groups'][read_group]['read_count'] += current_datasets_json['read_groups'].get(read_group,{}).get("read_count",0)
            combined_datasets_json['read_groups'][read_group]['filtered'] &= current_datasets_json['read_groups'].get(read_group,{}).get("filtered",True)
    
    try:
        f = open(os.path.join(BASECALLER_RESULTS,'datasets_basecaller.json'),"w")
        json.dump(combined_datasets_json, f, indent=4)
        f.close()
    except:
        printtime("ERROR; Failed to write merged datasets_basecaller.json")
        traceback.print_exc()



    ########################################################
    # Merge ionstats_basecaller.json:                      #
    # First across blocks, then across barcodes            #
    ########################################################

    try:
        composite_filename_list = []
        for dataset in combined_datasets_json["datasets"]:
            # Reduce each barcode's per-block stats into one composite file,
            # skipping blocks where the file is absent.
            composite_filename = os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json')
            barcode_filename_list = [os.path.join(dir,BASECALLER_RESULTS,dataset['file_prefix']+'.ionstats_basecaller.json') for dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list,composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        # Then reduce across barcodes into the run-level composite.
        ionstats.reduce_stats(composite_filename_list,os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'))
        ionstats.generate_legacy_basecaller_files(
                os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS,''))
    except:
        printtime("ERROR: Failed to merge ionstats_basecaller.json")
        traceback.print_exc()



    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # Only a full 96-block run gets a composite return code: start at
        # 96 and subtract one per block reporting 'Basecaller=0'.
        if len(dirs)==96:
            composite_return_code=96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir,"blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        if 'Basecaller=0' in text:
                            composite_return_code-=1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS,"composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                # 0002 is a Python 2 octal literal (0o002 on Python 3).
                os.umask(0002)
                f = open(composite_return_code_file, 'a')
                f.write(str(composite_return_code))
                f.close()
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except:
        traceback.print_exc()


    ##################################################
    #generate TF Metrics                             #
    #look for both keys and append same file         #
    ##################################################

    printtime("Merging TFMapper metrics and generating TF plots")
    try:
        TFPipeline.mergeBlocks(BASECALLER_RESULTS,dirs,floworder)
    except:
        printtime("ERROR: Merging TFMapper metrics failed")

    
    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS,subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir,'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() takes block *directories*, not the json paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles,BASECALLER_RESULTS)
    except:
        printtime("Merging BaseCaller.json files failed")


    ###############################################
    # Generate composite plots
    ###############################################

    printtime("Build composite basecaller graphs")
    # X-axis upper bound for the read-length plots, derived from the flow
    # count; falls back to 400 if 'flows' is not parseable as an int.
    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    # Plot read length sparkline
    for dataset in combined_datasets_json["datasets"]:
        ionstats_plots.read_length_sparkline(
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.ionstats_basecaller.json'),
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix']+'.sparkline.png'),
                graph_max_x)

    # Plot classic read length histogram
    ionstats_plots.old_read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto.png'),
            graph_max_x)
    
    # Plot new read length histogram
    ionstats_plots.read_length_histogram(
            os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
            os.path.join(BASECALLER_RESULTS,'readLenHisto2.png'),
            graph_max_x)

    # Plot quality value histogram
    ionstats_plots.quality_histogram(
        os.path.join(BASECALLER_RESULTS,'ionstats_basecaller.json'),
        os.path.join(BASECALLER_RESULTS,'quality_histogram.png'))
    

    try:
        wells_beadogram.generate_wells_beadogram(BASECALLER_RESULTS, SIGPROC_RESULTS)
    except:
        printtime ("ERROR: Wells beadogram generation failed")
        traceback.print_exc()

    printtime("Finished merging basecaller stats")
Example #9
0
def mergeSigProcResults(dirs, SIGPROC_RESULTS, plot_title, exclusionMask=''):
    """Merge per-block signal-processing results: combine the per-block
    bead mask files with the external BeadmaskMerge tool and generate the
    composite bead density heatmap.

    Failures are logged (with traceback) and processing continues.
    """
    bfmaskPath = os.path.join(SIGPROC_RESULTS, 'analysis.bfmask.bin')
    bfmaskstatspath = os.path.join(SIGPROC_RESULTS, 'analysis.bfmask.stats')

    # 
    # Merge individual block bead metrics files and generate bead stats  #
    # 
    printtime("Merging individual block bead metrics files")

    try:
        cmd = 'BeadmaskMerge -i analysis.bfmask.bin -o ' + bfmaskPath
        if exclusionMask:
            cmd += ' -e %s' % exclusionMask

        for subdir in dirs:
            subdir = os.path.join(SIGPROC_RESULTS, subdir)
            if isbadblock(subdir, "Merging individual block bead metrics files"):
                continue
            bfmaskbin = os.path.join(subdir, 'analysis.bfmask.bin')
            if os.path.exists(bfmaskbin):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % bfmaskbin)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd, shell=True)
    except Exception:
        # Narrowed from a bare 'except:' and now logs the traceback rather
        # than only a one-line message.
        traceback.print_exc()
        printtime("BeadmaskMerge failed")

    # NOTE: merging per-block analysis.bfmask.stats is not needed here —
    # BeadmaskMerge already generates analysis.bfmask.stats with the
    # exclusion mask applied.

    # 
    # Make Bead Density Plots                               #
    # 
    printtime("Make Bead Density Plots (composite report)")

    printtime("DEBUG: generate composite heatmap")
    if os.path.exists(bfmaskPath):
        try:
            beadDensityPlot.genHeatmap(bfmaskPath, bfmaskstatspath, "./", plot_title)
        except Exception:
            traceback.print_exc()
    else:
        printtime("Warning: no heatmap generated.")

    printtime("Finished mergeSigProcResults")
Example #10
0
def mergeAlignmentResults(dirs, env, ALIGNMENT_RESULTS):
    """Merge per-block alignment outputs into composite run-level results.

    Merges each block's alignment.summary (INI format) and alignTable.txt
    into composite files, symlinks them into ALIGNMENT_RESULTS, produces
    JSON variants of both via helper scripts, merges the per-block BAM
    files with Picard MergeSamFiles, and finally re-runs alignStats on the
    merged BAM.

    Parameters:
        dirs: per-block subdirectory names to merge.
        env: dict; this function reads 'expName', 'resultsName',
            'tmap_version' and 'libraryName' (output naming and reference
            genome lookup).
        ALIGNMENT_RESULTS: directory receiving the merged outputs.
    """

    ############################################
    # Merge individual alignment.summary files #
    ############################################
    printtime("Merging individual alignment.summary files")

    config_out = ConfigParser.RawConfigParser()
    config_out.optionxform = str # don't convert to lowercase
    config_out.add_section('global')

    quallist = ['Q7', 'Q10', 'Q17', 'Q20', 'Q47']
    bplist = [50, 100, 150, 200, 250, 300, 350, 400]

    # Copied verbatim from the last block read; assumed identical in every
    # block — TODO confirm.
    fixedkeys = [ 'Genome', 'Genome Version', 'Index Version', 'Genomesize' ]

    # Merged by summation across blocks.
    numberkeys = ['Total number of Reads',
                  'Filtered Mapped Bases in Q7 Alignments',
                  'Filtered Mapped Bases in Q10 Alignments',
                  'Filtered Mapped Bases in Q17 Alignments',
                  'Filtered Mapped Bases in Q20 Alignments',
                  'Filtered Mapped Bases in Q47 Alignments',
                  'Filtered Q7 Alignments',
                  'Filtered Q10 Alignments',
                  'Filtered Q17 Alignments',
                  'Filtered Q20 Alignments',
                  'Filtered Q47 Alignments']

    # Per-length read-count keys, e.g. 'Filtered 100Q17 Reads'.
    for q in quallist:
        for bp in bplist:
            numberkeys.append('Filtered %s%s Reads' % (bp, q))

    # Merged by taking the maximum across blocks.
    maxkeys = ['Filtered Q7 Longest Alignment',
               'Filtered Q10 Longest Alignment',
               'Filtered Q17 Longest Alignment',
               'Filtered Q20 Longest Alignment',
               'Filtered Q47 Longest Alignment']

    # Merged as the mean of the per-block values.
    meankeys = ['Filtered Q7 Mean Alignment Length',
                'Filtered Q10 Mean Alignment Length',
                'Filtered Q17 Mean Alignment Length',
                'Filtered Q20 Mean Alignment Length',
                'Filtered Q47 Mean Alignment Length',
                'Filtered Q7 Coverage Percentage',
                'Filtered Q10 Coverage Percentage',
                'Filtered Q17 Coverage Percentage',
                'Filtered Q20 Coverage Percentage',
                'Filtered Q47 Coverage Percentage',
                'Filtered Q7 Mean Coverage Depth',
                'Filtered Q10 Mean Coverage Depth',
                'Filtered Q17 Mean Coverage Depth',
                'Filtered Q20 Mean Coverage Depth',
                'Filtered Q47 Mean Coverage Depth']

    # init: seed the composite section with neutral starting values.
    for key in fixedkeys:
        value_out = 'unknown'
        config_out.set('global', key, value_out)
    for key in numberkeys:
        value_out = 0
        config_out.set('global', key, int(value_out))
    for key in maxkeys:
        value_out = 0
        config_out.set('global', key, int(value_out))
    for key in meankeys:
        value_out = 0
        config_out.set('global', key, float(value_out))

    config_in = MyConfigParser()
    config_in.optionxform = str # don't convert to lowercase
    # NOTE(review): the loop index i is unused.
    for i,subdir in enumerate(dirs):
        if isbadblock(subdir, "Merging alignment.summary"):
            continue
        alignmentfile=os.path.join(subdir, 'alignment.summary')
        if os.path.exists(alignmentfile):
            config_in.read(os.path.join(alignmentfile))

            for key in numberkeys:
                value_in = config_in.get('global',key)
                value_out = config_out.get('global', key)
                config_out.set('global', key, int(value_in) + int(value_out))
            for key in maxkeys:
                value_in = config_in.get('global',key)
                value_out = config_out.get('global', key)
                config_out.set('global', key, max(int(value_in),int(value_out)))
            for key in fixedkeys:
                value_in = config_in.get('global',key)
                value_out = config_out.get('global',key)
                #todo
                config_out.set('global', key, value_in)
            # Running sum of value_in / len(dirs) == mean of the per-block
            # values.  NOTE(review): blocks skipped above still count in
            # the divisor, biasing the mean low — confirm this is intended.
            for key in meankeys:
                value_in = config_in.get('global',key)
                value_out = config_out.get('global', key)
                config_out.set('global', key, float(value_out)+float(value_in)/len(dirs))

         #              'Filtered Q17 Mean Coverage Depth' =
         #                  'Filtered Mapped Bases in Q17 Alignments' / 'Genomesize';

        else:
            printtime("ERROR: skipped %s" % alignmentfile)


    # NOTE(review): 'wb' works under Python 2, where ConfigParser writes
    # byte strings; Python 3 would need text mode.
    with open('alignment.summary.merged', 'wb') as configfile:
        config_out.write(configfile)

    # Expose the merged file under the conventional name via a symlink.
    r = subprocess.call(["ln", "-s", os.path.join(ALIGNMENT_RESULTS,"alignment.summary.merged"), os.path.join(ALIGNMENT_RESULTS,"alignment.summary")])

    #########################################
    # Merge individual alignTable.txt files #
    #########################################
    printtime("Merging individual alignTable.txt files")

    table = 0
    header = None
    for subdir in dirs:
        if isbadblock(subdir, "Merging alignTable.txt"):
            continue
        alignTableFile = os.path.join(subdir,'alignTable.txt')
        if os.path.exists(alignTableFile):
            if header is None:
                # Full table (header row included) read as strings from the
                # first block; used below to restore the header line and
                # the first column.
                header = numpy.loadtxt(alignTableFile, dtype='string', comments='#')
            # Element-wise sum of the numeric rows across blocks; table
            # starts as scalar 0, so the first += establishes the shape.
            table += numpy.loadtxt(alignTableFile, dtype='int', comments='#',skiprows=1)
        else:
            printtime("ERROR: skipped %s" % alignTableFile)
    #fix first column
    # Summing also added up the first column (the bin labels); restore it
    # from the first block's values.
    table[:,0] = (header[1:,0])
    f_handle = open('alignTable.txt.merged', 'w')
    numpy.savetxt(f_handle, header[0][None], fmt='%s', delimiter='\t')
    numpy.savetxt(f_handle, table, fmt='%i', delimiter='\t')
    f_handle.close()

    r = subprocess.call(["ln", "-s", os.path.join(ALIGNMENT_RESULTS,"alignTable.txt.merged"), os.path.join(ALIGNMENT_RESULTS,"alignTable.txt")])


    #############################################
    # Merge alignment.summary (json)            #
    #############################################
    printtime("Merging  alignment.summary (json)")
    try:
        # Helper script prints merged JSON to stdout; capture via shell
        # redirection.
        cmd = 'merge_alignment.summary.py'
        for subdir in dirs:
            if isbadblock(subdir, "Merging alignment.summary (json)"):
                continue
            alignmentfile=os.path.join(subdir, 'alignment.summary')
            if os.path.exists(alignmentfile):
                cmd = cmd + ' %s' % alignmentfile
            else:
                printtime("ERROR: skipped %s" % alignmentfile)
        cmd = cmd + ' > alignment.summary.json'
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("Merging alignment.summary (json) failed")


    #############################################
    # Merge alignTable.txt (json)               #
    #############################################
    printtime("Merging alignTable.txt (json)")
    try:
        cmd = 'merge_alignTable.py'
        for subdir in dirs:
            if isbadblock(subdir, "Merging alignTable.txt (json)"):
                continue
            alignstatsfile=os.path.join(subdir, 'alignTable.txt')
            if os.path.exists(alignstatsfile):
                cmd = cmd + ' %s' % alignstatsfile
            else:
                printtime("ERROR: skipped %s" % alignstatsfile)
        cmd = cmd + ' > alignTable.txt.json'
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("Merging alignTable.txt (json) failed")


    #############################################
    # Merge individual block bam files   #
    #############################################
    printtime("Merging bam files")
    try:
#        cmd = 'picard-tools MergeSamFiles'
        cmd = 'java -Xmx8g -jar /opt/picard/picard-tools-current/MergeSamFiles.jar'
        for subdir in dirs:
            if isbadblock(subdir, "Merging bam files"):
                continue
            bamfile = os.path.join(ALIGNMENT_RESULTS, subdir, "rawlib.bam")
            if os.path.exists(bamfile):
                cmd = cmd + ' I=%s' % bamfile
            else:
                printtime("ERROR: skipped %s" % bamfile)
        cmd = cmd + ' O=%s/%s_%s.bam' % (ALIGNMENT_RESULTS, env['expName'], env['resultsName'])
        cmd = cmd + ' ASSUME_SORTED=true'
        cmd = cmd + ' CREATE_INDEX=true'
        cmd = cmd + ' USE_THREADING=true'
        cmd = cmd + ' VALIDATION_STRINGENCY=LENIENT'
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("bam file merge failed")

    # Picard's CREATE_INDEX writes <name>.bai; rename to the <name>.bam.bai
    # convention.
    try:
        srcbaifilepath = '%s/%s_%s.bai' % (ALIGNMENT_RESULTS, env['expName'], env['resultsName'])
        dstbaifilepath = '%s/%s_%s.bam.bai' % (ALIGNMENT_RESULTS, env['expName'], env['resultsName'])
        if os.path.exists(srcbaifilepath):
            os.rename(srcbaifilepath, dstbaifilepath)
        else:
            printtime("ERROR: %s doesn't exists" % srcbaifilepath)
    except:
        traceback.print_exc()

    #remove symbolic links
    # NOTE(review): relative paths — assumes the current working directory
    # is ALIGNMENT_RESULTS, where the symlinks were created above; verify.
    os.remove("alignment.summary")
    os.remove("alignTable.txt")

    ##################################################
    #Call alignStats on merged bam file              #
    ##################################################
    printtime("Call alignStats on merged bam file")

    try:
        cmd = "alignStats -i %s/%s_%s.bam" % (ALIGNMENT_RESULTS, env['expName'], env['resultsName'])
        cmd = cmd + " -g /results/referenceLibrary/%s/%s/%s.info.txt" % (env["tmap_version"],env["libraryName"], env["libraryName"])
        cmd = cmd + " -n 12 -l 20 -m 400 -q 7,10,17,20,47 -s 0 -a alignTable.txt"
        cmd = cmd + " --outputDir %s" % ALIGNMENT_RESULTS
        cmd = cmd + " 2>> " + os.path.join(ALIGNMENT_RESULTS, "alignStats_out.txt")
        printtime("DEBUG: Calling '%s'" % cmd)
        os.system(cmd)
    except:
        printtime("alignStats failed")
Example #11
0
def mergeSigProcResults(dirs, pathToRaw, skipchecksum, SIGPROC_RESULTS):
    """Merge per-block signal-processing outputs into composite results.

    Copies one representative processParameters.txt, writes a composite
    checksum status into the raw-data directory, merges the per-block
    bfmask.bin and bfmask.stats files, and generates the composite
    bead-density plot.

    Parameters:
        dirs: per-block subdirectory names.
        pathToRaw: raw-data directory that receives checksum_status.txt.
        skipchecksum: when true, checksum_status.txt is never created.
        SIGPROC_RESULTS: directory holding per-block results and receiving
            the merged outputs.
    """
    #####################################################
    # Grab one of the processParameters.txt files       #
    #####################################################
    printtime("Merging processParameters.txt")

    # Use the first block that has one; presumably representative of all
    # blocks — TODO confirm.
    for subdir in dirs:
        subdir = os.path.join(SIGPROC_RESULTS,subdir)
        ppfile = os.path.join(subdir,'processParameters.txt')
        printtime(ppfile)
        if os.path.isfile(ppfile):
            processParametersMerge.processParametersMerge(ppfile,True)
            break



    ########################################################
    # write composite return code                          #
    ########################################################
    composite_return_code=0

    # The four corner blocks are excluded from the error scan.
    for subdir in dirs:
        if subdir == "block_X0_Y9331":
            continue
        if subdir == "block_X14168_Y9331":
            continue
        if subdir == "block_X0_Y0":
            continue
        if subdir == "block_X14168_Y0":
            continue

        try:
            f = open(os.path.join(SIGPROC_RESULTS,subdir,"analysis_return_code.txt"), 'r')
            # NOTE(review): read(1) keeps only the first character, so a
            # multi-digit return code would be truncated — confirm the
            # codes are single-digit.
            analysis_return_code = int(f.read(1))
            f.close()
            if analysis_return_code!=0:
                printtime("DEBUG: errors in %s " % subdir)
                composite_return_code=1
                break
        except:
            traceback.print_exc()

    # Write checksum_status.txt once, only for complete 96-block runs and
    # only when not explicitly skipped.
    csp = os.path.join(pathToRaw,'checksum_status.txt')
    if not os.path.exists(csp) and not skipchecksum and len(dirs)==96:
        printtime("DEBUG: create checksum_status.txt")
        try:
            # clear the world-write bit for the file created below
            os.umask(0002)
            f = open(csp, 'w')
            f.write(str(composite_return_code))
            f.close()
        except:
            traceback.print_exc()
    else:
        printtime("DEBUG: skip generation of checksum_status.txt")


    #################################################
    # Merge individual block bead metrics files     #
    #################################################
    printtime("Merging individual block bead metrics files")

    try:
        # BeadmaskMerge combines the per-block bfmask.bin files into one
        # composite mask file.
        _tmpfile = os.path.join(SIGPROC_RESULTS,'bfmask.bin')
        cmd = 'BeadmaskMerge -i bfmask.bin -o ' + _tmpfile
        for subdir in dirs:
            subdir = os.path.join(SIGPROC_RESULTS,subdir)
            if isbadblock(subdir, "Merging individual block bead metrics files"):
                continue
            bfmaskbin = os.path.join(subdir,'bfmask.bin')
            if os.path.exists(bfmaskbin):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % bfmaskbin)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd,shell=True)
    except:
        printtime("BeadmaskMerge failed (test fragments)")



    ###############################################
    # Merge individual block bead stats files     #
    ###############################################
    printtime("Merging bfmask.stats files")

    try:
        bfmaskstatsfiles = []
        for subdir in dirs:
            subdir = os.path.join(SIGPROC_RESULTS,subdir)
            if isbadblock(subdir, "Merging bfmask.stats files"):
                continue
            bfmaskstats = os.path.join(subdir,'bfmask.stats')
            if os.path.exists(bfmaskstats):
                # NOTE(review): appends the block directory rather than the
                # stats file path — confirm StatsMerge expects directories.
                bfmaskstatsfiles.append(subdir)
            else:
                printtime("ERROR: Merging bfmask.stats files: skipped %s" % bfmaskstats)

        StatsMerge.main_merge(bfmaskstatsfiles, True)
        #TODO
        # Assumes main_merge wrote bfmask.stats into the current working
        # directory — TODO confirm.
        shutil.move('bfmask.stats', SIGPROC_RESULTS)
    except:
        printtime("No bfmask.stats files were found to merge")

    ###############################################
    # Merge individual block MaskBead files       #
    ###############################################
#    printtime("Merging MaskBead.mask files")
#
#    try:
#        bfmaskfolders = []
#        for subdir in dirs:
#            subdir = os.path.join(SIGPROC_RESULTS,subdir)
#            printtime("DEBUG: %s:" % subdir)
#
#            if isbadblock(subdir, "Merging MaskBead.mask files"):
#                continue
#
#            bfmaskbead = os.path.join(subdir,'MaskBead.mask')
#            if not os.path.exists(bfmaskbead):
#                printtime("ERROR: Merging MaskBead.mask files: skipped %s" % bfmaskbead)
#                continue
#
#            bfmaskfolders.append(subdir)
#
#        offset_str = "use_blocks"
#        MaskMerge.main_merge('MaskBead.mask', bfmaskfolders, merged_bead_mask_path, True, offset_str)
#    except:
#        printtime("Merging MaskBead.mask files failed")


    ########################################################
    #Make Bead Density Plots                               #
    ########################################################
    printtime("Make Bead Density Plots (composite report)")

    bfmaskPath = os.path.join(SIGPROC_RESULTS,'bfmask.bin')
    maskpath = os.path.join(SIGPROC_RESULTS,'MaskBead.mask')

    # skip if merged MaskBead.mask exists TODO
    printtime("generate MaskBead.mask")
    if os.path.isfile(bfmaskPath):
        com = "BeadmaskParse -m MaskBead %s" % bfmaskPath
        os.system(com)
        #TODO
        # BeadmaskParse appears to write MaskBead.mask into the current
        # working directory; move it next to the merged mask.
        try:
            shutil.move('MaskBead.mask', maskpath)
        except:
            printtime("ERROR: MaskBead.mask already moved")
    else:
        printtime("Warning: %s doesn't exists." % bfmaskPath)

    printtime("generate graph")
    if os.path.exists(maskpath):
        try:
            # Makes Bead_density_contour.png
            beadDensityPlot.genHeatmap(maskpath, SIGPROC_RESULTS) # todo, takes too much time
  #          os.remove(maskpath)
        except:
            traceback.print_exc()
    else:
        printtime("Warning: no MaskBead.mask file exists.")
Example #12
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS):
    """Merge per-block basecaller outputs into composite results.

    Merges the per-block datasets_basecaller.json files, writes a
    composite return code counting failed blocks, and merges the
    per-block BaseCaller.json files into BASECALLER_RESULTS.

    Parameters:
        dirs: per-block subdirectory names.
        BASECALLER_RESULTS: directory holding per-block basecaller results
            and receiving the merged outputs.
    """

    merge_datasets_basecaller_json(dirs, BASECALLER_RESULTS)

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # A full run has 96 blocks; start at 96 and decrement once per
        # successful (or deliberately ignored) block, so 0 == all passed.
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, "r") as f:
                        text = f.read()
                        if "Basecaller=0" in text:
                            composite_return_code -= 1
                        else:
                            # The four corner blocks may fail with analysis
                            # return code 3; treat that as non-critical.
                            with open(os.path.join(subdir, "sigproc_results", "analysis_return_code.txt"), "r") as g:
                                return_code_text = g.read()
                                if return_code_text == "3" and subdir in [
                                    "block_X0_Y0",
                                    "block_X14168_Y0",
                                    "block_X0_Y9324",
                                    "block_X14168_Y9324",
                                ]:
                                    printtime("INFO: suppress non-critical error in %s" % subdir)
                                    composite_return_code -= 1

            composite_return_code_file = os.path.join(BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0o002)  # keep the new file group-writable (0o002 works on Py2.6+ and Py3)
                # with-statement guarantees the handle is closed even if
                # the write raises (was a manual open/close).
                with open(composite_return_code_file, "a") as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # propagate; matches the except style used elsewhere in this file.
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, "BaseCaller.json")
            if os.path.exists(basecallerjson):
                # merge() consumes block directories, not file paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" % basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Example #13
0
def merge_basecaller_stats(dirs, BASECALLER_RESULTS):
    """Merge per-block basecaller outputs into composite results.

    Merges the per-block datasets_basecaller.json files, writes a
    composite return code counting failed blocks, and merges the
    per-block BaseCaller.json files into BASECALLER_RESULTS.

    Parameters:
        dirs: per-block subdirectory names.
        BASECALLER_RESULTS: directory holding per-block basecaller results
            and receiving the merged outputs.
    """

    merge_datasets_basecaller_json(dirs, BASECALLER_RESULTS)

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # A full run has 96 blocks; start at 96 and decrement once per
        # successful (or deliberately ignored) block, so 0 == all passed.
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(
                    subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        if 'Basecaller=0' in text:
                            composite_return_code -= 1
                        else:
                            # The four corner blocks may fail with analysis
                            # return code 3; treat that as non-critical.
                            with open(
                                    os.path.join(subdir, "sigproc_results",
                                                 "analysis_return_code.txt"),
                                    'r') as g:
                                return_code_text = g.read()
                                if return_code_text == "3" and subdir in [
                                        'block_X0_Y0', 'block_X14168_Y0',
                                        'block_X0_Y9324', 'block_X14168_Y9324'
                                ]:
                                    printtime(
                                        "INFO: suppress non-critical error in %s"
                                        % subdir)
                                    composite_return_code -= 1

            composite_return_code_file = os.path.join(
                BASECALLER_RESULTS, "composite_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0o002)  # keep the new file group-writable (0o002 works on Py2.6+ and Py3)
                # with-statement guarantees the handle is closed even if
                # the write raises (was a manual open/close).
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" %
                          composite_return_code_file)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt
        # propagate; matches the except style used elsewhere in this file.
        traceback.print_exc()

    ###############################################
    # Merge BaseCaller.json files                 #
    ###############################################
    printtime("Merging BaseCaller.json files")

    try:
        basecallerfiles = []
        for subdir in dirs:
            subdir = os.path.join(BASECALLER_RESULTS, subdir)
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging BaseCaller.json files"):
                continue
            basecallerjson = os.path.join(subdir, 'BaseCaller.json')
            if os.path.exists(basecallerjson):
                # merge() consumes block directories, not file paths.
                basecallerfiles.append(subdir)
            else:
                printtime("ERROR: Merging BaseCaller.json files: skipped %s" %
                          basecallerjson)

        mergeBaseCallerJson.merge(basecallerfiles, BASECALLER_RESULTS)
    except Exception:
        traceback.print_exc()
        printtime("Merging BaseCaller.json files failed")

    printtime("Finished merging basecaller stats")
Example #14
0
def mergeSigProcResults(dirs, SIGPROC_RESULTS, plot_title):
    """Merge per-block signal-processing outputs into composite results.

    Writes a composite analysis return code, merges the per-block
    analysis.bfmask.bin and analysis.bfmask.stats files, renders the
    composite bead-density heatmap, and merges raw_peak_signal files.

    Parameters:
        dirs: per-block subdirectory names.
        SIGPROC_RESULTS: directory holding per-block results and receiving
            the merged outputs.
        plot_title: title passed to the bead-density heatmap generator.
    """

    bfmaskPath = os.path.join(SIGPROC_RESULTS, 'analysis.bfmask.bin')
    bfmaskstatspath = os.path.join(SIGPROC_RESULTS, 'analysis.bfmask.stats')

    ########################################################
    # write composite return code                          #
    ########################################################

    try:
        # A full run has 96 blocks; start at 96 and decrement once per
        # block that reports Analysis=0, so 0 means every block succeeded.
        if len(dirs) == 96:
            composite_return_code = 96
            for subdir in dirs:

                blockstatus_return_code_file = os.path.join(subdir, "blockstatus.txt")
                if os.path.exists(blockstatus_return_code_file):

                    with open(blockstatus_return_code_file, 'r') as f:
                        text = f.read()
                        if 'Analysis=0' in text:
                            composite_return_code -= 1

            composite_return_code_file = os.path.join(SIGPROC_RESULTS, "analysis_return_code.txt")
            if not os.path.exists(composite_return_code_file):
                printtime("DEBUG: create %s" % composite_return_code_file)
                os.umask(0o002)  # keep the new file group-writable (0o002 works on Py2.6+ and Py3)
                # with-statement guarantees the handle is closed even if
                # the write raises (was a manual open/close).
                with open(composite_return_code_file, 'a') as f:
                    f.write(str(composite_return_code))
            else:
                printtime("DEBUG: skip generation of %s" % composite_return_code_file)
    except Exception:
        # Narrowed from a bare except so SystemExit/KeyboardInterrupt propagate.
        traceback.print_exc()

    #################################################
    # Merge individual block bead metrics files     #
    #################################################
    printtime("Merging individual block bead metrics files")

    try:
        # BeadmaskMerge combines the per-block masks into one composite
        # analysis.bfmask.bin.  shell=True on a built string: the paths
        # come from internal results directories, not user input.
        cmd = 'BeadmaskMerge -i analysis.bfmask.bin -o ' + bfmaskPath
        for subdir in dirs:
            subdir = os.path.join(SIGPROC_RESULTS, subdir)
            if isbadblock(subdir, "Merging individual block bead metrics files"):
                continue
            bfmaskbin = os.path.join(subdir, 'analysis.bfmask.bin')
            if os.path.exists(bfmaskbin):
                cmd = cmd + ' %s' % subdir
            else:
                printtime("ERROR: skipped %s" % bfmaskbin)
        printtime("DEBUG: Calling '%s'" % cmd)
        subprocess.call(cmd, shell=True)
    except Exception:
        printtime("BeadmaskMerge failed")

    ###############################################
    # Merge individual block bead stats files     #
    ###############################################
    printtime("Merging analysis.bfmask.stats files")

    try:
        bfmaskstatsfiles = []
        for subdir in dirs:
            subdir = os.path.join(SIGPROC_RESULTS, subdir)
            if isbadblock(subdir, "Merging analysis.bfmask.stats files"):
                continue
            bfmaskstats = os.path.join(subdir, 'analysis.bfmask.stats')
            if os.path.exists(bfmaskstats):
                bfmaskstatsfiles.append(bfmaskstats)
            else:
                printtime("ERROR: Merging bfmask.stats files: skipped %s" % bfmaskstats)

        StatsMerge.main_merge(bfmaskstatsfiles, bfmaskstatspath, True)
    except Exception:
        printtime("ERROR: No analysis.bfmask.stats files were found to merge")
        traceback.print_exc()

    ########################################################
    # Make Bead Density Plots                              #
    ########################################################
    printtime("Make Bead Density Plots (composite report)")

    printtime("DEBUG: generate composite heatmap")
    if os.path.exists(bfmaskPath):
        try:
            # Makes Bead_density_contour.png, TODO have to read multiple blocks
            beadDensityPlot.genHeatmap(bfmaskPath, bfmaskstatspath, SIGPROC_RESULTS, plot_title)
        except Exception:
            traceback.print_exc()
    else:
        printtime("Warning: no heatmap generated.")

    ###############################################
    # Merge raw_peak_signal files                 #
    ###############################################
    printtime("Merging raw_peak_signal files")

    try:
        raw_peak_signal_files = []
        for subdir in dirs:
            printtime("DEBUG: %s:" % subdir)
            if isbadblock(subdir, "Merging raw_peak_signal files"):
                continue
            raw_peak_signal_file = os.path.join(subdir, 'raw_peak_signal')
            if os.path.exists(raw_peak_signal_file):
                raw_peak_signal_files.append(raw_peak_signal_file)
            else:
                printtime("ERROR: Merging raw_peak_signal files: skipped %s" % raw_peak_signal_file)
        composite_raw_peak_signal_file = "raw_peak_signal"
        blockprocessing.merge_raw_key_signals(raw_peak_signal_files, composite_raw_peak_signal_file)
    except Exception:
        printtime("Merging raw_peak_signal files failed")

    printtime("Finished sigproc merging")