Example 1
def alignment_post_processing(BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows,
                              mark_duplicates, force_alignstats):

    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json"),
                 'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" %
                  os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json"))
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400
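    # graph_max_x caps the x-axis of the read-length plots: roughly 0.7 bases per flow,
    # rounded up to the next multiple of 50, with a 400 bp fallback if flows is unusable.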

    input_prefix_list = []

    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(
                os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        printtime("Barcode processing, rename")
        src = os.path.join(ALIGNMENT_RESULTS,
                           dataset['file_prefix'] + '.alignment.summary')
        if os.path.exists(src):
            input_prefix_list.append(
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.'))
            #terrible hack to make aggregate_alignment happy
            X_name = 'nomatch'
            read_group = dataset['read_groups'][0]
            if 'barcode_name' in datasets_basecaller['read_groups'][
                    read_group]:
                X_name = datasets_basecaller['read_groups'][read_group][
                    'barcode_name']
            dst = os.path.join(ALIGNMENT_RESULTS,
                               'alignment_%s.summary' % X_name)
            try:
                os.symlink(os.path.relpath(src, os.path.dirname(dst)), dst)
            except:
                printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(
                ALIGNMENT_RESULTS,
                os.path.basename(dataset['file_prefix']) + '.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list, composite_bam_filename,
                                        composite_bam_filename + '.bai',
                                        mark_duplicates)
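        # A composite BAM was just created, so make sure alignStats is (re)run on it below.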
        force_alignstats = True

    if force_alignstats:
        ## Generate data for error plot for barcoded run from composite bam
        printtime("Call alignStats to generate raw accuracy")
        try:
            cmd = "alignStats"
            cmd += " -n 12"
            cmd += " --alignSummaryFile alignStats_err.txt"
            cmd += " --alignSummaryJsonFile alignStats_err.json"
            cmd += " --alignSummaryMinLen  1"
            #cmd += " --alignSummaryMaxLen  %s" % str(int(graph_max_x))
            cmd += " --alignSummaryMaxLen  %s" % str(int(400))
            cmd += " --alignSummaryLenStep 1"
            cmd += " --alignSummaryMaxErr  10"
            cmd += " --infile %s" % composite_bam_filename
            cmd = cmd + " --outputDir %s" % ALIGNMENT_RESULTS
            printtime("DEBUG: Calling '%s'" % cmd)
            os.system(cmd)
        except:
            printtime("alignStats failed")

    mergeAlignStatsResults(input_prefix_list, ALIGNMENT_RESULTS + "/")

    try:
        base_error_plot.generate_base_error_plot(
            os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'),
            os.path.join(ALIGNMENT_RESULTS, 'base_error_plot.png'),
            int(graph_max_x))
        ionstats_plots.alignment_rate_plot(
            os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(ALIGNMENT_RESULTS, 'alignment_rate_plot.png'),
            int(graph_max_x))

        # Create aligned histogram plot

        # Create AQ20 plot

        printtime("Base error plot has been created successfully")
    except:
        printtime("ERROR: Failed to generate base error plot")
        traceback.print_exc()

    # Generate alignment_barcode_summary.csv
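    # barcodeList.txt may sit up to four directory levels above the working directory
    # (block runs appear to execute in nested subdirectories), so probe each parent in turn.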
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment("./", barcodelist_path)

    # These graphs are likely obsolete
    makeAlignGraphs()
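
The repeated barcodeList.txt lookup above (and in the variants below) is just a walk up the directory tree; a minimal sketch of the equivalent loop, using a hypothetical helper name:

import os

def find_barcode_list(name='barcodeList.txt', max_levels=4):
    # Probe the current directory and up to max_levels parent directories,
    # mirroring the chained os.path.exists() checks used in the functions here.
    path = name
    for _ in range(max_levels + 1):
        if os.path.exists(path):
            return path
        path = os.path.join('..', path)
    return None  # caller should skip barcode aggregation when nothing is found
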
Example 2
def merge_alignment_stats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows):

    datasets_json = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json"),
                 'r')
        datasets_json = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" %
                  os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json"))
        traceback.print_exc()
        return

    for dataset in datasets_json['datasets']:

        # What needs merging:
        #  - alignment.summary
        #  - alignTable.txt
        # Some time in the future:
        #  - alignStats_err.json

        # Merge alignStats metrics
        try:
            input_prefix_list = [
                os.path.join(dir, ALIGNMENT_RESULTS,
                             dataset['file_prefix'] + '.') for dir in dirs
            ]
            input_prefix_list = [
                prefix for prefix in input_prefix_list
                if os.path.exists(prefix + 'alignment.summary')
            ]
            composite_prefix = os.path.join(ALIGNMENT_RESULTS,
                                            dataset['file_prefix'] + '.')
            if input_prefix_list:
                mergeAlignStatsResults(input_prefix_list, composite_prefix)
            else:
                printtime("Nothing to merge: " + dataset['file_prefix'])
        except:
            printtime("ERROR: merging %s stats unsuccessful" %
                      (dataset['file_prefix'] + '.bam'))

    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json"),
                 'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" %
                  os.path.join(BASECALLER_RESULTS, "datasets_basecaller.json"))
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    input_prefix_list = []

    for dataset in datasets_basecaller["datasets"]:
        printtime("Barcode processing, rename")
        src = os.path.join(ALIGNMENT_RESULTS,
                           dataset['file_prefix'] + '.alignment.summary')
        if os.path.exists(src):
            input_prefix_list.append(
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.'))
            #terrible hack to make aggregate_alignment happy
            X_name = 'nomatch'
            read_group = dataset['read_groups'][0]
            if 'barcode_name' in datasets_basecaller['read_groups'][
                    read_group]:
                X_name = datasets_basecaller['read_groups'][read_group][
                    'barcode_name']
            dst = os.path.join(ALIGNMENT_RESULTS,
                               'alignment_%s.summary' % X_name)
            try:
                os.symlink(os.path.relpath(src, os.path.dirname(dst)), dst)
            except:
                printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

    # Merge alignStats_err.json right here!

    merged_align_stats = {}
    align_stats_num_bases = 400
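    # Fold the per-block alignStats_err.json files into one: the first file read seeds
    # merged_align_stats, then per-position counters and the scalar totals are summed
    # across the remaining blocks.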
    for dir in dirs:
        current_align_stats = {}
        try:
            f = open(
                os.path.join(dir, ALIGNMENT_RESULTS, 'alignStats_err.json'),
                'r')
            current_align_stats = json.load(f)
            f.close()
        except:
            printtime(
                "Merge alignStats_err.json: skipping %s" %
                os.path.join(dir, ALIGNMENT_RESULTS, 'alignStats_err.json'))
            continue

        if not merged_align_stats:
            merged_align_stats = current_align_stats
            align_stats_num_bases = len(
                merged_align_stats.get("read_length", []))
            continue

        for idx in range(align_stats_num_bases):
            merged_align_stats['nread'][idx] += current_align_stats['nread'][
                idx]
            merged_align_stats['unaligned'][idx] += current_align_stats[
                'unaligned'][idx]
            merged_align_stats['filtered'][idx] += current_align_stats[
                'filtered'][idx]
            merged_align_stats['clipped'][idx] += current_align_stats[
                'clipped'][idx]
            merged_align_stats['aligned'][idx] += current_align_stats[
                'aligned'][idx]
            merged_align_stats['n_err_at_position'][
                idx] += current_align_stats['n_err_at_position'][idx]
            merged_align_stats['cum_aligned'][idx] += current_align_stats[
                'cum_aligned'][idx]
            merged_align_stats['cum_err_at_position'][
                idx] += current_align_stats['cum_err_at_position'][idx]

        merged_align_stats['accuracy_total_bases'] += current_align_stats[
            'accuracy_total_bases']
        merged_align_stats['accuracy_total_errors'] += current_align_stats[
            'accuracy_total_errors']
        merged_align_stats['total_mapped_target_bases'] += current_align_stats[
            'total_mapped_target_bases']
        merged_align_stats['total_mapped_reads'] += current_align_stats[
            'total_mapped_reads']

    try:
        f = open(os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'), "w")
        json.dump(merged_align_stats, f, indent=4)
        f.close()
    except:
        printtime("ERROR; Failed to write merged alignStats_err.json")
        traceback.print_exc()

    mergeAlignStatsResults(input_prefix_list, ALIGNMENT_RESULTS + "/")

    try:
        base_error_plot.generate_base_error_plot(
            os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'),
            os.path.join(ALIGNMENT_RESULTS, 'base_error_plot.png'),
            int(graph_max_x))

        ionstats_plots.alignment_rate_plot(
            os.path.join(ALIGNMENT_RESULTS, 'alignStats_err.json'),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'),
            os.path.join(ALIGNMENT_RESULTS, 'alignment_rate_plot.png'),
            int(graph_max_x))

        printtime("Base error plot has been created successfully")
    except:
        printtime("ERROR: Failed to generate base error plot")
        traceback.print_exc()

    # Generate alignment_barcode_summary.csv
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment("./", barcodelist_path)
Example 3
def alignment_post_processing(
        libraryName,
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates):


    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 800



    alignment_file_list = []
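    # For each dataset that produced a basecaller BAM: compute per-barcode alignment
    # metrics with ionstats, convert them to the legacy alignment.summary format, and
    # collect the per-barcode JSON files for the run-level reduction below.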

    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        ionstats.generate_ionstats_alignment(
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                graph_max_x)
        ionstats2alignstats(libraryName,
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.alignment.summary'))

        alignment_file_list.append(os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json'))

    # In Progress: merge ionstats alignment results
    ionstats.reduce_stats(alignment_file_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))    
    ionstats2alignstats(libraryName,
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            os.path.join(ALIGNMENT_RESULTS,'alignment.summary'))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS,'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(ALIGNMENT_RESULTS,os.path.basename(dataset['file_prefix'])+'.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list,composite_bam_filename,composite_bam_filename+'.bai',mark_duplicates)

    # Generate alignment_barcode_summary.csv
    #TODO: use datasets_basecaller.json + *.ionstats_alignment.json instead of barcodeList.txt and alignment_*.summary
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)

    # These graphs are likely obsolete
    #makeAlignGraphs()

    # In Progress: Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'), 
            'Filtered_Alignments_Q47.png', 'AQ47', 'purple')
Example 4
def merge_alignment_stats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows):

    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return



    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 800




    ########################################################
    # Merge ionstats_alignment.json
    # First across blocks, then across barcodes
    ########################################################
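    # Step 1: for each barcode, reduce its per-block ionstats JSON files into one composite.
    # Step 2: reduce the per-barcode composites into the run-level ionstats_alignment.json.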

    try:
        composite_filename_list = []
        for dataset in datasets_basecaller["datasets"]:
            composite_filename = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.ionstats_alignment.json')
            barcode_filename_list = [os.path.join(dir,ALIGNMENT_RESULTS,dataset['file_prefix']+'.ionstats_alignment.json') for dir in dirs]
            barcode_filename_list = [filename for filename in barcode_filename_list if os.path.exists(filename)]
            ionstats.reduce_stats(barcode_filename_list,composite_filename)
            if os.path.exists(composite_filename):
                composite_filename_list.append(composite_filename)

        ionstats.reduce_stats(composite_filename_list,os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'))
    except:
        printtime("ERROR: Failed to merge ionstats_alignment.json")
        traceback.print_exc()

    # Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'base_error_plot.png', graph_max_x)
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q10.png', 'AQ10', 'red')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q17.png', 'AQ17', 'yellow')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'),
            'Filtered_Alignments_Q20.png', 'AQ20', 'green')
    ionstats_plots.old_aq_length_histogram(
            os.path.join(ALIGNMENT_RESULTS,'ionstats_alignment.json'), 
            'Filtered_Alignments_Q47.png', 'AQ47', 'purple')



    # Generate alignment_barcode_summary.csv
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)
Example 5
def merge_alignment_stats(dirs, BASECALLER_RESULTS, ALIGNMENT_RESULTS, flows):
    
    datasets_json = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_json = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return
    
    for dataset in datasets_json['datasets']:

        # What needs merging:
        #  - alignment.summary
        #  - alignTable.txt
        # Some time in the future:
        #  - alignStats_err.json

        # Merge alignStats metrics
        try:
            input_prefix_list = [os.path.join(dir,ALIGNMENT_RESULTS, dataset['file_prefix']+'.') for dir in dirs]
            input_prefix_list = [prefix for prefix in input_prefix_list if os.path.exists(prefix+'alignment.summary')]
            composite_prefix = os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.')
            if input_prefix_list:
                mergeAlignStatsResults(input_prefix_list,composite_prefix)
            else:
                printtime("Nothing to merge: "+dataset['file_prefix'])
        except:
            printtime("ERROR: merging %s stats unsuccessful" % (dataset['file_prefix']+'.bam'))
    

    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    

    input_prefix_list = []

    for dataset in datasets_basecaller["datasets"]:
        printtime("Barcode processing, rename")
        src = os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.alignment.summary')
        if os.path.exists(src):
            input_prefix_list.append(os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.'))
            #terrible hack to make aggregate_alignment happy
            X_name = 'nomatch'
            read_group = dataset['read_groups'][0]
            if 'barcode_name' in datasets_basecaller['read_groups'][read_group]:
                X_name = datasets_basecaller['read_groups'][read_group]['barcode_name']
            dst = os.path.join(ALIGNMENT_RESULTS, 'alignment_%s.summary' % X_name)
            try:
                os.symlink(os.path.relpath(src,os.path.dirname(dst)),dst)
            except:
                printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))


    # Merge alignStats_err.json right here!

    merged_align_stats = {}
    align_stats_num_bases = 400
    for dir in dirs:
        current_align_stats = {}
        try:
            f = open(os.path.join(dir,ALIGNMENT_RESULTS,'alignStats_err.json'),'r')
            current_align_stats = json.load(f)
            f.close()
        except:
            printtime("Merge alignStats_err.json: skipping %s" % os.path.join(dir,ALIGNMENT_RESULTS,'alignStats_err.json'))
            continue
        
        if not merged_align_stats:
            merged_align_stats = current_align_stats
            align_stats_num_bases = len(merged_align_stats.get("read_length",[]))
            continue
        
        for idx in range(align_stats_num_bases):
            merged_align_stats['nread'][idx] += current_align_stats['nread'][idx]
            merged_align_stats['unaligned'][idx] += current_align_stats['unaligned'][idx]
            merged_align_stats['filtered'][idx] += current_align_stats['filtered'][idx]
            merged_align_stats['clipped'][idx] += current_align_stats['clipped'][idx]
            merged_align_stats['aligned'][idx] += current_align_stats['aligned'][idx]
            merged_align_stats['n_err_at_position'][idx] += current_align_stats['n_err_at_position'][idx]
            merged_align_stats['cum_aligned'][idx] += current_align_stats['cum_aligned'][idx]
            merged_align_stats['cum_err_at_position'][idx] += current_align_stats['cum_err_at_position'][idx]

        merged_align_stats['accuracy_total_bases'] += current_align_stats['accuracy_total_bases']
        merged_align_stats['accuracy_total_errors'] += current_align_stats['accuracy_total_errors']
        merged_align_stats['total_mapped_target_bases'] += current_align_stats['total_mapped_target_bases']
        merged_align_stats['total_mapped_reads'] += current_align_stats['total_mapped_reads']
            
        
    try:
        f = open(os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),"w")
        json.dump(merged_align_stats, f, indent=4)
        f.close()
    except:
        printtime("ERROR; Failed to write merged alignStats_err.json")
        traceback.print_exc()
        
        
        
    mergeAlignStatsResults(input_prefix_list,ALIGNMENT_RESULTS+"/")

    try:
        base_error_plot.generate_base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),
            os.path.join(ALIGNMENT_RESULTS,'base_error_plot.png'),int(graph_max_x))
        base_error_plot.generate_alignment_rate_plot(
            os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),
            os.path.join(BASECALLER_RESULTS,'readLen.txt'),
            os.path.join(ALIGNMENT_RESULTS,'alignment_rate_plot.png'),int(graph_max_x))

        
        printtime("Base error plot has been created successfully")
    except:
        printtime("ERROR: Failed to generate base error plot")
        traceback.print_exc()

    # Generate alignment_barcode_summary.csv
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)
Example 6
def alignment_post_processing(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates,
        force_alignstats):


    datasets_basecaller = {}
    try:
        f = open(os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"),'r')
        datasets_basecaller = json.load(f)
        f.close()
    except:
        printtime("ERROR: problem parsing %s" % os.path.join(BASECALLER_RESULTS,"datasets_basecaller.json"))
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except:
        graph_max_x = 400

    

    input_prefix_list = []

    for dataset in datasets_basecaller["datasets"]:
        if not os.path.exists(os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])):
            continue

        printtime("Barcode processing, rename")
        src = os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.alignment.summary')
        if os.path.exists(src):
            input_prefix_list.append(os.path.join(ALIGNMENT_RESULTS,dataset['file_prefix']+'.'))
            #terrible hack to make aggregate_alignment happy
            X_name = 'nomatch'
            read_group = dataset['read_groups'][0]
            if 'barcode_name' in datasets_basecaller['read_groups'][read_group]:
                X_name = datasets_basecaller['read_groups'][read_group]['barcode_name']
            dst = os.path.join(ALIGNMENT_RESULTS, 'alignment_%s.summary' % X_name)
            try:
                os.symlink(os.path.relpath(src,os.path.dirname(dst)),dst)
            except:
                printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

        printtime("Creating legacy name links")
        if 'legacy_prefix' in dataset:
            link_src = [
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix']+'.bam.bai')]
            link_dst = [
                os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['legacy_prefix'])+'.bam'),
                os.path.join(ALIGNMENT_RESULTS, os.path.basename(dataset['legacy_prefix'])+'.bam.bai')]
            for (src,dst) in zip(link_src,link_dst):
                try:
                    os.symlink(os.path.relpath(src,os.path.dirname(dst)),dst)
                except:
                    printtime("ERROR: Unable to symlink '%s' to '%s'" % (src, dst))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs

    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS,'rawlib.bam')
    if not os.path.exists(composite_bam_filename):

        bam_file_list = []
        for dataset in datasets_basecaller["datasets"]:
            bam_name = os.path.join(ALIGNMENT_RESULTS,os.path.basename(dataset['file_prefix'])+'.bam')
            if os.path.exists(bam_name):
                bam_file_list.append(bam_name)

        blockprocessing.merge_bam_files(bam_file_list,composite_bam_filename,composite_bam_filename+'.bai',mark_duplicates)
        force_alignstats = True

    if force_alignstats:        
        ## Generate data for error plot for barcoded run from composite bam
        printtime("Call alignStats to generate raw accuracy")
        try:
            cmd = "alignStats"
            cmd += " -n 12"
            cmd += " --alignSummaryFile alignStats_err.txt"
            cmd += " --alignSummaryJsonFile alignStats_err.json"
            cmd += " --alignSummaryMinLen  1"
            #cmd += " --alignSummaryMaxLen  %s" % str(int(graph_max_x))
            cmd += " --alignSummaryMaxLen  %s" % str(int(400))
            cmd += " --alignSummaryLenStep 1"
            cmd += " --alignSummaryMaxErr  10"
            cmd += " --infile %s" % composite_bam_filename
            cmd = cmd + " --outputDir %s" % ALIGNMENT_RESULTS
            printtime("DEBUG: Calling '%s'" % cmd)
            os.system(cmd)
        except:
            printtime("alignStats failed")


    mergeAlignStatsResults(input_prefix_list,ALIGNMENT_RESULTS+"/")

    try:
        base_error_plot.generate_base_error_plot(
            os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),
            os.path.join(ALIGNMENT_RESULTS,'base_error_plot.png'),int(graph_max_x))
        base_error_plot.generate_alignment_rate_plot(
            os.path.join(ALIGNMENT_RESULTS,'alignStats_err.json'),
            os.path.join(BASECALLER_RESULTS,'readLen.txt'),
            os.path.join(ALIGNMENT_RESULTS,'alignment_rate_plot.png'),int(graph_max_x))

        # Create aligned histogram plot
        
        # Create AQ20 plot
        
        printtime("Base error plot has been created successfully")
    except:
        printtime("ERROR: Failed to generate base error plot")
        traceback.print_exc()

    # Generate alignment_barcode_summary.csv
    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if os.path.exists(barcodelist_path):
        printtime("Barcode processing, aggregate")
        aggregate_alignment ("./",barcodelist_path)

    # These graphs are likely obsolete
    makeAlignGraphs()
Example 7
def align_barcodes(
            sammeta,
            libsff_path,
            align_full,
            sam_parsed,
            bidirectional,
            libraryName,
            DIR_BC_FILES,
            flows,
            aligner_opts_extra,
            mark_duplicates,
            ALIGNMENT_RESULTS,
            outBaseName=''
            ):
    printtime("Renaming non-barcoded alignment results to 'comprehensive'")
    files = [ 'alignment.summary',
              'alignmentQC_out.txt',
              'alignTable.txt',
            ]
    for fname in files:
        if os.path.exists(fname):
            try:
                #if os.path.exists(fname):
                #   os.rename(fname, fname + ".comprehensive")
                shutil.copyfile(fname, fname + ".comprehensive")
            except:
                printtime('ERROR copying %s' % fname)
                traceback.print_exc()

    printtime("STARTING BARCODE ALIGNMENTS")

    barcodelist_path = 'barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        barcodelist_path = '../../../../barcodeList.txt'
    if not os.path.exists(barcodelist_path):
        printtime('ERROR: barcodeList.txt not found')
    barcodeList = parse_bcfile(barcodelist_path)

    align_full = True
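    # Note: align_full is forced on here, so the value passed in by the caller is ignored.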

    (head,tail) = os.path.split(libsff_path)
    for bcid in (x['id_str'] for x in barcodeList):
        sffName = os.path.join(DIR_BC_FILES,"%s_%s" % (bcid, tail))
        print "sffName: "+sffName
        if os.path.exists(sffName):
            printtime("Barcode processing for '%s': %s" % (bcid, sffName))
        else:
            printtime("No barcode SFF file found for '%s': %s" % (bcid, sffName))
            continue

        align_full_chip(
                sammeta,
                sffName,
                align_full,
                1,
                True,
                sam_parsed,
                bidirectional,
                libraryName,
                flows,
                aligner_opts_extra,
                mark_duplicates,
                ALIGNMENT_RESULTS,
                outBaseName)

        #rename each output file based on barcode found in fastq filename
        #but ignore the comprehensive fastq output files
        printtime("Barcode processing, rename")
        if os.path.exists('alignment.summary'):
            try:
                fname='alignment_%s.summary' % bcid
                os.rename('alignment.summary', fname)
#                os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                fname='alignmentQC_out_%s.txt' % bcid
                os.rename('alignmentQC_out.txt', fname)
#                os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                fname='alignTable_%s.txt' % bcid
                os.rename('alignTable.txt', fname)
#                os.rename(fname,os.path.join(DIR_BC_FILES,fname))

            except:
                printtime('error renaming')
                traceback.print_exc()

    #rename comprehensive results back to default names
    for fname in files:
        if os.path.exists(fname + '.comprehensive'):
            #    os.rename(fname + '.comprehensive', fname)
            try:
                shutil.copyfile(fname + '.comprehensive', fname)
            except:
                printtime('ERROR copying %s' % fname + '.comprehensive')
                traceback.print_exc()

    printtime("Barcode processing, aggreagate")
    aggregate_alignment ("./",barcodelist_path)
Example 8
def align_full_chip_core(libsff, libKey, tfKey, floworder, fastqName, align_full, graph_max_x, do_barcode, make_align_graphs, sam_parsed, DIR_BC_FILES, env, outputdir):
    #collect all the meta data for the SAM file
    SAM_META = {}

    # id - this hash comes from the fastq file
    try:
        #read the first line of the fastq file
        fastqFile = open(fastqName,'r')
        id_hash = fastqFile.readline()
        fastqFile.close()

        #now just pull out the hash
        id_hash = id_hash[1:6]

        SAM_META['ID'] = id_hash

    except IOError:
        printtime("Could not read fastq file.  The ID for the SAM file could not be found.")

    # sm - name for reads - project name
    SAM_META['SM'] = env['project']

    # lb - library name
    SAM_META['LB'] = env['libraryName']

    # pu - the platform unit
    SAM_META['PU'] = "PGM/" + env['chipType'].replace('"',"")

    SAM_META['PL'] = "IONTORRENT"

    #TODO: do not assume localhost.  Find the name of the masternode
    try:
        #this will get the exp data from the database
        exp_json = json.loads(env['exp_json'])

        # ds - the "notes", only the alphanumeric and space characters.
        SAM_META['DS'] = ''.join(ch for ch in exp_json['notes'] if ch.isalnum() or ch == " ")

        # dt - the run date
        exp_log_json = json.loads(exp_json['log'])
        iso_exp_time = exp_log_json['start_time']

        #convert to ISO time
        iso_exp_time = dateutil.parser.parse(iso_exp_time)

        SAM_META['DT'] = iso_exp_time.isoformat()

        #the site name should be here, also remove spaces
        site_name = env['site_name']
        site_name = ''.join(ch for ch in site_name if ch.isalnum() )
        SAM_META['CN'] = site_name

        env['flows'] = exp_json['flows']

    except:
        printtime("There was an error getting the site name, because the Torrent Browser could not be contacted")
        traceback.print_exc()

    #Now build the SAM meta data arg string
    aligner_opts_rg= '--aligner-opts-rg "'
    aligner_opts_extra = ''
    if sam_parsed:
        aligner_opts_extra += ' -p 1'
    if env['aligner_opts_extra']:
        print '  found extra alignment options: "%s"' % env['aligner_opts_extra']
        aligner_opts_extra = ' --aligner-opts-extra "'
        aligner_opts_extra += env['aligner_opts_extra'] + '"'
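        # Note: this reassignment discards the ' -p 1' appended above when sam_parsed is set;
        # the later variant (Example 9) keeps such flags in a separate additional_aligner_opts string.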
    first = True
    for key, value in SAM_META.items():
        if value:
            sam_arg =  r'-R \"'
            end =  r'\"'

            sam_arg = sam_arg + key + ":" + value + end

            if first:
                aligner_opts_rg = aligner_opts_rg + sam_arg
                first = False
            else:
                aligner_opts_rg = aligner_opts_rg + " " + sam_arg

    #add the trailing quote
    aligner_opts_rg = aligner_opts_rg + '"'
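    # aligner_opts_rg now has the form: --aligner-opts-rg "-R \"ID:..\" -R \"SM:..\" ...",
    # one escaped -R tag per non-empty SAM_META entry.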

    if 0 < graph_max_x:
        # establish the read-length histogram range by using the simple rule: 0.6 * num-flows
        flowsUsed = 0
        try:
            flowsUsed = int(env['flows'])
        except:
            flowsUsed = 400
        graph_max_x = 100 * math.trunc((0.6 * flowsUsed + 99)/100.0)
    if graph_max_x < 400:
        graph_max_x = 400
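    # i.e. roughly 0.6 bases per flow, rounded up to the next multiple of 100, with a 400 bp floor.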

    #-----------------------------------
    # DEFAULT SINGLE SFF/FASTQ BEHAVIOR - (Runs for barcoded runs too)
    #-----------------------------------
    if (align_full):
        #If a full align is forced add a '--align-all-reads' flag
        com = "alignmentQC.pl"
        com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
        com += " --output-dir %s" % outputdir
        com += " --input %s" % libsff
        com += " --genome %s" % env["libraryName"]
        com += " --max-plot-read-len %s" % graph_max_x
        com += " --align-all-reads"
        com += " %s %s" % (aligner_opts_rg,aligner_opts_extra)
        com += " >> ReportLog.html 2>&1"
    else:
        # Add -p 1 to enable default.sam file generation
        com = "alignmentQC.pl"
        com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
        com += " --output-dir %s" % outputdir
        com += " --input %s" % libsff
        com += " --genome %s" % env["libraryName"]
        com += " --max-plot-read-len %s" % graph_max_x
        com += " %s %s" % (aligner_opts_rg,aligner_opts_extra)
        com += " >> ReportLog.html 2>&1"

    try:
        printtime("Alignment QC command line:\n%s" % com)
        retcode = subprocess.call(com, shell=True)
        if retcode != 0:
            printtime("alignmentQC failed, return code: %d" % retcode)
            alignError = open("alignment.error", "w")
            alignError.write('alignmentQC returned with error code: ')
            alignError.write(str(retcode))
            alignError.close()
    except OSError:
        printtime('Alignment Failed to start')
        alignError = open("alignment.error", "w")
        alignError.write(str(traceback.format_exc()))
        alignError.close()
        traceback.print_exc()
    if make_align_graphs:
        makeAlignGraphs()

    #--------------------------------------------
    # BARCODE HANDLING BEHAVIOR (Multiple FASTQ)
    #--------------------------------------------
    if env['barcodeId'] and True == do_barcode:
        printtime("Renaming non-barcoded alignment results to 'comprehensive'")
        files = [ 'alignment.summary',
                  'alignmentQC_out.txt',
                  'alignTable.txt',
                ]
        for fname in files:
            try:
                if os.path.exists(fname):
                    os.rename(fname, fname + ".comprehensive")
            except:
                printtime('error renaming')
                traceback.print_exc()
        # Only make the graphs from the alignment of comprehensive fastq file
        if make_align_graphs:
            makeAlignGraphs()

        printtime("STARTING BARCODE ALIGNMENTS")
        if not os.path.exists(DIR_BC_FILES):
            os.mkdir(DIR_BC_FILES)

        barcodeList = parse_bcfile('barcodeList.txt')

        align_full = True
        for bcid in (x['id_str'] for x in barcodeList):
            sffName = "%s_%s_%s.sff" % (bcid, env['expName'], env['resultsName'])
            if not os.path.exists(sffName):
                printtime("No barcode SFF file found for '%s'" % bcid)
                continue
            if (align_full):
                printtime("Align All Reads")
                #If a full align is forced add a '--align-all-reads' flag
                com = "alignmentQC.pl" 
                com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
                com += " --output-dir %s" % outputdir
                com += " --input %s" % sffName
                com += " --genome %s" % env["libraryName"]
                com += " --max-plot-read-len %s" % graph_max_x
                com += " --align-all-reads"
                com += " %s %s" % (aligner_opts_rg, aligner_opts_extra)
                com += " >> ReportLog.html 2>&1" 
            else:
                printtime("Align Subset of Reads")
                # Add -p 1 to enable default.sam file generation
                com = "alignmentQC.pl" 
                com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
                com += " --output-dir %s" % outputdir
                com += " --input %s" % sffName
                com += " --genome %s" % env["libraryName"]
                com += " --max-plot-read-len %s" % graph_max_x
                com += " %s %s" % (aligner_opts_rg, aligner_opts_extra)
                com += " >> ReportLog.html 2>&1"
            try:
                printtime("Alignment QC command line:\n%s" % com)
                retcode = subprocess.call(com, shell=True)
                if retcode != 0:
                    printtime("alignmentQC failed, return code: %d" % retcode)
                    alignError = open("alignment.error", "a")
                    alignError.write(com)
                    alignError.write(': \nalignmentQC returned with error code: ')
                    alignError.write(str(retcode))
                    alignError.close()
            except OSError:
                printtime('Alignment Failed to start')
                alignError = open("alignment.error", "a")
                alignError.write(str(traceback.format_exc()))
                alignError.close()
                traceback.print_exc()

            #rename each output file based on barcode found in fastq filename
            #but ignore the comprehensive fastq output files
            if os.path.exists('alignment.summary'):
                try:
                    fname='alignment_%s.summary' % bcid
                    os.rename('alignment.summary', fname)
                    os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                    fname='alignmentQC_out_%s.txt' % bcid
                    os.rename('alignmentQC_out.txt', fname)
                    os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                    fname='alignTable_%s.txt' % bcid
                    os.rename('alignTable.txt', fname)
                    os.rename(fname,os.path.join(DIR_BC_FILES,fname))

                    #move fastq, sff, bam, bai files
                    extlist = ['fastq','sff','bam','bam.bai']
                    for ext in extlist:
                        bcfile = "%s_%s_%s.%s" % (bcid,env['expName'], env['resultsName'],ext)
                        if os.path.isfile(bcfile):
                            os.rename(bcfile,os.path.join(DIR_BC_FILES,bcfile))
                except:
                    printtime('error renaming')
                    traceback.print_exc()
        #rename comprehensive results back to default names
        files = [ 'alignment.summary',
                  'alignmentQC_out.txt',
                  'alignTable.txt',
                ]
        for fname in files:
            if os.path.exists(fname + '.comprehensive'):
                os.rename(fname + '.comprehensive', fname)

        aggregate_alignment (DIR_BC_FILES,'barcodeList.txt')
Example 9
def align_full_chip(
    SAM_META,
    libsff_path,
    align_full,
    graph_max_x,
    do_barcode,
    make_align_graphs,
    sam_parsed,
    bidirectional,
    DIR_BC_FILES,
    libraryName,
    flows,
    barcodeId,
    opts_extra,
    outputdir):

    printtime("sam_parsed is %s" % sam_parsed)

    #Now build the SAM meta data arg string
    aligner_opts_rg= '--aligner-opts-rg "'
    aligner_opts_extra = ''
    additional_aligner_opts = ''
    if sam_parsed:
        additional_aligner_opts += ' -p 1'
    if bidirectional:
        additional_aligner_opts += ' --bidirectional'
    if opts_extra:
        print '  found extra alignment options: "%s"' % opts_extra
        aligner_opts_extra = ' --aligner-opts-extra "'
        aligner_opts_extra += opts_extra + '"'
    first = True
    for key, value in SAM_META.items():
        if value:
            sam_arg =  r'-R \"'
            end =  r'\"'

            sam_arg = sam_arg + key + ":" + value + end

            if first:
                aligner_opts_rg = aligner_opts_rg + sam_arg
                first = False
            else:
                aligner_opts_rg = aligner_opts_rg + " " + sam_arg

    #add the trailing quote
    aligner_opts_rg = aligner_opts_rg + '"'

    if 0 < graph_max_x:
        # establish the read-length histogram range by using the simple rule: 0.6 * num-flows
        flowsUsed = 0
        try:
            flowsUsed = int(flows)
        except:
            flowsUsed = 400
        graph_max_x = 100 * math.trunc((0.6 * flowsUsed + 99)/100.0)
    if graph_max_x < 400:
        graph_max_x = 400

    #-----------------------------------
    # DEFAULT SINGLE SFF/FASTQ BEHAVIOR - (Runs for barcoded runs too)
    #-----------------------------------
    if (align_full):
        #If a full align is forced add a '--align-all-reads' flag
        com = "alignmentQC.pl"
        com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
        com += " --output-dir %s" % outputdir
        com += " --input %s" % libsff_path
        com += " --genome %s" % libraryName
        com += " --max-plot-read-len %s" % graph_max_x
        com += " --align-all-reads"
        com += " %s" % (additional_aligner_opts)
        com += " %s %s" % (aligner_opts_rg,aligner_opts_extra)
        com += " >> ReportLog.html 2>&1"
    else:
        com = "alignmentQC.pl"
        com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
        com += " --output-dir %s" % outputdir
        com += " --input %s" % libsff_path
        com += " --genome %s" % libraryName
        com += " --max-plot-read-len %s" % graph_max_x
        com += " %s" % (additional_aligner_opts)
        com += " %s %s" % (aligner_opts_rg,aligner_opts_extra)
        com += " >> ReportLog.html 2>&1"

    try:
        printtime("Alignment QC command line:\n%s" % com)
        retcode = subprocess.call(com, shell=True)
        blockprocessing.add_status("alignmentQC.pl", retcode)
        if retcode != 0:
            printtime("alignmentQC failed, return code: %d" % retcode)
            alignError = open("alignment.error", "w")
            alignError.write('alignmentQC returned with error code: ')
            alignError.write(str(retcode))
            alignError.close()
    except OSError:
        printtime('Alignment Failed to start')
        alignError = open("alignment.error", "w")
        alignError.write(str(traceback.format_exc()))
        alignError.close()
        traceback.print_exc()
    if make_align_graphs:
        makeAlignGraphs()

    #--------------------------------------------
    # BARCODE HANDLING BEHAVIOR (Multiple FASTQ)
    #--------------------------------------------
    if barcodeId and do_barcode:
        printtime("Renaming non-barcoded alignment results to 'comprehensive'")
        files = [ 'alignment.summary',
                  'alignmentQC_out.txt',
                  'alignTable.txt',
                ]
        for fname in files:
            try:
                #if os.path.exists(fname):
                #   os.rename(fname, fname + ".comprehensive")
                shutil.copyfile(fname, fname + ".comprehensive")
            except:
                printtime('ERROR copying %s' % fname)
                traceback.print_exc()

        printtime("STARTING BARCODE ALIGNMENTS")
        
        barcodelist_path = 'barcodeList.txt'
        if not os.path.exists(barcodelist_path):
            barcodelist_path = '../barcodeList.txt'
        if not os.path.exists(barcodelist_path):
            barcodelist_path = '../../barcodeList.txt'
        if not os.path.exists(barcodelist_path):
            printtime('ERROR: barcodeList.txt not found')
        barcodeList = parse_bcfile(barcodelist_path)

        align_full = True
        top_dir = os.getcwd()
        try:
            os.chdir(DIR_BC_FILES)
            printtime('DEBUG changing to %s for barcodes alignment' % DIR_BC_FILES)
        except:
            printtime('ERROR missing %s folder' % DIR_BC_FILES)
            
        for bcid in (x['id_str'] for x in barcodeList):
            (head,tail) = os.path.split(libsff_path)
            sffName = os.path.join(head,"%s_%s" % (bcid, tail))
            if os.path.exists(sffName):
                printtime("Barcode processing for '%s': %s" % (bcid, sffName))
            else:
                printtime("No barcode SFF file found for '%s': %s" % (bcid, sffName))
                continue
            if (align_full):
                printtime("Align All Reads")
                #If a full align is forced add a '--align-all-reads' flag
                com = "alignmentQC.pl" 
                com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
                com += " --output-dir %s" % outputdir
                com += " --input %s" % sffName
                com += " --genome %s" % libraryName
                com += " --max-plot-read-len %s" % graph_max_x
                com += " --align-all-reads"
                com += " %s" % (additional_aligner_opts)
                com += " %s %s" % (aligner_opts_rg, aligner_opts_extra)
                com += " >> ReportLog.html 2>&1" 
            else:
                printtime("Align Subset of Reads")
                com = "alignmentQC.pl" 
                com += " --logfile %s" % os.path.join(outputdir,"alignmentQC_out.txt")
                com += " --output-dir %s" % outputdir
                com += " --input %s" % sffName
                com += " --genome %s" % libraryName
                com += " --max-plot-read-len %s" % graph_max_x
                com += " %s" % (additional_aligner_opts)
                com += " %s %s" % (aligner_opts_rg, aligner_opts_extra)
                com += " >> ReportLog.html 2>&1"
            try:
                printtime("Alignment QC command line:\n%s" % com)
                retcode = subprocess.call(com, shell=True)
                blockprocessing.add_status("alignmentQC.pl", retcode)
                if retcode != 0:
                    printtime("alignmentQC failed, return code: %d" % retcode)
                    alignError = open("alignment.error", "a")
                    alignError.write(com)
                    alignError.write(': \nalignmentQC returned with error code: ')
                    alignError.write(str(retcode))
                    alignError.close()
            except:
                printtime('ERROR: Alignment Failed to start')
                alignError = open("alignment.error", "a")
                alignError.write(str(traceback.format_exc()))
                alignError.close()
                traceback.print_exc()

            #rename each output file based on barcode found in fastq filename
            #but ignore the comprehensive fastq output files
            if os.path.exists('alignment.summary'):
                try:
                    fname='alignment_%s.summary' % bcid
                    os.rename('alignment.summary', fname)
    #                os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                    fname='alignmentQC_out_%s.txt' % bcid
                    os.rename('alignmentQC_out.txt', fname)
    #                os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                    fname='alignTable_%s.txt' % bcid
                    os.rename('alignTable.txt', fname)
    #                os.rename(fname,os.path.join(DIR_BC_FILES,fname))
                    
                except:
                    printtime('error renaming')
                    traceback.print_exc()
                    
        os.chdir(top_dir)     

        #rename comprehensive results back to default names
        for fname in files:
            #if os.path.exists(fname + '.comprehensive'):
            #    os.rename(fname + '.comprehensive', fname)
            try:
                shutil.copyfile(fname + '.comprehensive', fname)
            except:
                printtime('ERROR copying %s' % fname + '.comprehensive')
                traceback.print_exc()
                
        aggregate_alignment (DIR_BC_FILES,barcodelist_path)