예제 #1
0
    # Parse the command line; the code below reads args.merge_out, args.files,
    # args.duplicates and args.merge_plots from the result.
    args = parser.parse_args()
    
    
    # Merge only when an output name was given and more than one input file is listed.
    if args.merge_out and len(args.files) > 1:   
       # Merge BAM files 
       outputBAM = args.merge_out
       print "Merging bam files to %s, mark duplicates is %s" % (outputBAM, args.duplicates)
       # The third argument is the output name with '.bam' replaced by '.bam.bai'
       # (presumably the BAM index path - confirm against merge_bam_files).
       merge_bam_files(args.files, outputBAM, outputBAM.replace('.bam','.bam.bai'), args.duplicates)
       # generate ionstats files from merged BAM
       graph_max_x = 400
       # 'rawlib.bam' maps to the un-prefixed stats file name; any other output
       # uses the text before the first '.bam' as the prefix.
       if outputBAM == 'rawlib.bam':
          ionstats_file = 'ionstats_alignment.json'
       else:
          ionstats_file = outputBAM.split('.bam')[0] + '.ionstats_alignment.json'
       ionstats.generate_ionstats_alignment(outputBAM, ionstats_file, graph_max_x)
       
    if args.merge_plots:          
        print "Generating plots for merged report"
        # The plots are always built from 'ionstats_alignment.json' in the current directory.
        ionstats_file = 'ionstats_alignment.json'
        
        # Best effort: any failure below is printed as a traceback and otherwise ignored.
        try:       
            stats = json.load(open(ionstats_file))
            l = stats['full']['max_read_length']
            # Add 49 and round to the nearest multiple of 100 to get the x-axis limit.
            graph_max_x = int(round(l + 49, -2))
            
            # Make alignment_rate_plot.png and base_error_plot.png
            ionstats_plots.alignment_rate_plot2(ionstats_file, 'alignment_rate_plot.png', int(graph_max_x))
            ionstats_plots.base_error_plot(ionstats_file, 'base_error_plot.png', int(graph_max_x))
        except:
            traceback.print_exc()
예제 #2
0
파일: alignment.py 프로젝트: skner/TS
def create_ionstats(BASECALLER_RESULTS, ALIGNMENT_RESULTS,
                    basecaller_meta_information, basecaller_datasets,
                    graph_max_x, activate_barcode_filter, evaluate_hp):
    """Generate per-dataset ionstats files and merge them into summaries.

    For every dataset in ``basecaller_datasets["datasets"]`` that is not
    skipped, alignment statistics are generated when the dataset's first
    read group has a reference and the dataset is not filtered; otherwise
    basecaller statistics are generated. The per-dataset JSON files are
    merged with ``ionstats.reduce_stats``. When no per-dataset file of a
    kind exists, a fallback file is generated from an empty dummy BAM on
    a best-effort basis (failures are logged, not raised).

    Args:
        BASECALLER_RESULTS: directory containing the basecaller BAM files;
            basecaller statistics and the combined files are written here.
        ALIGNMENT_RESULTS: directory containing the aligned BAM files;
            alignment statistics are written here.
        basecaller_meta_information: forwarded to
            ``ionstats.generate_ionstats_alignment`` when ``evaluate_hp``
            is true; must also be truthy for the h5 merge to run.
        basecaller_datasets: dict with a "datasets" list and a
            "read_groups" dict keyed by read-group name.
        graph_max_x: forwarded unchanged to the ionstats generators.
        activate_barcode_filter: when true, datasets whose read groups
            are all flagged 'filtered' are skipped entirely.
        evaluate_hp: when true, ``.ionstats_error_summary.h5`` files are
            requested per dataset and merged at the end.
    """

    def make_empty_dummy_bam():
        # Writes 'empty_dummy.bam' (header-only SAM piped through samtools)
        # into the current working directory.
        cmd = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

        printtime("DEBUG: Calling '%s':" % cmd)
        ret = subprocess.call(cmd, shell=True)
        # NOTE(review): only exit code 1 is accepted as success here, which
        # looks inverted for subprocess.call; kept as-is because it may
        # match the samtools version in use - confirm before changing.
        if ret != 1:
            printtime(
                "ERROR: empty bam file generation failed, return code: %d"
                % ret)
            raise RuntimeError('exit code: %d' % ret)

    merged_alignment_json = os.path.join(ALIGNMENT_RESULTS,
                                         'ionstats_alignment.json')
    merged_alignment_h5 = os.path.join(ALIGNMENT_RESULTS,
                                       'ionstats_error_summary.h5')
    tmp_basecaller_json = os.path.join(BASECALLER_RESULTS,
                                       'ionstats_tmp_basecaller.json')

    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []
    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:
        read_groups = basecaller_datasets["read_groups"]

        # A dataset counts as filtered only when every one of its read
        # groups carries a truthy 'filtered' flag.
        filtered = all(
            read_groups[rg_name].get('filtered', False)
            for rg_name in dataset["read_groups"])

        # filter out based on flag
        if activate_barcode_filter and filtered:
            continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        # The reference of the first read group decides between alignment
        # and basecaller statistics.
        reference = read_groups[dataset['read_groups'][0]]['reference']
        if reference and not filtered:
            alignment_prefix = os.path.join(ALIGNMENT_RESULTS,
                                            dataset['file_prefix'])
            alignment_json = alignment_prefix + '.ionstats_alignment.json'
            alignment_h5 = alignment_prefix + '.ionstats_error_summary.h5'

            ionstats.generate_ionstats_alignment(
                [alignment_prefix + '.bam'],
                alignment_json,
                alignment_h5 if evaluate_hp else None,
                basecaller_meta_information if evaluate_hp else None,
                graph_max_x)

            ionstats_alignment_file_list.append(alignment_json)
            if evaluate_hp:
                ionstats_alignment_h5_file_list.append(alignment_h5)
        else:
            basecaller_json = os.path.join(
                BASECALLER_RESULTS,
                dataset['file_prefix'] + '.ionstats_basecaller.json')

            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                basecaller_json,
                graph_max_x)

            ionstats_basecaller_file_list.append(basecaller_json)

    # Merge ionstats files from individual (barcoded) datasets
    if ionstats_alignment_file_list:
        ionstats.reduce_stats(ionstats_alignment_file_list,
                              merged_alignment_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            make_empty_dummy_bam()
            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                merged_alignment_json,
                merged_alignment_h5 if evaluate_hp else None,
                basecaller_meta_information if evaluate_hp else None,
                graph_max_x)
        except Exception as err:
            # Still best effort, but no longer silent.
            printtime(
                "WARNING: fallback ionstats_alignment.json was not generated: %s"
                % (err,))

    if ionstats_basecaller_file_list:
        ionstats.reduce_stats(ionstats_basecaller_file_list,
                              tmp_basecaller_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            make_empty_dummy_bam()
            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'], tmp_basecaller_json, graph_max_x)
        except Exception as err:
            # Still best effort, but no longer silent.
            printtime(
                "WARNING: fallback ionstats_tmp_basecaller.json was not generated: %s"
                % (err,))

    # Combine whichever of the two merged files actually exist, once in
    # alignment-first order and once in the reversed order.
    ionstatslist = [
        path for path in (merged_alignment_json, tmp_basecaller_json)
        if os.path.exists(path)
    ]
    if ionstatslist:
        ionstats.reduce_stats(
            ionstatslist,
            os.path.join(BASECALLER_RESULTS,
                         'ionstats_basecaller_with_aligninfos.json'))
        ionstats.reduce_stats(
            reversed(ionstatslist),
            os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
    if (evaluate_hp and ionstats_alignment_h5_file_list
            and basecaller_meta_information):
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list,
                                 merged_alignment_h5)
    '''
예제 #3
0
파일: alignment.py 프로젝트: dkeren/TS
def alignment_post_processing(
        libraryName,
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        flows,
        mark_duplicates):
    """Produce alignment statistics, the composite BAM and plots for a run.

    Steps, in order:
      1. Load ``datasets_basecaller.json`` from BASECALLER_RESULTS; on any
         failure an error and traceback are printed and the function
         returns without doing anything else.
      2. For each dataset whose basecaller BAM exists, generate an
         ionstats alignment JSON and an ``.alignment.summary`` file in
         ALIGNMENT_RESULTS.
      3. Merge the per-dataset JSON files into ``ionstats_alignment.json``
         and derive ``alignment.summary`` from it.
      4. If ``rawlib.bam`` is missing from ALIGNMENT_RESULTS, merge the
         existing per-dataset BAM files into it.
      5. If a ``barcodeList.txt`` is found in the current directory or up
         to four parent levels above it, call ``aggregate_alignment``.
      6. Generate the alignment plots from the merged JSON file.

    Args:
        libraryName: forwarded to ``ionstats2alignstats``.
        BASECALLER_RESULTS: directory with ``datasets_basecaller.json``
            and the basecaller BAM files.
        ALIGNMENT_RESULTS: directory with the aligned BAM files; the
            statistics files are written here.
        flows: used to derive the plot x-axis limit; when it cannot be
            converted to a number, 800 is used instead.
        mark_duplicates: forwarded to ``blockprocessing.merge_bam_files``.
    """
    datasets_json_path = os.path.join(BASECALLER_RESULTS,
                                      "datasets_basecaller.json")
    try:
        # 'with' closes the handle even when json.load raises.
        with open(datasets_json_path, 'r') as f:
            datasets_basecaller = json.load(f)
    except Exception:
        printtime("ERROR: problem parsing %s" % datasets_json_path)
        traceback.print_exc()
        return

    try:
        graph_max_x = int(50 * math.ceil(0.014 * int(flows)))
    except (TypeError, ValueError, OverflowError):
        # flows was missing or not numeric: fall back to a fixed limit.
        graph_max_x = 800

    merged_stats_json = os.path.join(ALIGNMENT_RESULTS,
                                     'ionstats_alignment.json')

    alignment_file_list = []

    for dataset in datasets_basecaller["datasets"]:
        # Only datasets whose basecaller BAM exists are processed.
        if not os.path.exists(os.path.join(BASECALLER_RESULTS,
                                           dataset['basecaller_bam'])):
            continue

        dataset_bam = os.path.join(ALIGNMENT_RESULTS,
                                   dataset['file_prefix'] + '.bam')
        dataset_stats_json = os.path.join(
            ALIGNMENT_RESULTS,
            dataset['file_prefix'] + '.ionstats_alignment.json')
        dataset_summary = os.path.join(
            ALIGNMENT_RESULTS,
            dataset['file_prefix'] + '.alignment.summary')

        ionstats.generate_ionstats_alignment(dataset_bam, dataset_stats_json,
                                             graph_max_x)
        ionstats2alignstats(libraryName, dataset_stats_json, dataset_summary)

        alignment_file_list.append(dataset_stats_json)

    # In Progress: merge ionstats alignment results
    ionstats.reduce_stats(alignment_file_list, merged_stats_json)
    ionstats2alignstats(libraryName,
                        merged_stats_json,
                        os.path.join(ALIGNMENT_RESULTS, 'alignment.summary'))

    # Special legacy post-processing.
    # Generate merged rawlib.bam on barcoded runs
    composite_bam_filename = os.path.join(ALIGNMENT_RESULTS, 'rawlib.bam')
    if not os.path.exists(composite_bam_filename):
        candidate_bams = [
            os.path.join(ALIGNMENT_RESULTS,
                         os.path.basename(dataset['file_prefix']) + '.bam')
            for dataset in datasets_basecaller["datasets"]
        ]
        bam_file_list = [bam_name for bam_name in candidate_bams
                         if os.path.exists(bam_name)]

        blockprocessing.merge_bam_files(bam_file_list,
                                        composite_bam_filename,
                                        composite_bam_filename + '.bai',
                                        mark_duplicates)

    # Generate alignment_barcode_summary.csv
    # TODO: use datasets_basecaller.json + *.ionstats_alignment.json instead of barcodeList.txt and alignment_*.summary
    # Look in the current directory first, then up to four parent levels.
    for depth in range(5):
        barcodelist_path = '../' * depth + 'barcodeList.txt'
        if os.path.exists(barcodelist_path):
            printtime("Barcode processing, aggregate")
            aggregate_alignment("./", barcodelist_path)
            break

    # In Progress: Use ionstats alignment results to generate plots
    ionstats_plots.alignment_rate_plot2(
        merged_stats_json, 'alignment_rate_plot.png', graph_max_x)
    ionstats_plots.base_error_plot(
        merged_stats_json, 'base_error_plot.png', graph_max_x)
    for plot_file, aq_name, color in (
            ('Filtered_Alignments_Q10.png', 'AQ10', 'red'),
            ('Filtered_Alignments_Q17.png', 'AQ17', 'yellow'),
            ('Filtered_Alignments_Q20.png', 'AQ20', 'green'),
            ('Filtered_Alignments_Q47.png', 'AQ47', 'purple')):
        ionstats_plots.old_aq_length_histogram(
            merged_stats_json, plot_file, aq_name, color)
예제 #4
0
    printtime("Merging bam files to %s, mark duplicates is %s" % (outputBAM, args.duplicates))
    try:
       merge_bam_files(args.files, outputBAM, outputBAM.replace('.bam','.bam.bai'), args.duplicates)
    except:
       traceback.print_exc()
    
 # Per-file alignment statistics: runs only when requested and at least one file is listed.
 if args.align_stats and len(args.files) > 0:
     # generate ionstats files from merged BAMs
     printtime("Generating alignment stats for %s" % ', '.join(args.files))
     graph_max_x = 400
     for bamfile in args.files:
         # 'rawlib.bam' maps to the un-prefixed stats file name; any other file
         # uses the text before the first '.bam' as the prefix.
         if bamfile == 'rawlib.bam':
            ionstats_file = 'ionstats_alignment.json'
         else:
            ionstats_file = bamfile.split('.bam')[0] + '.ionstats_alignment.json'
         ionstats.generate_ionstats_alignment(bamfile, ionstats_file, graph_max_x)
    
 if args.merge_plots:          
     printtime("Generating plots for merged report")
     # The plots are always built from 'ionstats_alignment.json' in the current directory.
     ionstats_file = 'ionstats_alignment.json'
     
     # Best effort: any failure below is printed as a traceback and otherwise ignored.
     try:       
         stats = json.load(open(ionstats_file))
         l = stats['full']['max_read_length']
         # Add 49 and round to the nearest multiple of 100 to get the x-axis limit.
         graph_max_x = int(round(l + 49, -2))
         
         # Make alignment_rate_plot.png and base_error_plot.png
         ionstats_plots.alignment_rate_plot2(ionstats_file, 'alignment_rate_plot.png', int(graph_max_x))
         ionstats_plots.base_error_plot(ionstats_file, 'base_error_plot.png', int(graph_max_x))
     except:
         traceback.print_exc()
예제 #5
0
        print "Merging bam files to %s, mark duplicates is %s" % (
            outputBAM, args.duplicates)
        try:
            merge_bam_files(args.files, outputBAM,
                            outputBAM.replace('.bam', '.bam.bai'),
                            args.duplicates)
        except:
            traceback.print_exc()
        # generate ionstats files from merged BAM
        graph_max_x = 400
        if outputBAM == 'rawlib.bam':
            ionstats_file = 'ionstats_alignment.json'
        else:
            ionstats_file = outputBAM.split(
                '.bam')[0] + '.ionstats_alignment.json'
        ionstats.generate_ionstats_alignment(outputBAM, ionstats_file,
                                             graph_max_x)

    if args.merge_plots:
        print "Generating plots for merged report"
        ionstats_file = 'ionstats_alignment.json'

        try:
            stats = json.load(open(ionstats_file))
            l = stats['full']['max_read_length']
            graph_max_x = int(round(l + 49, -2))

            # Make alignment_rate_plot.png and base_error_plot.png
            ionstats_plots.alignment_rate_plot2(ionstats_file,
                                                'alignment_rate_plot.png',
                                                int(graph_max_x))
            ionstats_plots.base_error_plot(ionstats_file,
예제 #6
0
파일: alignment.py 프로젝트: Brainiarc7/TS
def create_ionstats(
        BASECALLER_RESULTS,
        ALIGNMENT_RESULTS,
        basecaller_meta_information,
        basecaller_datasets,
        graph_max_x,
        activate_barcode_filter):
    """Generate per-dataset ionstats files and merge them into summaries.

    For every dataset in ``basecaller_datasets["datasets"]`` that is not
    skipped, alignment statistics (JSON plus error-summary h5) are
    generated when the dataset's first read group has a reference and the
    dataset is not filtered; otherwise basecaller statistics are
    generated. The per-dataset JSON files are merged with
    ``ionstats.reduce_stats``. When no per-dataset file of a kind exists,
    a fallback file is generated from an empty dummy BAM on a best-effort
    basis (failures are logged, not raised).

    Args:
        BASECALLER_RESULTS: directory containing the basecaller BAM files;
            basecaller statistics and the combined files are written here.
        ALIGNMENT_RESULTS: directory containing the aligned BAM files;
            alignment statistics are written here.
        basecaller_meta_information: forwarded to the ionstats generators;
            must also be truthy for the h5 merge to run.
        basecaller_datasets: dict with a "datasets" list and a
            "read_groups" dict keyed by read-group name.
        graph_max_x: forwarded unchanged to the ionstats generators.
        activate_barcode_filter: when true, datasets whose read groups
            are all flagged 'filtered' are skipped entirely.
    """

    def build_dummy_bam():
        # Writes 'empty_dummy.bam' (header-only SAM piped through samtools)
        # into the current working directory.
        cmd = "echo  '@HD\tVN:1.5\tSO:coordinate\n@SQ\tSN:ref\tLN:4\n@RG\tID:filename\tSM:filename' | samtools view -F4 -S -b - > empty_dummy.bam"

        printtime("DEBUG: Calling '%s':" % cmd)
        ret = subprocess.call(cmd, shell=True)
        # NOTE(review): only exit code 1 is accepted as success here, which
        # looks inverted for subprocess.call; kept as-is because it may
        # match the samtools version in use - confirm before changing.
        if ret != 1:
            printtime("ERROR: empty bam file generation failed, return code: %d" % ret)
            raise RuntimeError('exit code: %d' % ret)

    merged_alignment_json = os.path.join(ALIGNMENT_RESULTS, 'ionstats_alignment.json')
    merged_alignment_h5 = os.path.join(ALIGNMENT_RESULTS, 'ionstats_error_summary.h5')
    tmp_basecaller_json = os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_basecaller.json')

    ionstats_alignment_file_list = []
    ionstats_alignment_h5_file_list = []
    ionstats_basecaller_file_list = []

    for dataset in basecaller_datasets["datasets"]:
        read_groups = basecaller_datasets["read_groups"]

        # A dataset counts as filtered only when every one of its read
        # groups carries a truthy 'filtered' flag.
        filtered = all(
            read_groups[rg_name].get('filtered', False)
            for rg_name in dataset["read_groups"])

        # filter out based on flag
        if activate_barcode_filter and filtered:
            continue

        # skip non-existing bam file
        if int(dataset["read_count"]) == 0:
            continue

        # The reference of the first read group decides between alignment
        # and basecaller statistics.
        reference = read_groups[dataset['read_groups'][0]]['reference']
        if reference and not filtered:
            alignment_json = os.path.join(
                ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_alignment.json')
            alignment_h5 = os.path.join(
                ALIGNMENT_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5')

            ionstats.generate_ionstats_alignment(
                [os.path.join(ALIGNMENT_RESULTS, dataset['file_prefix'] + '.bam')],
                alignment_json,
                alignment_h5,
                basecaller_meta_information,
                graph_max_x)

            ionstats_alignment_file_list.append(alignment_json)
            ionstats_alignment_h5_file_list.append(alignment_h5)
        else:
            basecaller_json = os.path.join(
                BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_basecaller.json')

            ionstats.generate_ionstats_basecaller(
                [os.path.join(BASECALLER_RESULTS, dataset['basecaller_bam'])],
                basecaller_json,
                os.path.join(BASECALLER_RESULTS, dataset['file_prefix'] + '.ionstats_error_summary.h5'),  # TODO, not needed
                basecaller_meta_information,
                graph_max_x)

            ionstats_basecaller_file_list.append(basecaller_json)

    # Merge ionstats files from individual (barcoded) datasets
    if ionstats_alignment_file_list:
        ionstats.reduce_stats(ionstats_alignment_file_list, merged_alignment_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            build_dummy_bam()
            ionstats.generate_ionstats_alignment(
                ['empty_dummy.bam'],
                merged_alignment_json,
                merged_alignment_h5,
                basecaller_meta_information,
                graph_max_x)
        except Exception as err:
            # Still best effort, but no longer silent.
            printtime("WARNING: fallback ionstats_alignment.json was not generated: %s" % (err,))

    if ionstats_basecaller_file_list:
        ionstats.reduce_stats(ionstats_basecaller_file_list, tmp_basecaller_json)
    else:  # barcode classification filtered all barcodes or no reads available
        # TODO: ionstats needs to produce initial json file
        try:
            build_dummy_bam()
            ionstats.generate_ionstats_basecaller(
                ['empty_dummy.bam'],
                tmp_basecaller_json,
                os.path.join(BASECALLER_RESULTS, 'ionstats_tmp_error_summary.h5'),  # TODO, not needed
                basecaller_meta_information,
                graph_max_x)
        except Exception as err:
            # Still best effort, but no longer silent.
            printtime("WARNING: fallback ionstats_tmp_basecaller.json was not generated: %s" % (err,))

    # Combine whichever of the two merged files actually exist, once in
    # alignment-first order and once in the reversed order.
    ionstatslist = [
        path for path in (merged_alignment_json, tmp_basecaller_json)
        if os.path.exists(path)
    ]
    if ionstatslist:
        ionstats.reduce_stats(ionstatslist, os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller_with_aligninfos.json'))
        ionstats.reduce_stats(reversed(ionstatslist), os.path.join(BASECALLER_RESULTS, 'ionstats_basecaller.json'))
    if ionstats_alignment_h5_file_list and basecaller_meta_information:
        ionstats.reduce_stats_h5(ionstats_alignment_h5_file_list, merged_alignment_h5)

    '''