def main():
    parser = ArgumentParser(description='Add readgroups based on Illumina ' +
                            'IDs to given BAM')
    parser.add_argument('-c', '--cn', required=True,
                        help='Sequencing Center Name')
    parser.add_argument('-l', '--lb', required=True, help='Library')
    parser.add_argument('-s', '--sm', required=True, help='Sample')
    parser.add_argument('bam', help='Input BAM')

    args = parser.parse_args()

    ReadGroup = namedtuple('ReadGroup', 'ID CN LB PL SM',
                           defaults=[args.cn, args.lb, 'ILLUMINA', args.sm])

    in_bam = args.bam
    in_bam_stem = Path(in_bam).stem

    # Intermediate BAM with readgroup tagged reads, but no readgroups in header
    temp_bam = in_bam_stem + '.temp.bam'

    # Header file with readgroups
    header_sam = in_bam_stem + '.header.sam'

    # Final BAM
    final_bam = in_bam_stem + '.readgroupsadded.bam'

    readgroups = dict()
    header_dict = dict()

    # Add readgroup tag to each read and create readgroups for header
    with pysam.AlignmentFile(in_bam, 'rb') as in_bam_fh, \
         pysam.AlignmentFile(temp_bam, 'wb', template=in_bam_fh) as temp_bam_fh:
        header_dict = in_bam_fh.header.to_dict()
        for read in in_bam_fh.fetch(until_eof=True):
            readgroup_id = '.'.join(read.query_name.split(':')[2:4])
            read.set_tag('RG', readgroup_id, 'Z')
            temp_bam_fh.write(read)
            if readgroup_id not in readgroups:
                readgroups[readgroup_id] = ReadGroup(readgroup_id)

    # Add readgroups to header
    header_dict['RG'] = [readgroup._asdict() for readgroup in readgroups.values()]
    # Write updated header to file
    header_sam_fh = pysam.AlignmentFile(header_sam, 'wh', header=header_dict)
    header_sam_fh.close()

    # Create new bam with readgroups in header and read tags
    Path(final_bam).touch()
    pysam.reheader(header_sam, temp_bam, save_stdout=final_bam)

    # Delete intermediate BAM and header file
    Path(temp_bam).unlink()
    Path(header_sam).unlink()
Beispiel #2
0
def reheader_and_rename_chromosomes(in_bam_file, out_bam_file, replacements):
    with pysam.Samfile(in_bam_file) as f:
        h = str(f.header)
        org_header = h
        for a, b in replacements.items():
            h = h.replace(f"SN:{a}", f"SN:{b}")
        if h == org_header:
            raise ValueError("No replacement happened")
        tf = tempfile.NamedTemporaryFile()
        tf.write(h.encode("utf-8"))
        tf.flush()
        out_bam_file.write_text("")  # must be there for save_stdout to work..
        pysam.reheader(
            tf.name,
            str(in_bam_file.absolute()),
            save_stdout=str(out_bam_file.absolute()).encode("utf-8"),
        )
        pysam.index(str(out_bam_file))
Beispiel #3
0
def main(args):
    # Detect the input format.
    try:
        with open(args.bam, 'r') as f:
            for line in f:
                is_sam = True
                break
    except UnicodeDecodeError:
        is_sam = False

    # Rename the sample(s).
    old_lines = pysam.view('-H', args.bam, '--no-PG').strip().split('\n')
    new_lines = []
    for old_line in old_lines:
        old_fields = old_line.split('\t')
        if old_fields[0] != '@RG':
            new_lines.append(old_line)
            continue
        new_fields = []
        for old_field in old_fields:
            if 'SM:' not in old_field:
                new_fields.append(old_field)
                continue
            new_fields.append(f'SM:{args.name}')
        new_lines.append('\t'.join(new_fields))

    # Write the output file.
    if is_sam:
        for new_line in new_lines:
            sys.stdout.write(new_line + '\n')
        alignments = pysam.view(args.bam, '--no-PG')
        sys.stdout.write(alignments)
    else:
        with tempfile.TemporaryDirectory() as t:
            with open(f'{t}/header.sam', 'w') as f:
                f.write('\n'.join(new_lines))
            alignments = pysam.reheader(f'{t}/header.sam',
                                        args.bam,
                                        '--no-PG')
            sys.stdout.buffer.write(alignments)
def premap_stampy(data_folder,
                  adaID,
                  VERBOSE=0,
                  threads=1,
                  summary=True,
                  maxreads=-1,
                  subsrate=0.05,
                  gapopen=40,
                  gapextend=3):
    '''Call stampy for actual mapping'''
    if VERBOSE:
        print 'Premapping: adaID ', adaID

    if summary:
        summary_filename = get_premap_summary_filename(data_folder, adaID)

    # Stampy can handle both gzipped and uncompressed fastq inputs
    input_filenames = get_read_filenames(data_folder, adaID, gzip=True)
    if not os.path.isfile(input_filenames[0]):
        input_filenames = get_read_filenames(data_folder, adaID, gzip=False)
    if not all(map(os.path.isfile, input_filenames)):
        raise OSError('Input files for mapping not found: ' +
                      input_filenames[0])

    # parallelize if requested
    if threads == 1:
        call_list = [
            stampy_bin,
            '--overwrite',
            '-g',
            get_reference_premap_index_filename(data_folder, adaID, ext=False),
            '-h',
            get_reference_premap_hash_filename(data_folder, adaID, ext=False),
            '-o',
            get_premapped_filename(data_folder, adaID, type='sam'),
            '--insertsize=450',
            '--insertsd=100',
            '--substitutionrate=' + str(subsrate),
            '--gapopen=' + str(gapopen),
            '--gapextend=' + str(gapextend),
        ]
        if maxreads > 0:
            call_list.append('--numrecords=' + str(maxreads))
        call_list.extend(['-M'] + input_filenames)
        call_list = map(str, call_list)
        if VERBOSE >= 2:
            print ' '.join(call_list)
        sp.call(call_list)

        if summary:
            with open(get_premap_summary_filename(data_folder, adaID),
                      'a') as f:
                f.write('\nStampy premapped (single thread).\n')

        # Convert to compressed BAM
        convert_sam_to_bam(
            get_premapped_filename(data_folder, adaID, type='bam'))

        if summary:
            with open(summary_filename, 'a') as f:
                f.write('\nSAM file converted to compressed BAM: '+\
                        get_premapped_filename(data_folder, adaID, type='bam')+'\n')

    else:

        # Multithreading works as follows: call qsub + stampy, monitor the process
        # IDs with qstat at regular intervals, and finally merge results with pysam
        output_file_parts = [
            get_premapped_filename(data_folder,
                                   adaID,
                                   type='bam',
                                   part=(j + 1)) for j in xrange(threads)
        ]

        # Submit map script
        jobs_done = np.zeros(threads, bool)
        job_IDs = np.zeros(threads, 'S30')

        # Submit map call
        import hivwholeseq
        JOBDIR = hivwholeseq.__path__[0].rstrip('/') + '/'
        JOBLOGOUT = JOBDIR + 'logout'
        JOBLOGERR = JOBDIR + 'logerr'
        cluster_time = ['23:59:59', '1:59:59']
        vmem = '8G'
        for j in xrange(threads):
            call_list = [
                'qsub', '-cwd', '-b', 'y', '-S', '/bin/bash', '-o', JOBLOGOUT,
                '-e', JOBLOGERR, '-N', adaID + ' p' + str(j + 1), '-l',
                'h_rt=' + cluster_time[threads >= 30], '-l', 'h_vmem=' + vmem,
                stampy_bin, '--overwrite', '-g',
                get_reference_premap_index_filename(
                    data_folder, adaID, ext=False), '-h',
                get_reference_premap_hash_filename(
                    data_folder, adaID, ext=False), '-o',
                get_premapped_filename(
                    data_folder, adaID, type='sam', part=(j + 1)),
                '--processpart=' + str(j + 1) + '/' + str(threads),
                '--insertsize=450', '--insertsd=100', '--substitutionrate=' +
                str(subsrate), '--gapopen=' + str(gapopen),
                '--gapextend=' + str(gapextend), '-M'
            ] + input_filenames
            call_list = map(str, call_list)
            if VERBOSE >= 2:
                print ' '.join(call_list)
            job_ID = sp.check_output(call_list)
            job_ID = job_ID.split()[2]
            job_IDs[j] = job_ID

        # Monitor output
        time_wait = 10  # secs
        while not jobs_done.all():

            # Sleep some time
            time.sleep(time_wait)

            # Get the output of qstat to check the status of jobs
            qstat_output = sp.check_output(['qstat'])
            qstat_output = qstat_output.split(
                '\n')[:-1]  # The last is an empty line
            if VERBOSE >= 3:
                print qstat_output
            if len(qstat_output) < 3:
                jobs_done[:] = True
                break
            else:
                qstat_output = [line.split()[0] for line in qstat_output[2:]]

            time_wait = 10  # secs
            for j in xrange(threads):
                if jobs_done[j]:
                    continue

                if job_IDs[j] not in qstat_output:
                    # Convert to BAM for merging
                    if VERBOSE >= 1:
                        print 'Convert premapped reads to BAM for merging: adaID '+\
                               adaID+', part '+str(j+1)+ ' of '+ \
                               str(threads)
                    convert_sam_to_bam(output_file_parts[j])
                    # We do not need to wait if we did the conversion (it takes
                    # longer than some secs)
                    time_wait = 0
                    jobs_done[j] = True

        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Stampy premapped (' + str(threads) + ' threads).\n')

        # Concatenate output files
        if VERBOSE >= 1:
            print 'Concatenate premapped reads: adaID ' + adaID + '...',
        output_filename = get_premapped_filename(data_folder,
                                                 adaID,
                                                 type='bam',
                                                 unsorted=True)
        pysam.cat('-o', output_filename, *output_file_parts)
        if VERBOSE >= 1:
            print 'done.'
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('BAM files concatenated (unsorted).\n')

        # Sort the file by read names (to ensure the pair_generator)
        # NOTE: we exclude the extension and the option -f because of a bug in samtools
        if VERBOSE >= 1:
            print 'Sort premapped reads: adaID ' + adaID
        output_filename_sorted = get_premapped_filename(data_folder,
                                                        adaID,
                                                        type='bam',
                                                        unsorted=False)
        pysam.sort('-n', output_filename, output_filename_sorted[:-4])
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Joint BAM file sorted.\n')

        # Reheader the file without BAM -> SAM -> BAM
        if VERBOSE >= 1:
            print 'Reheader premapped reads: adaID ' + adaID
        header_filename = get_premapped_filename(data_folder,
                                                 adaID,
                                                 type='sam',
                                                 part=1)
        pysam.reheader(header_filename, output_filename_sorted)
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Joint BAM file reheaded.\n')

    if VERBOSE >= 1:
        print 'Remove temporary files: adaID ' + adaID
    remove_premapped_tempfiles(data_folder, adaID, VERBOSE=VERBOSE)
    if summary:
        with open(summary_filename, 'a') as f:
            f.write('Temp premapping files removed.\n')
            f.write('\n')
Beispiel #5
0
def map_stampy(data_folder,
               adaID,
               fragment,
               VERBOSE=0,
               threads=1,
               cluster_time='23:59:59',
               maxreads=-1,
               summary=True,
               rescue=False,
               dry=False):
    '''Map using stampy'''
    frag_gen = fragment[:2]

    if summary:
        summary_filename = get_map_summary_filename(data_folder,
                                                    adaID,
                                                    frag_gen,
                                                    rescue=rescue)

    # Set mapping penalty scores: softer for rescues and F3 and F5
    global subsrate
    if rescue:
        subsrate = '0.2'
        stampy_gapopen = 5  # Default: 40
        stampy_gapextend = 1  # Default: 3

    elif frag_gen not in ('F3', 'F5'):
        stampy_gapopen = 60  # Default: 40
        stampy_gapextend = 5  # Default: 3
    else:
        stampy_gapopen = 30  # Default: 40
        stampy_gapextend = 2  # Default: 3

    if VERBOSE:
        print 'Map via stampy: ' + adaID + ' ' + frag_gen

    if not rescue:
        input_filename = get_divided_filename(data_folder,
                                              adaID,
                                              fragment,
                                              type='bam')

        # NOTE: we introduced fragment nomenclature late, e.g. F3a. Check for that
        if not os.path.isfile(input_filename):
            if frag_gen == 'F3':
                input_filename = input_filename.replace('F3a', 'F3')

    else:
        input_filename = get_divided_filename(data_folder,
                                              adaID,
                                              'unmapped',
                                              type='bam')

    # Check existance of input file, because stampy creates output anyway
    if not os.path.isfile(input_filename):
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Failed (input file for mapping not found).\n')

        raise ValueError(samplename + ', fragment ' + fragment +
                         ': input file not found.')

    # parallelize if requested
    if threads == 1:

        output_filename = get_mapped_filename(data_folder,
                                              adaID,
                                              frag_gen,
                                              type='sam',
                                              rescue=rescue)

        # Map
        call_list = [
            stampy_bin, '-g',
            get_index_file(data_folder, adaID, frag_gen, ext=False), '-h',
            get_hash_file(data_folder, adaID, frag_gen, ext=False), '-o',
            output_filename, '--overwrite', '--substitutionrate=' + subsrate,
            '--gapopen', stampy_gapopen, '--gapextend', stampy_gapextend
        ]
        if stampy_sensitive:
            call_list.append('--sensitive')

        # Take only a (random) subsample: stampy uses the fraction of reads
        # intead of the number
        if maxreads > 0:
            # FIXME: figure out the -s option and the --numrecords option
            call_list.extend(['--numrecords', maxreads])

            #n_pairs_tot = get_number_reads(input_filename, 'bam') / 2
            #frac_pairs = 1.0 * maxreads / n_pairs_tot
            #random_seed = np.random.randint(1e5)
            #call_list.extend(['-s', frac_pairs + random_seed])

        call_list = call_list + ['-M', input_filename]
        call_list = map(str, call_list)
        if VERBOSE >= 2:
            print ' '.join(call_list)

        if not dry:
            sp.call(call_list)

            if summary:
                with open(summary_filename, 'a') as f:
                    f.write('Stampy mapped (single thread).\n')

            output_filename = get_mapped_filename(data_folder,
                                                  adaID,
                                                  frag_gen,
                                                  type='bam',
                                                  rescue=rescue)
            convert_sam_to_bam(output_filename)
        else:
            if summary:
                with open(summary_filename, 'a') as f:
                    f.write('Dry run works (single thread).\n')

            if VERBOSE >= 1:
                print 'Dry run works (single thread)'

            return

    else:

        # Submit map script
        jobs_done = np.zeros(threads, bool)
        job_IDs = np.zeros(threads, 'S30')
        for j in xrange(threads):

            # Get output filename
            output_filename = get_mapped_filename(data_folder,
                                                  adaID,
                                                  frag_gen,
                                                  type='sam',
                                                  part=(j + 1),
                                                  rescue=rescue)
            # Map
            call_list = [
                'qsub', '-cwd', '-b', 'y', '-S', '/bin/bash', '-o', JOBLOGOUT,
                '-e', JOBLOGERR, '-N',
                'm' + adaID.replace('-', '') + frag_gen + str(j + 1), '-l',
                'h_rt=' + cluster_time, '-l', 'h_vmem=' + vmem, stampy_bin,
                '-g',
                get_index_file(data_folder, adaID, frag_gen, ext=False), '-h',
                get_hash_file(data_folder, adaID, frag_gen,
                              ext=False), '-o', output_filename, '--overwrite',
                '--processpart=' + str(j + 1) + '/' + str(threads),
                '--substitutionrate=' + subsrate, '--gapopen', stampy_gapopen,
                '--gapextend', stampy_gapextend
            ]
            if stampy_sensitive:
                call_list.append('--sensitive')
            call_list = call_list + ['-M', input_filename]
            call_list = map(str, call_list)
            if VERBOSE >= 2:
                print ' '.join(call_list)

            if not dry:
                job_ID = sp.check_output(call_list)
                job_ID = job_ID.split()[2]
                job_IDs[j] = job_ID

        if dry:
            if summary:
                with open(summary_filename, 'a') as f:
                    f.write('Dry run works (multi thread).\n')

            if VERBOSE >= 1:
                print 'Dry run works (multi thread)'
            return

        # Monitor output
        output_file_parts = [
            get_mapped_filename(data_folder,
                                adaID,
                                frag_gen,
                                type='bam',
                                part=(j + 1),
                                rescue=rescue) for j in xrange(threads)
        ]
        time_wait = 10  # secs
        while not jobs_done.all():

            # Sleep some time
            time.sleep(time_wait)

            # Get the output of qstat to check the status of jobs
            qstat_output = sp.check_output(['qstat'])
            qstat_output = qstat_output.split(
                '\n')[:-1]  # The last is an empty line
            if len(qstat_output) < 3:
                jobs_done[:] = True
                break
            else:
                qstat_output = [line.split()[0] for line in qstat_output[2:]]

            time_wait = 10  # secs
            for j in xrange(threads):
                if jobs_done[j]:
                    continue

                if job_IDs[j] not in qstat_output:
                    # Convert to BAM for merging
                    if VERBOSE >= 1:
                        print 'Convert mapped reads to BAM for merging: adaID '+\
                               adaID+', fragment '+frag_gen+', part '+str(j+1)+ ' of '+ \
                               str(threads)
                    convert_sam_to_bam(output_file_parts[j])
                    # We do not need to wait if we did the conversion (it takes
                    # longer than some secs)
                    time_wait = 0
                    jobs_done[j] = True

        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Stampy mapped (' + str(threads) + ' threads).\n')

        # Concatenate output files
        output_filename = get_mapped_filename(data_folder,
                                              adaID,
                                              frag_gen,
                                              type='bam',
                                              unsorted=True,
                                              rescue=rescue)
        if VERBOSE >= 1:
            print 'Concatenate mapped reads: adaID ' + adaID + ', fragment ' + frag_gen
        pysam.cat('-o', output_filename, *output_file_parts)
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('BAM files concatenated (unsorted).\n')

        # Sort the file by read names (to ensure the pair_generator)
        output_filename_sorted = get_mapped_filename(data_folder,
                                                     adaID,
                                                     frag_gen,
                                                     type='bam',
                                                     unsorted=False,
                                                     rescue=rescue)
        # NOTE: we exclude the extension and the option -f because of a bug in samtools
        if VERBOSE >= 1:
            print 'Sort mapped reads: adaID ' + adaID + ', fragment ' + frag_gen
        pysam.sort('-n', output_filename, output_filename_sorted[:-4])
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Joint BAM file sorted.\n')

        # Reheader the file without BAM -> SAM -> BAM
        if VERBOSE >= 1:
            print 'Reheader mapped reads: adaID ' + adaID + ', fragment ' + frag_gen
        header_filename = get_mapped_filename(data_folder,
                                              adaID,
                                              frag_gen,
                                              type='sam',
                                              part=1,
                                              rescue=rescue)
        pysam.reheader(header_filename, output_filename_sorted)
        if summary:
            with open(summary_filename, 'a') as f:
                f.write('Joint BAM file reheaded.\n')

    # FIXME: check whether temp files are all deleted
    if VERBOSE >= 1:
        print 'Remove temporary files: adaID ' + adaID + ', fragment ' + frag_gen
    remove_mapped_tempfiles(data_folder,
                            adaID,
                            frag_gen,
                            VERBOSE=VERBOSE,
                            rescue=rescue)
    if summary:
        with open(summary_filename, 'a') as f:
            f.write('Temp mapping files removed.\n')
            f.write('\n')
Beispiel #6
0
        if cell_barcode in filtered_barcodes:
            read_groups.add(cell_barcode)
            # replace RG tag with the cell barcode
            read.set_tag('RG', cell_barcode, 'Z', True)
            nkeep += 1
            output_bam.write(read)
        else:
            nfiltered += 1
    else:
        nmiss += 1
input_bam.close()
output_bam.close()

print('\n')
print('No. of alignments missing CB tag: ' + str(nmiss))
print('No. of alignments contains filtered out CB tag: ' + str(nmiss))
print('No. of Reads with passing-filter CB tag: ' + str(nfiltered))
print('No. of read groups: ' + str(len(read_groups)))
print('\n')

print("Writing header.sam with updated RG tags.")
# write out RG tags to header
rg_header = [{'ID': id} for id in read_groups]
header['RG'] = rg_header
with pysam.AlignmentFile(args.header_file, "wh", header=header) as outf:
    a = pysam.AlignedSegment()
    outf.write(a)

# re-header the output_bam file in place
pysam.reheader("-P", "-i", args.header_file, args.output_bam)
def map_stampy_multithread(sample, fragment, VERBOSE=0, threads=2, summary=True,
                           filtered=True):
    '''Map using stampy, multithread (via cluster requests, queueing race conditions possible)'''
    import hivwholeseq
    JOBDIR = hivwholeseq.__path__[0].rstrip('/')+'/'
    JOBLOGOUT = JOBDIR+'logout/'
    JOBLOGERR = JOBDIR+'logerr/'
    cluster_time = ['23:59:59', '0:59:59']
    vmem = '8G'

    pname = patient.id
    sample = patient.sample_table.loc[samplename]
    seq_run = sample['run']
    data_folder = MiSeq_runs[seq_run]['folder']
    adaID = sample['adaID']

    if VERBOSE:
        print 'Map via stampy: '+pname+' '+samplename+' '+fragment

    if summary:
        summary_filename = get_map_initial_summary_filename(pname, samplename, fragment)

    # Specific fragment (e.g. F5 --> F5bi)
    frag_spec = filter(lambda x: fragment in x, sample['fragments'])
    if not len(frag_spec):
        raise ValueError(str(patient)+', '+samplename+': fragment '+fragment+' not found.')
    frag_spec = frag_spec[0]

    input_filename = get_input_filename(data_folder, adaID, frag_spec, type='bam')

    # Submit map scripts in parallel to the cluster
    jobs_done = np.zeros(threads, bool)
    job_IDs = np.zeros(threads, 'S30')
    for j in xrange(threads):
    
        output_filename = get_mapped_to_initial_filename(pname, samplename,
                                                         fragment,
                                                         type='sam', part=(j+1))
        # Map
        call_list = ['qsub','-cwd',
                     '-b', 'y',
                     '-S', '/bin/bash',
                     '-o', JOBLOGOUT,
                     '-e', JOBLOGERR,
                     '-N', 'm '+samplename+fragment+' p'+str(j+1),
                     '-l', 'h_rt='+cluster_time[threads >= 10],
                     '-l', 'h_vmem='+vmem,
                     stampy_bin,
                     '--overwrite',
                     '-g', get_initial_index_filename(pname, fragment, ext=False),
                     '-h', get_initial_hash_filename(pname, fragment, ext=False),
                     '-o', output_filename,
                     '--processpart='+str(j+1)+'/'+str(threads),
                     '--substitutionrate='+subsrate,
                     '--gapopen', stampy_gapopen,
                     '--gapextend', stampy_gapextend]
        if stampy_sensitive:
            call_list.append('--sensitive')
        call_list = call_list + ['-M', input_filename]

        call_list = map(str, call_list)
        if VERBOSE >= 2:
            print ' '.join(call_list)
        job_ID = sp.check_output(call_list)
        job_ID = job_ID.split()[2]
        job_IDs[j] = job_ID

    # Monitor output
    output_file_parts = [get_mapped_to_initial_filename(pname, samplename,
                                                        fragment,
                                                        type='bam', part=(j+1))
                         for j in xrange(threads)]
    time_wait = 10 # secs
    while not jobs_done.all():

        # Sleep some time
        time.sleep(time_wait)

        # Get the output of qstat to check the status of jobs
        qstat_output = sp.check_output(['qstat'])
        qstat_output = qstat_output.split('\n')[:-1] # The last is an empty line
        if len(qstat_output) < 3:
            jobs_done[:] = True
            break
        else:
            qstat_output = [line.split()[0] for line in qstat_output[2:]]

        time_wait = 10 # secs
        for j in xrange(threads):
            if jobs_done[j]:
                continue

            if job_IDs[j] not in qstat_output:
                # Convert to BAM for merging
                if VERBOSE >= 1:
                    print 'Convert mapped reads to BAM for merging: sample '+\
                           samplename+', part '+str(j+1)+ ' of '+ \
                           str(threads)
                convert_sam_to_bam(output_file_parts[j])
                # We do not need to wait if we did the conversion (it takes
                # longer than some secs)
                time_wait = 0
                jobs_done[j] = True

    if summary:
        with open(summary_filename, 'a') as f:
            f.write('Stampy mapped ('+str(threads)+' threads).\n')

    # Concatenate output files
    output_filename = get_mapped_to_initial_filename(pname, samplename,
                                                     fragment,
                                                     type='bam', unsorted=True)
    if VERBOSE >= 1:
        print 'Concatenate premapped reads: sample '+samplename
    pysam.cat('-o', output_filename, *output_file_parts)
    if summary:
        with open(summary_filename, 'a') as f:
            f.write('BAM files concatenated (unsorted).\n')

    # Sort the file by read names (to ensure the pair_generator)
    output_filename_sorted = get_mapped_to_initial_filename(pname, samplename,
                                                            fragment,
                                                            type='bam')
    # NOTE: we exclude the extension and the option -f because of a bug in samtools
    if VERBOSE >= 1:
        print 'Sort mapped reads: sample '+samplename
    pysam.sort('-n', output_filename, output_filename_sorted[:-4])
    if summary:
        with open(summary_filename, 'a') as f:
            f.write('Joint BAM file sorted.\n')

    # Reheader the file without BAM -> SAM -> BAM
    if VERBOSE >= 1:
        print 'Reheader mapped reads: sample '+samplename
    header_filename = get_mapped_to_initial_filename(pname, samplename,
                                                     fragment,
                                                     type='sam', part=1)
    pysam.reheader(header_filename, output_filename_sorted)
    if summary:
        with open(summary_filename, 'a') as f:
            f.write('Joint BAM file reheaded.\n')

    if VERBOSE >= 1:
        print 'Remove temporary files: sample '+samplename
    remove_mapped_init_tempfiles(pname, samplename, fragment, VERBOSE=VERBOSE)
    if summary:
        with open(summary_filename, 'a') as f:
            f.write('Temp mapping files removed.\n')
            f.write('\n')