def write_read_introns_from_sam_stream(sam_stream,
                                       output_stream,
                                       retrieved_intron_counts,
                                       instance=False):
    """ Writes output that maps QNAMES to exon-exon junctions overlapped.

        sam_stream: where to find retrieved alignments in SAM form
        output_stream: where to write output. Each line takes the form:
            <read name><TAB>RNAME<TAB><sorted list of intron starts and ends>
            <TAB>['r' for 'retrieved']
        retrieved_intron_counts: defaultdict(int) that counts number of
            retrieved alignments overlapping exon-exon junction

        No return value.
    """
    for line in sam_stream:
        if line[0] == '@': continue
        try:
            tokens = line.strip().split('\t')
            flag = int(tokens[1])
            if flag & 4:
                # Skip unmapped reads
                continue
            name = tokens[0]
            rname = tokens[2]
            cigar = tokens[5]
            pos = int(tokens[3])
            seq = tokens[9]
            if 'N' not in cigar or flag & 256:
                # Skip unspliced and secondary alignments
                continue
            #md = [token[5:] for token in tokens if token[:5] == 'MD:Z:'][0]
            _, _, introns, _, _ = indels_junctions_exons_mismatches(
                cigar, dummy_md_index(cigar), pos, seq)
            # Keep only (start, end) of each intron, then render each as
            # '<RNAME>;<start>;<end>'
            introns = [intron[:2] for intron in introns]
            introns = [
                rname + ';'.join([''] + [str(bound) for bound in intron])
                for intron in sorted(introns)
            ]
            if instance:
                # One line per alignment listing all overlapped junctions
                for intron in introns:
                    retrieved_intron_counts[intron] += 1
                print >> output_stream, '%s\t%s\tr' % (name,
                                                       '\t'.join(introns))
            else:
                # One line per overlapped junction
                for intron in introns:
                    retrieved_intron_counts[intron] += 1
                    print >> output_stream, '%s;%s\t%s\tr' % (name, intron,
                                                              intron)
        except IndexError:
            print >> sys.stderr, ('Error found on line: ' + line)
            raise
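
A minimal usage sketch follows (assumptions: indels_junctions_exons_mismatches and dummy_md_index are importable as above, and 'junctions.sam' is a hypothetical SAM file of spliced alignments):

import sys
from collections import defaultdict

retrieved_intron_counts = defaultdict(int)
with open('junctions.sam') as sam_stream:
    write_read_introns_from_sam_stream(sam_stream, sys.stdout,
                                       retrieved_intron_counts)
# Keys take the form '<RNAME>;<intron start>;<intron end>'
for intron, count in sorted(retrieved_intron_counts.items()):
    print >>sys.stderr, '%s\t%d' % (intron, count)
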
Example #3
def go(true_bed_stream, sam_stream=sys.stdin, generous=False,
        base_threshold=0.5, clip_threshold=1.0, dump_incorrect=False,
        temp_dir=None, ignore_spliced_reads=False):
    """ Finds relevant and retrieved instance counts.

        true_bed_stream: file handle for BED output of Flux simulation
        sam_stream: where to read in aligner's mappings
        generous: True iff aligner cuts off /1 or /2 of a given read
        base_threshold: proportion of a read's bases that must align
            correctly for a read to be considered a correct mapping
        clip_threshold: proportion of a read's bases that must be clipped
            for a read to be considered unmapped
        dump_incorrect: write incorrect (read) alignments to stderr
        ignore_spliced_reads: ignores all spliced reads
    """
    from tempdel import remove_temporary_directories
    import tempfile
    import atexit
    if temp_dir is None:
        temp_dir_path = tempfile.mkdtemp()
    else:
        try:
            temp_dir_path = tempfile.mkdtemp(dir=temp_dir)
        except OSError:
            # Fall back to the system default if temp_dir is unusable
            temp_dir_path = tempfile.mkdtemp()
    #print >>sys.stderr, temp_dir_path
    atexit.register(remove_temporary_directories, [temp_dir_path])
    # Store everything in one file, then sort it on read name
    combined_file = os.path.join(temp_dir_path, 'combined.temp')
    with open(combined_file, 'w') as temp_stream:
        if ignore_spliced_reads:
            if generous:
                for line in true_bed_stream:
                    tokens = line.strip().split('\t')
                    if ',' in tokens[-1]: continue # skip intron line
                    print >>temp_stream, '\t'.join([tokens[3][:-2], '0']
                                                    + tokens[:3] + tokens[4:])
            else:
                for line in true_bed_stream:
                    tokens = line.strip().split('\t')
                    if ',' in tokens[-1]: continue # skip intron line
                    print >>temp_stream, '\t'.join(
                                    [tokens[3], '0'] + tokens[:3] + tokens[4:]
                                )
            for line in sam_stream:
                if line[0] == '@' or not line.strip(): continue
                tokens = line.strip().split('\t')
                if 'N' in tokens[5]: continue # skip intron line
                print >>temp_stream, '\t'.join([tokens[0], '1'] + tokens[1:])
        else:
            if generous:
                for line in true_bed_stream:
                    tokens = line.strip().split('\t')
                    print >>temp_stream, '\t'.join([tokens[3][:-2], '0']
                                                    + tokens[:3] + tokens[4:])
            else:
                for line in true_bed_stream:
                    tokens = line.strip().split('\t')
                    print >>temp_stream, '\t'.join(
                                    [tokens[3], '0'] + tokens[:3] + tokens[4:]
                                )
            for line in sam_stream:
                if line[0] == '@' or not line.strip(): continue
                tokens = line.strip().split('\t')
                print >>temp_stream, '\t'.join([tokens[0], '1'] + tokens[1:])
    import subprocess
    sorted_combined_file = os.path.join(temp_dir_path, 'combined.sorted.temp')
    subprocess.check_call(' '.join(['sort -T %s -k1,1 -k2,2n'
                                        % temp_dir_path, combined_file, 
                                        '>', sorted_combined_file]),
                            bufsize=-1, shell=True)
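    # Each line of the combined file is <read name>\t<source>\t<fields>,
    # where source is '0' for true BED lines and '1' for SAM lines; the
    # sort above groups each read's truth lines ahead of its alignments.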
    basewise_relevant, read_relevant = 0, 0
    # Initialize counters for computing accuracy metrics
    basewise_retrieved, basewise_intersection = 0, 0
    read_retrieved, read_intersection = 0, 0
    with open(sorted_combined_file) as sorted_combined_stream:
        for (name,), xpartition in xstream(sorted_combined_stream, 1):
            '''true_maps collects each true alignment's exon blocks as
            (chrom, start, end) tuples parsed from the BED line; saved
            retains every line of the partition for error dumps.'''
            true_maps = []
            saved = []
            for tokens in xpartition:
                saved.append(tokens)
                if tokens[0] == '0':
                    if len(tokens) < 12:
                        continue
                    chrom = tokens[1]
                    chrom_start = int(tokens[2])
                    chrom_end = int(tokens[3])
                    block_sizes = tokens[10].split(',')
                    block_starts = tokens[11].split(',')
                    # Handle trailing commas
                    try:
                        int(block_sizes[-1])
                    except ValueError:
                        block_sizes = block_sizes[:-1]
                    try:
                        int(block_starts[-1])
                    except ValueError:
                        block_starts = block_starts[:-1]
                    block_count = len(block_sizes)
                    assert block_count == len(block_starts)
                    exons = [(chrom,
                                chrom_start + int(block_starts[i]),
                                chrom_start + int(block_starts[i])
                                + int(block_sizes[i]))
                                for i in xrange(block_count)]
                    true_maps.append(exons)
                    basewise_relevant += sum([int(block_size) for block_size
                                                in block_sizes])
                    read_relevant += 1
                elif tokens[0] == '1':
                    flag = int(tokens[1])
                    if flag & 256 or flag & 4:
                        '''Secondary alignment or unmapped and thus not
                        retrieved; ignore'''
                        continue
                    cigar, pos, seq = tokens[5], int(tokens[3]), tokens[9]
                    (dummy_md, mapped,
                        unmapped, clip_count, read_length) \
                        = dummy_md_and_mapped_offsets(
                                            cigar,
                                            clip_threshold=clip_threshold
                                        )
                    if unmapped:
                        # Too much clipping
                        continue
                    basewise_retrieved += read_length - clip_count
                    read_retrieved += 1
                    if not true_maps:
                        assert ignore_spliced_reads
                        continue
                    # Try both /1 and /2; choose the best basewise result
                    intersected_base_count = 0
                    for true_map in true_maps:
                        if tokens[2] != true_map[0][0]:
                            '''chr is wrong, but this is still counted as a
                            retrieval above'''
                            continue
                        base_counter, base_truths = 0, set()
                        '''Each tuple in base_truths is
                        (index of base in read, mapped location)'''
                        for block in true_map:
                            base_truths.update([(base_counter + i, j + 1)
                                                    for i, j in enumerate(
                                                        xrange(
                                                            block[1], block[2]
                                                        ))])
                            base_counter += block[2] - block[1]
                        base_predictions = set()
                        _, _, _, exons, _ = indels_junctions_exons_mismatches(
                                                        cigar,
                                                        dummy_md, pos, seq,
                                                        drop_deletions=True
                                                    )
                        mapped_index = 0
                        for exon in exons:
                            base_predictions.update(
                                        [(mapped[mapped_index + i], j)
                                                  for i, j in enumerate(
                                                    xrange(
                                                        exon[0], exon[1]
                                                    ))])
                            mapped_index += exon[1] - exon[0]
                        intersected_base_count = max(intersected_base_count,
                                len(
                                    base_predictions.intersection(base_truths)
                                ))
                    basewise_intersection += intersected_base_count
                    if intersected_base_count >= read_length * base_threshold:
                        read_intersection += 1
                    elif dump_incorrect:
                        # Incorrect alignment; write to stderr
                        print >>sys.stderr, '\t'.join(
                                ['.'.join(line) for line in saved]
                            )
                else:
                    raise RuntimeError(
                                'Invalid intermediate line.'
                            )
    return (basewise_retrieved, basewise_relevant, basewise_intersection,
            read_retrieved, read_relevant, read_intersection)
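
A minimal driver sketch ('sim.bed' is a hypothetical Flux BED file; the precision and recall printed below are standard derivations from the returned counts):

import sys

with open('sim.bed') as true_bed_stream:
    (basewise_retrieved, basewise_relevant, basewise_intersection,
     read_retrieved, read_relevant, read_intersection) = go(
            true_bed_stream, sam_stream=sys.stdin)
# precision = intersection / retrieved; recall = intersection / relevant
print >>sys.stderr, 'basewise precision: %.4f\tbasewise recall: %.4f' % (
        float(basewise_intersection) / basewise_retrieved,
        float(basewise_intersection) / basewise_relevant)
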
Example #4
def go(input_stream=sys.stdin,
       output_stream=sys.stdout,
       fudge=5,
       stranded=False,
       verbose=False,
       max_refs=300,
       report_multiplier=1.2):
    """ Emits junction combinations associated with reads.

        Soft-clipped Bowtie 2 alignments of read sequences to the transcript
        fragment index are used to infer which cojunctions could possibly be
        overlapped by reads. Then maximal cliques of the graph described in
        the maximal_cliques() function are enumerated to obtain the junction
        combinations that could possibly be overlapped by reads.

        input_stream: where to retrieve Bowtie 2 output
        output_stream: where to emit exon and junction tuples; typically, this
            is sys.stdout.
        fudge: by how many bases to extend left and right extend sizes
            to accommodate potential indels
        stranded: True iff input reads are strand-specific; this affects
            whether an output partition has a terminal '+' or '-' indicating
            the sense strand. Further, if stranded is True, an alignment is
            returned only if its strand agrees with the junction's strand.
        verbose: True if alignments should occasionally be written to stderr.
        max_refs: maximum number of reference sequences to enumerate per read;
            if more are present, prioritize those sequences that overlap
            the fewest junctions
        report_multiplier: if verbose is True, the line number of an
            alignment written to stderr increases exponentially with base
            report_multiplier.
    """
    output_line_count, next_report_line, i = 0, 0, 0
    for (qname, ), xpartition in xstream(input_stream, 1):
        '''While labeled multireadlet, this list may end up simply a
        unireadlet.'''
        multiread = []
        for tokens in xpartition:
            flag = int(tokens[0])
            if verbose and next_report_line == i:
                print >>sys.stderr, \
                    'SAM output record %d: rdname="%s", flag=%d' % (i,
                                                                    qname,
                                                                    flag)
                next_report_line = int(
                    (next_report_line + 1) * report_multiplier + 1) - 1
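                # The report index grows geometrically (factor
                # report_multiplier), so verbose output stays sparse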
            i += 1
            multiread.append((qname, ) + tokens)
        # 'flag' retains its value from the final record of the read's
        # partition; unmapped reads are skipped here
        if flag & 4: continue
        cojunctions, all_junctions = defaultdict(set), {}
        for alignment in multiread_with_junctions(multiread, stranded):
            cigar = alignment[5]
            md = [field for field in alignment if field[:5] == 'MD:Z:'][0][5:]
            pos = int(alignment[3])
            seq = alignment[9]
            reversed_complement_seq = seq[::-1].translate(
                _reversed_complement_translation_table)
            if seq < reversed_complement_seq:
                seq_to_print = seq
            else:
                seq_to_print = reversed_complement_seq
            seq_size = len(seq)
            rname = alignment[2]
            sense = [field for field in alignment
                     if field[:5] == 'XS:A:'][0][5:]
            if (rname, sense) not in all_junctions:
                all_junctions[(rname, sense)] = defaultdict(list)
            _, _, junctions, _, _ = indels_junctions_exons_mismatches(
                cigar, md, pos, seq, junctions_only=True)
            cojunctions[(rname, sense)].add(
                tuple([(junction[0], junction[1]) for junction in junctions]))
            for junction in junctions:
                if (junction[0], junction[1]) \
                    not in all_junctions[(rname, sense)]:
                    all_junctions[(rname, sense)][(junction[0], junction[1])] \
                        = [junction[2], junction[3]]
                else:
                    all_junctions[(rname, sense)][(
                        junction[0], junction[1])][0] = max(
                            all_junctions[(rname, sense)][(junction[0],
                                                           junction[1])][0],
                            junction[2])
                    all_junctions[(rname, sense)][(
                        junction[0], junction[1])][1] = max(
                            all_junctions[(rname, sense)][(junction[0],
                                                           junction[1])][1],
                            junction[3])
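        # At this point all_junctions[(rname, sense)] maps each junction
        # (start, end) to the maximum left and right extend sizes observed
        # across alignments; cojunctions holds the junction combinations seen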
        for rname, sense in all_junctions:
            to_write = set()
            for cojunction in selected_cojunctions(
                    paths_from_cojunctions(list(cojunctions[(rname, sense)]),
                                           span=(seq_size + fudge)),
                    max_refs=max_refs,
                    seq=seq,
                    rname=rname,
                    sense=sense):
                left_extend_size = all_junctions[(rname,
                                                  sense)][cojunction[0]][0]
                right_extend_size = all_junctions[(rname,
                                                   sense)][cojunction[-1]][1]
                to_write.add(
                    ('{rname}{sense}\t{starts}'
                     '\t{ends}\t{left_size}'
                     '\t{right_size}\t{seq}').format(
                         rname=rname,
                         sense=sense,
                         starts=','.join(
                             [str(junction[0]) for junction in cojunction]),
                         ends=','.join(
                             [str(junction[1]) for junction in cojunction]),
                         left_size=(left_extend_size + fudge),
                         right_size=(right_extend_size + fudge),
                         seq=seq_to_print))
            # 'counter' is assumed to be a counter object defined at module
            # level
            counter.add('paths_out', len(to_write))
            for line_to_write in to_write:
                print >>output_stream, line_to_write
                output_line_count += 1
    output_stream.flush()
    print >> sys.stderr, (
        'cojunction_enum_delegate.py reports %d output lines.' %
        output_line_count)
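
A minimal invocation sketch (assumes Bowtie 2 SAM records arrive on stdin already grouped by read name, as xstream expects; output lines follow the tab-separated format built above):

import sys

go(input_stream=sys.stdin, output_stream=sys.stdout, fudge=5,
   stranded=False, verbose=True, max_refs=300, report_multiplier=1.2)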