def produce_bowtie2_alignments(reads,
                               index_prefix,
                               genome_dir,
                               score_min,
                              ):
    ''' Map reads with bowtie2 in local mode and yield (qname, alignments)
    pairs, one per read, where alignments is a list built from the mapped
    hits for that read.

    NOTE(review): a later definition in this file reuses this exact function
    name (the region_fetcher variant), so at import time this definition is
    shadowed — confirm which one is intended to survive.
    '''
    # Bowtie2 parameters for permissive local alignment; up to 10 hits are
    # reported per read.
    bowtie2_options = {'local': True,
                       'report_up_to': 10,
                       'seed_mismatches': 1,
                       'seed_interval_function': 'C,1,0',
                       'seed_length': 10,
                      }
    # yield_mappings=True makes map_bowtie2 return the SAM header object plus
    # an iterator of mappings rather than only writing a file.
    sam_file, mappings = mapping_tools.map_bowtie2(index_prefix,
                                                   reads=reads,
                                                   custom_binary=True,
                                                   score_min=score_min,
                                                   yield_mappings=True,
                                                   **bowtie2_options)
    # Lookup for reference bases, used when converting mappings to alignments.
    base_lookup = genomes.build_base_lookup(genome_dir, sam_file)

    # bowtie2 reports all hits for one read consecutively, so grouping by
    # qname collects each read's hits.
    mapping_groups = utilities.group_by(mappings, lambda m: m.qname)
    for qname, group in mapping_groups:
        # Sort hits by (reference id, position) for deterministic output.
        group = sorted(group, key=lambda m: (m.tid, m.pos))
        alignments = [mapping_to_alignment(mapping, sam_file, base_lookup)
                      for mapping in group
                      if not mapping.is_unmapped]
        yield qname, alignments
def produce_bowtie2_alignments(reads, index_prefix, genome_dir, score_min):
    ''' Map reads with bowtie2 in local mode and yield (qname, alignments)
    pairs, one per read, where alignments is built from that read's mapped
    hits (unmapped records are dropped).
    '''
    # Permissive local-alignment settings; report up to 10 hits per read.
    options = {
        'local': True,
        'report_up_to': 10,
        'seed_mismatches': 1,
        'seed_interval_function': 'C,1,0',
        'seed_length': 10,
    }
    # yield_mappings=True returns the SAM header object plus an iterator of
    # mappings instead of only writing output to disk.
    sam_file, mappings = mapping_tools.map_bowtie2(
        index_prefix,
        reads=reads,
        custom_binary=True,
        score_min=score_min,
        yield_mappings=True,
        **options
    )
    region_fetcher = genomes.build_region_fetcher(genome_dir, load_references=True)

    # All hits for one read are consecutive, so grouping by qname collects them.
    for qname, hits in utilities.group_by(mappings, lambda mapping: mapping.qname):
        # Deterministic order: by reference id, then position.
        ordered = sorted(hits, key=lambda mapping: (mapping.tid, mapping.pos))
        alignments = []
        for mapping in ordered:
            if mapping.is_unmapped:
                continue
            alignments.append(mapping_to_alignment(mapping, sam_file, region_fetcher))
        yield qname, alignments
def produce_bowtie2_alignments_old(reads,
                                   sam_fn,
                                   index_prefix,
                                   genome_dir,
                                   score_min,
                                  ):
    ''' Legacy variant: map reads with bowtie2 in local mode, writing output
    to sam_fn on disk, then read it back and yield (qname, alignments) pairs.

    Unlike the newer variant, this one does not sort each group of hits
    before converting them.
    '''
    bowtie2_options = {'local': True,
                       #'report_all': True,
                       'report_up_to': 10,
                       'seed_mismatches': 1,
                       'seed_interval_function': 'C,1,0',
                       'seed_length': 10,
                       #'threads': 12,
                      }
    # Writes alignments to sam_fn; nothing is yielded by this call.
    mapping_tools.map_bowtie2(index_prefix,
                              None,
                              None,
                              sam_fn,
                              unpaired_Reads=reads,
                              custom_binary=True,
                              score_min=score_min,
                              **bowtie2_options)
    # Re-open the just-written file to iterate over the mappings.
    sam_file = pysam.Samfile(sam_fn)
    region_fetcher = genomes.build_region_fetcher(genome_dir, load_references=True)

    mapping_groups = utilities.group_by(sam_file, lambda m: m.qname)
    for qname, group in mapping_groups:
        alignments = [mapping_to_alignment(mapping, sam_file, region_fetcher)
                      for mapping in group
                      if not mapping.is_unmapped]
        yield qname, alignments
def filter_mappings(self):
    ''' Extend poly-A ends of every mapping, record the distribution of
    minimum non-genomic lengths per read, and write two sorted outputs:
    all extended mappings ('extended'), and only those reads that have a
    nonzero non-genomic length and map uniquely ('extended_filtered').
    Summary counts are appended to self.summary.
    '''
    # NOTE(review): num_unmapped is initialized but never incremented or
    # reported below — confirm whether an unmapped tally was intended here.
    num_unmapped = 0
    num_entirely_genomic = 0
    num_nonunique = 0
    num_unique = 0
    nongenomic_lengths = Counter()

    sam_file = pysam.Samfile(self.file_names['accepted_hits'])
    region_fetcher = genomes.build_region_fetcher(self.file_names['genome'],
                                                  load_references=True,
                                                  sam_file=sam_file,
                                                 )
    # Two sorters sharing the input file's reference layout.
    extended_sorter = sam.AlignmentSorter(sam_file.references,
                                          sam_file.lengths,
                                          self.file_names['extended'],
                                         )
    filtered_sorter = sam.AlignmentSorter(sam_file.references,
                                          sam_file.lengths,
                                          self.file_names['extended_filtered'],
                                         )
    # Lazily extend each mapping's poly-A end before grouping by read name.
    extended_mappings = (trim.extend_polyA_end(mapping, region_fetcher)
                         for mapping in sam_file)
    mapping_groups = utilities.group_by(extended_mappings, lambda m: m.qname)

    with extended_sorter, filtered_sorter:
        for qname, group in mapping_groups:
            # Every extended mapping goes to the 'extended' output.
            for m in group:
                extended_sorter.write(m)

            # A read counts as entirely genomic if any of its hits has
            # non-genomic length 0.
            min_nongenomic_length = min(trim.get_nongenomic_length(m) for m in group)
            nongenomic_lengths[min_nongenomic_length] += 1
            if min_nongenomic_length == 0:
                num_entirely_genomic += 1
                continue

            # Multiple hits or any mapq below 40 disqualifies a read as unique.
            nonunique = len(group) > 1 or any(m.mapq < 40 for m in group)
            if nonunique:
                num_nonunique += 1
                continue

            num_unique += 1
            for m in group:
                filtered_sorter.write(m)

    self.summary.extend([('Mapped with no non-genomic A\'s', num_entirely_genomic),
                         ('Nonunique', num_nonunique),
                         ('Unique', num_unique),
                        ],
                       )

    nongenomic_lengths = utilities.counts_to_array(nongenomic_lengths)
    self.write_file('nongenomic_lengths', nongenomic_lengths)
def _consolidate_counts(positions_list):
    ''' Merge entries that share the same (ref_seq_name, ref_pos, ref_char,
    read_char) prefix, summing their counts (element 4), and return the
    merged 5-tuples sorted by that prefix.
    '''
    merged = []
    grouped = utilities.group_by(sorted(positions_list), key=lambda entry: entry[:4])
    for group_key, entries in grouped:
        name, pos, ref_char, read_char = group_key
        total = sum(entry[4] for entry in entries)
        merged.append((name, pos, ref_char, read_char, total))
    return merged
def collapse_fragments(self):
    ''' Collapse PCR duplicates: group sorted SAM lines first by mapping
    position, then by fragment identity within each position, and write one
    representative line per fragment (annotated with its duplicate count) to
    the collapsed SAM file. Positions with more than 100 lines are logged to
    the collisions file, and a position_count/fragment_count histogram is
    written out as 'amplification_counts'.
    '''
    # Convertors that project a full mapping annotation down to its
    # position-level / fragment-level identity.
    get_position = annotation.make_convertor(self.MappingAnnotation,
                                             self.PositionAnnotation,
                                            )
    get_fragment = annotation.make_convertor(self.MappingAnnotation,
                                             self.FragmentAnnotation,
                                            )
    amplification_counts = Counter()

    sq_lines = sam.get_sq_lines(self.merged_file_names['sorted_clean_sam'])
    sam_lines = self.get_sorted_sam_lines()

    with open(self.file_names['collapsed_sam'], 'w') as collapsed_fh, \
         open(self.file_names['collisions'], 'w') as collisions_fh:
        # Reproduce the @SQ header lines in the collapsed output.
        for sq_line in sq_lines:
            collapsed_fh.write(sq_line)

        position_groups = utilities.group_by(sam_lines, get_position)
        for position_annotation, position_lines in position_groups:
            fragment_counts = Counter()
            position_count = len(position_lines)

            fragment_groups = utilities.group_by(position_lines, get_fragment)
            for fragment_annotation, fragment_lines in fragment_groups:
                fragment_count = len(fragment_lines)
                fragment_counts[fragment_count] += 1
                # Histogram keyed by 'position_count,fragment_count' strings.
                amplification_counts['{},{}'.format(position_count, fragment_count)] += 1
                # One representative line per fragment, renamed to carry the
                # duplicate count in its identifier.
                collapsed_annotation = self.CollapsedAnnotation(count=fragment_count, **fragment_annotation)
                new_line = sam.splice_in_name(fragment_lines[0], collapsed_annotation.identifier)
                collapsed_fh.write(new_line)

            fragment_counts = utilities.counts_to_array(fragment_counts)
            # Positions with unusually many lines are recorded for inspection.
            if position_count > 100:
                collisions_fh.write(position_annotation.identifier + '\n')
                collisions_fh.write(','.join(map(str, fragment_counts)) + '\n')

    sam.make_sorted_bam(self.file_names['collapsed_sam'],
                        self.file_names['collapsed_bam'],
                       )
    self.write_file('amplification_counts', amplification_counts)
def shade_background(start, sequence, ax=None, save_as=None):
    ''' Lightly shade the background of ax according to the expected sequence.

    For each position, consecutive duplicate bases are collapsed and the
    vertical extent of the column is split evenly among the distinct bases,
    each shaded in its IGV color.

    NOTE(review): despite the default, ax is dereferenced unconditionally, so
    a real axes object must be supplied; save_as is currently unused.
    '''
    for offset, bases_here in enumerate(sequence):
        # Collapse consecutive repeats, keeping first-seen order.
        unique_bases = [base for base, _ in utilities.group_by(bases_here)]
        band_height = 1. / len(unique_bases)
        left = start + offset - 0.5
        right = start + offset + 0.5
        for rank, base in enumerate(unique_bases):
            ax.axvspan(left,
                       right,
                       ymax=1 - rank * band_height,
                       ymin=1 - (rank + 1) * band_height,
                       facecolor=igv_colors.normalized_rgbs[base],
                       alpha=0.3,
                       linewidth=0.7,
                      )
def combine_mappings(self):
    ''' Merge mapped and unmapped records by query name, classify each read
    group as unmapped / nonunique / unique, and write only uniquely-mapped
    groups to a sorted BAM file. Tallies are appended to self.summary.
    '''
    num_unmapped = 0
    num_nonunique = 0
    num_unique = 0

    mappings = pysam.Samfile(self.file_names['accepted_hits'])
    unmapped = pysam.Samfile(self.file_names['unmapped_bam'])
    # Interleave mapped and unmapped records so grouping by qname sees all
    # records for a read together.
    merged = sam.merge_by_name(mappings, unmapped)
    grouped = utilities.group_by(merged, lambda m: m.qname)

    alignment_sorter = sam.AlignmentSorter(mappings.references,
                                           mappings.lengths,
                                           self.file_names['bam'],
                                          )
    with alignment_sorter:
        for qname, group in grouped:
            # Any unmapped record in the group marks the whole read unmapped.
            unmapped = any(m.is_unmapped for m in group)
            if unmapped:
                num_unmapped += 1
                continue

            # Multiple hits or mapq below 40 -> not uniquely mapped.
            nonunique = len(group) > 1 or any(m.mapq < 40 for m in group)
            if nonunique:
                num_nonunique += 1
            else:
                num_unique += 1
                for mapping in group:
                    alignment_sorter.write(mapping)

    self.summary.extend([('Unmapped', num_unmapped),
                         ('Nonunique', num_nonunique),
                         ('Unique', num_unique),
                        ],
                       )
def combine_mappings(self):
    ''' Merge mapped and unmapped records by query name, classify each read
    group as unmapped / nonunique / unique, and write only uniquely-mapped
    groups to a sorted BAM file. Tallies are appended to self.summary.
    '''
    counts = Counter()

    mapped_file = pysam.Samfile(self.file_names['accepted_hits'])
    unmapped_file = pysam.Samfile(self.file_names['unmapped_bam'])
    # Interleave the two inputs so every record for a read is adjacent.
    by_name = sam.merge_by_name(mapped_file, unmapped_file)
    name_groups = utilities.group_by(by_name, lambda mapping: mapping.qname)

    writer = sam.AlignmentSorter(
        mapped_file.references,
        mapped_file.lengths,
        self.file_names['bam'],
    )
    with writer:
        for name, group in name_groups:
            if any(mapping.is_unmapped for mapping in group):
                counts['unmapped'] += 1
            elif len(group) > 1 or any(mapping.mapq < 40 for mapping in group):
                # Multiple hits or low mapq -> not uniquely mapped.
                counts['nonunique'] += 1
            else:
                counts['unique'] += 1
                for mapping in group:
                    writer.write(mapping)

    self.summary.extend([
        ('Unmapped', counts['unmapped']),
        ('Nonunique', counts['nonunique']),
        ('Unique', counts['unique']),
    ],
    )
def filter_mappings(self):
    ''' Extend poly-A ends of every mapping, record the distribution of
    minimum non-genomic lengths per read, and write two sorted outputs:
    all extended mappings ('extended'), and only those reads that have a
    nonzero non-genomic length and map uniquely ('extended_filtered').
    Summary counts are appended to self.summary.
    '''
    # NOTE(review): num_unmapped is initialized but never incremented or
    # reported below — confirm whether an unmapped tally was intended here.
    num_unmapped = 0
    num_entirely_genomic = 0
    num_nonunique = 0
    num_unique = 0
    nongenomic_lengths = Counter()

    sam_file = pysam.Samfile(self.file_names['accepted_hits'])
    region_fetcher = genomes.build_region_fetcher(
        self.file_names['genome'],
        load_references=True,
        sam_file=sam_file,
    )
    # Two sorters sharing the input file's reference layout.
    extended_sorter = sam.AlignmentSorter(
        sam_file.references,
        sam_file.lengths,
        self.file_names['extended'],
    )
    filtered_sorter = sam.AlignmentSorter(
        sam_file.references,
        sam_file.lengths,
        self.file_names['extended_filtered'],
    )
    # Lazily extend each mapping's poly-A end before grouping by read name.
    extended_mappings = (trim.extend_polyA_end(mapping, region_fetcher)
                         for mapping in sam_file)
    mapping_groups = utilities.group_by(extended_mappings, lambda m: m.qname)

    with extended_sorter, filtered_sorter:
        for qname, group in mapping_groups:
            # Every extended mapping goes to the 'extended' output.
            for m in group:
                extended_sorter.write(m)

            # A read counts as entirely genomic if any of its hits has
            # non-genomic length 0.
            min_nongenomic_length = min(
                trim.get_nongenomic_length(m) for m in group)
            nongenomic_lengths[min_nongenomic_length] += 1
            if min_nongenomic_length == 0:
                num_entirely_genomic += 1
                continue

            # Multiple hits or any mapq below 40 disqualifies a read.
            nonunique = len(group) > 1 or any(m.mapq < 40 for m in group)
            if nonunique:
                num_nonunique += 1
                continue

            num_unique += 1
            for m in group:
                filtered_sorter.write(m)

    self.summary.extend([
        ('Mapped with no non-genomic A\'s', num_entirely_genomic),
        ('Nonunique', num_nonunique),
        ('Unique', num_unique),
    ],
    )

    nongenomic_lengths = utilities.counts_to_array(nongenomic_lengths)
    self.write_file('nongenomic_lengths', nongenomic_lengths)
def align_reads(
        target_fasta_fn,
        reads,
        bam_fn,
        min_path_length=15,
        error_fn='/dev/null',
        alignment_type='overlap',
):
    ''' Aligns reads to targets in target_fasta_fn by Smith-Waterman, storing
    alignments in bam_fn and yielding unaligned reads.

    Both orientations of each read are tried against every target; candidate
    alignments must have a path of at least min_path_length and a score
    exceeding 0.8 * (2 * path length). Among a read's accepted alignments,
    only the top-scoring group is written: mapping quality 2 if that group
    has a single member, 1 otherwise, with all but the first member flagged
    secondary. Per-read statistics are written to error_fn.

    NOTE(review): this body uses Python 2 constructs (dict.iteritems,
    generator .next()).
    '''
    targets = {r.name: r.seq for r in fasta.reads(target_fasta_fn)}
    target_names = sorted(targets)
    target_lengths = [len(targets[n]) for n in target_names]
    alignment_sorter = sam.AlignmentSorter(
        target_names,
        target_lengths,
        bam_fn,
    )
    statistics = Counter()

    with alignment_sorter:
        for original_read in reads:
            statistics['input'] += 1

            alignments = []

            # Reverse-complement copy so both orientations can be tried.
            rc_read = fastq.Read(
                original_read.name,
                utilities.reverse_complement(original_read.seq),
                original_read.qual[::-1],
            )

            for read, is_reverse in ([original_read, False], [rc_read, True]):
                qual = fastq.decode_sanger(read.qual)
                for target_name, target_seq in targets.iteritems():
                    alignment = generate_alignments(read.seq, target_seq, alignment_type)[0]
                    path = alignment['path']
                    # Acceptance: long enough path AND normalized score > 0.8
                    # (score is divided by twice the path length).
                    if len(path) >= min_path_length and alignment['score'] / (2. * len(path)) > 0.8:
                        aligned_segment = pysam.AlignedSegment()
                        aligned_segment.seq = read.seq
                        aligned_segment.query_qualities = qual
                        aligned_segment.is_reverse = is_reverse

                        char_pairs = make_char_pairs(path, read.seq, target_seq)

                        cigar = sam.aligned_pairs_to_cigar(char_pairs)
                        # Soft-clip any unaligned prefix/suffix of the read.
                        clip_from_start = first_query_index(path)
                        if clip_from_start > 0:
                            cigar = [(sam.BAM_CSOFT_CLIP, clip_from_start)] + cigar
                        clip_from_end = len(read.seq) - 1 - last_query_index(path)
                        if clip_from_end > 0:
                            cigar = cigar + [(sam.BAM_CSOFT_CLIP, clip_from_end)]
                        aligned_segment.cigar = cigar

                        read_aligned, ref_aligned = zip(*char_pairs)
                        md = sam.alignment_to_MD_string(ref_aligned, read_aligned)
                        aligned_segment.set_tag('MD', md)

                        aligned_segment.set_tag('AS', alignment['score'])
                        aligned_segment.tid = alignment_sorter.get_tid(target_name)
                        aligned_segment.query_name = read.name
                        aligned_segment.next_reference_id = -1
                        aligned_segment.reference_start = first_target_index(path)

                        alignments.append(aligned_segment)

            if alignments:
                statistics['aligned'] += 1

                # Keep only the group of alignments tied for the best AS tag.
                sorted_alignments = sorted(alignments, key=lambda m: m.get_tag('AS'), reverse=True)
                grouped = utilities.group_by(sorted_alignments, key=lambda m: m.get_tag('AS'))
                _, highest_group = grouped.next()
                primary_already_assigned = False
                for alignment in highest_group:
                    # mapq 2 = sole best hit, mapq 1 = tied best hits.
                    if len(highest_group) == 1:
                        alignment.mapping_quality = 2
                    else:
                        alignment.mapping_quality = 1

                    # Exactly one alignment per read stays primary.
                    if not primary_already_assigned:
                        primary_already_assigned = True
                    else:
                        alignment.is_secondary = True

                    alignment_sorter.write(alignment)
            else:
                statistics['unaligned'] += 1
                # NOTE(review): 'read' here is the last value of the inner
                # loop variable, i.e. the reverse-complement copy — probably
                # intended to be original_read; confirm before relying on the
                # yielded sequence/qual.
                yield read

    with open(error_fn, 'w') as error_fh:
        for key in ['input', 'aligned', 'unaligned']:
            error_fh.write('{0}: {1:,}\n'.format(key, statistics[key]))
def combine_mappings(self):
    ''' Pair up 5'-side and 3'-side mappings of the same original read,
    classify each pair (unmapped / nonunique / discordant / concordant),
    splice concordant pairs into a single read with an N-gap CIGAR, extend
    its poly-A end, and write it to a sorted output. Tallies go to
    self.summary.

    NOTE(review): uses Python 2 constructs (izip, print statement).
    '''
    num_unmapped = 0
    num_five_unmapped = 0
    num_three_unmapped = 0
    num_nonunique = 0
    num_discordant = 0
    num_concordant = 0

    five_prime_mappings = pysam.Samfile(self.file_names['five_prime_accepted_hits'])
    five_prime_unmapped = pysam.Samfile(self.file_names['five_prime_unmapped'])
    all_five_prime = sam.merge_by_name(five_prime_mappings, five_prime_unmapped)
    five_prime_grouped = utilities.group_by(all_five_prime, lambda m: m.qname)

    three_prime_mappings = pysam.Samfile(self.file_names['three_prime_accepted_hits'])
    three_prime_unmapped = pysam.Samfile(self.file_names['three_prime_unmapped'])
    all_three_prime = sam.merge_by_name(three_prime_mappings, three_prime_unmapped)
    three_prime_grouped = utilities.group_by(all_three_prime, lambda m: m.qname)

    # The two grouped streams are iterated in lockstep; the identifier check
    # below guards against them drifting out of sync.
    group_pairs = izip(five_prime_grouped, three_prime_grouped)

    alignment_sorter = sam.AlignmentSorter(five_prime_mappings.references,
                                           five_prime_mappings.lengths,
                                           self.file_names['combined_extended'],
                                          )
    region_fetcher = genomes.build_region_fetcher(self.file_names['genome'],
                                                  load_references=True,
                                                  sam_file=five_prime_mappings,
                                                 )

    with alignment_sorter:
        for (five_qname, five_group), (three_qname, three_group) in group_pairs:
            five_annotation = trim.PayloadAnnotation.from_identifier(five_qname)
            three_annotation = trim.PayloadAnnotation.from_identifier(three_qname)
            if five_annotation['original_name'] != three_annotation['original_name']:
                # Ensure that the iteration through pairs is in sync.
                print five_qname, three_qname
                raise ValueError

            five_unmapped = any(m.is_unmapped for m in five_group)
            three_unmapped = any(m.is_unmapped for m in three_group)
            if five_unmapped:
                num_five_unmapped += 1
            if three_unmapped:
                num_three_unmapped += 1
            if five_unmapped or three_unmapped:
                num_unmapped += 1
                continue

            # Multiple hits or mapq below 40 on either side -> nonunique.
            five_nonunique = len(five_group) > 1 or any(m.mapq < 40 for m in five_group)
            three_nonunique = len(three_group) > 1 or any(m.mapq < 40 for m in three_group)
            if five_nonunique or three_nonunique:
                num_nonunique += 1
                continue

            five_m = five_group.pop()
            three_m = three_group.pop()
            five_strand = '-' if five_m.is_reverse else '+'
            three_strand = '-' if three_m.is_reverse else '+'

            # Discordant: different references, different strands, or an
            # implied fragment longer than 10 kb.
            tlen = max(five_m.aend, three_m.aend) - min(five_m.pos, three_m.pos)
            discordant = (five_m.tid != three_m.tid) or (five_strand) != (three_strand) or (tlen > 10000)
            if discordant:
                num_discordant += 1
                continue

            # Order the halves by genomic coordinate according to strand.
            if five_strand == '+':
                first_read = five_m
                second_read = three_m
            elif five_strand == '-':
                first_read = three_m
                second_read = five_m

            # Overlapping halves are also treated as discordant.
            gap = second_read.pos - first_read.aend
            if gap < 0:
                num_discordant += 1
                continue

            combined_read = pysam.AlignedRead()
            # qname needs to come from three_m to include trimmed As
            combined_read.qname = three_m.qname
            combined_read.tid = five_m.tid
            combined_read.seq = first_read.seq + second_read.seq
            combined_read.qual = first_read.qual + second_read.qual
            # CIGAR op 3 (N) bridges the genomic gap between the halves.
            combined_read.cigar = first_read.cigar + [(3, gap)] + second_read.cigar
            combined_read.pos = first_read.pos
            combined_read.is_reverse = first_read.is_reverse
            combined_read.mapq = min(first_read.mapq, second_read.mapq)
            combined_read.rnext = -1
            combined_read.pnext = -1

            num_concordant += 1

            extended_mapping = trim.extend_polyA_end(combined_read,
                                                     region_fetcher,
                                                    )
            alignment_sorter.write(extended_mapping)

    self.summary.extend([('Unmapped', num_unmapped),
                         ('Five prime unmapped', num_five_unmapped),
                         ('Three prime unmapped', num_three_unmapped),
                         ('Nonunique', num_nonunique),
                         ('Discordant', num_discordant),
                         ('Concordant', num_concordant),
                        ],
                       )
def group_mapping_pairs(mappings):
    ''' Group mappings by query name and split each group into its read-1
    and read-2 members, yielding (query_name, (R1_list, R2_list)).
    '''
    for name, members in utilities.group_by(mappings, lambda mapping: mapping.query_name):
        first_reads = []
        second_reads = []
        for mapping in members:
            if mapping.is_read1:
                first_reads.append(mapping)
            if mapping.is_read2:
                second_reads.append(mapping)
        yield name, (first_reads, second_reads)
def filter_mappings(
        mappings,
        minimum_mapq=42,
        max_insert_length=1000,
        counts_dict=None,
        verbose=False,
        unmapped_fns=None,
):
    ''' Filters out unmapped, nonuniquely mapped, or discordantly mapped reads.

    Consumes name-grouped paired-end mappings (exactly two records per qname),
    classifies each pair, and yields only pairs that are mapped, concordant,
    correctly oriented, and have both mapqs >= minimum_mapq.

    Parameters:
        mappings: iterable of paired-end records; records for one qname must
            be adjacent and come in exactly twos (ValueError otherwise).
        minimum_mapq: pairs with either mapq below this are counted as
            nonunique and dropped.
        max_insert_length: passed through to is_discordant.
        counts_dict: if not None, updated with the tallies on normal exit.
        verbose: log per-pair reasons for dropping via logging.info.
        unmapped_fns: optional (R1_fn, R2_fn) pair; unmapped pairs are written
            there as FASTQ-style Read records.

    Fixes relative to the previous version: `counts_dict != None` replaced
    with the idiomatic `is not None`, and the unmapped output files are now
    closed in a finally block (they previously leaked).
    '''
    pair_counts = {'total': 0,
                   'unmapped': 0,
                   'indel': 0,
                   'nonunique': 0,
                   'discordant': 0,
                   'disoriented': 0,
                   'unique': Counter(),
                   'mapqs': Counter(),
                   'fragment_lengths': Counter(),
                   'tids': Counter(),
                  }

    R1_unmapped_fh = None
    R2_unmapped_fh = None
    if unmapped_fns:
        R1_unmapped_fn, R2_unmapped_fn = unmapped_fns
        R1_unmapped_fh = open(R1_unmapped_fn, 'w')
        R2_unmapped_fh = open(R2_unmapped_fn, 'w')

    try:
        for _, aligned_pair in utilities.group_by(mappings, key=lambda m: m.qname):
            if len(aligned_pair) != 2:
                raise ValueError(len(aligned_pair))
            pair_counts['total'] += 1

            R1_aligned, R2_aligned = aligned_pair
            # If R2 is mapped but R1 isn't, R2 gets reported first.
            if not R1_aligned.is_read1:
                R1_aligned, R2_aligned = R2_aligned, R1_aligned
            if (not R1_aligned.is_read1) or (not R2_aligned.is_read2):
                raise ValueError(R1_aligned, R2_aligned)

            pair_counts['mapqs'][R1_aligned.mapq] += 1
            pair_counts['mapqs'][R2_aligned.mapq] += 1

            if R1_aligned.is_unmapped or R2_aligned.is_unmapped:
                pair_counts['unmapped'] += 1
                if verbose:
                    logging.info('{0} was unmapped'.format(R1_aligned.qname))
                if unmapped_fns:
                    R1_read = sam.mapping_to_Read(R1_aligned)
                    R2_read = sam.mapping_to_Read(R2_aligned)
                    R1_unmapped_fh.write(str(R1_read))
                    R2_unmapped_fh.write(str(R2_read))
            elif is_discordant(R1_aligned, R2_aligned, max_insert_length):
                pair_counts['discordant'] += 1
            else:
                pair_counts['tids'][R1_aligned.tid] += 1
                if is_disoriented(R1_aligned, R2_aligned):
                    pair_counts['disoriented'] += 1
                elif R1_aligned.mapq < minimum_mapq or R2_aligned.mapq < minimum_mapq:
                    pair_counts['nonunique'] += 1
                    if verbose:
                        logging.info('{0} was nonunique, {1}, {2}'.format(R1_aligned.qname,
                                                                          R1_aligned.mapq,
                                                                          R2_aligned.mapq))
                else:
                    pair_counts['unique'][R1_aligned.tid] += 1

                    fragment_length = abs(R1_aligned.tlen)
                    pair_counts['fragment_lengths'][fragment_length] += 1

                    if sam.contains_indel_pysam(R1_aligned) or sam.contains_indel_pysam(R2_aligned):
                        pair_counts['indel'] += 1

                    yield R1_aligned, R2_aligned
    finally:
        # Bug fix: these handles were previously opened and never closed.
        # Closing in finally also covers early generator abandonment.
        if R1_unmapped_fh is not None:
            R1_unmapped_fh.close()
        if R2_unmapped_fh is not None:
            R2_unmapped_fh.close()

    if counts_dict is not None:
        counts_dict.update(pair_counts)
def combine_mappings(self):
    ''' Pair up R1 and R2 mappings of the same read pair, classify each pair
    (unmapped / nonunique / discordant / disoriented / concordant), combine
    concordant pairs into a single read, flip it to the sense strand, and
    write it to a sorted output. Tallies go to self.summary and the tlen
    histogram is written out as 'tlens'.

    NOTE(review): uses Python 2 constructs (izip, print statement).
    '''
    num_unmapped = 0
    num_R1_unmapped = 0
    num_R2_unmapped = 0
    num_nonunique = 0
    num_discordant = 0
    num_disoriented = 0
    num_concordant = 0
    tlens = Counter()

    R1_mappings = pysam.Samfile(self.file_names['R1_accepted_hits'])
    R1_unmapped = pysam.Samfile(self.file_names['R1_unmapped'])
    all_R1 = sam.merge_by_name(R1_mappings, R1_unmapped)
    R1_grouped = utilities.group_by(all_R1, lambda m: m.qname)

    R2_mappings = pysam.Samfile(self.file_names['R2_accepted_hits'])
    R2_unmapped = pysam.Samfile(self.file_names['R2_unmapped'])
    all_R2 = sam.merge_by_name(R2_mappings, R2_unmapped)
    R2_grouped = utilities.group_by(all_R2, lambda m: m.qname)

    # The two grouped streams are iterated in lockstep; the pair-name check
    # below guards against them drifting out of sync.
    group_pairs = izip(R1_grouped, R2_grouped)

    alignment_sorter = sam.AlignmentSorter(R1_mappings.references,
                                           R1_mappings.lengths,
                                           self.file_names['combined'],
                                          )

    with alignment_sorter:
        for (R1_qname, R1_group), (R2_qname, R2_group) in group_pairs:
            #print R1_qname, R2_qname
            if fastq.get_pair_name(R1_qname) != fastq.get_pair_name(R2_qname):
                # Ensure that the iteration through pairs is in sync.
                print R1_qname, R2_qname
                raise ValueError

            R1_unmapped = any(m.is_unmapped for m in R1_group)
            R2_unmapped = any(m.is_unmapped for m in R2_group)
            if R1_unmapped:
                num_R1_unmapped += 1
            if R2_unmapped:
                num_R2_unmapped += 1
            if R1_unmapped or R2_unmapped:
                num_unmapped += 1
                continue

            # Multiple hits or mapq below 40 on either side -> nonunique.
            R1_nonunique = len(R1_group) > 1 or any(m.mapq < 40 for m in R1_group)
            R2_nonunique = len(R2_group) > 1 or any(m.mapq < 40 for m in R2_group)
            if R1_nonunique or R2_nonunique:
                num_nonunique += 1
                continue

            R1_m = R1_group.pop()
            R2_m = R2_group.pop()

            R1_strand = sam.get_strand(R1_m)
            R2_strand = sam.get_strand(R2_m)

            # Discordant: different references, SAME strand (proper pairs map
            # to opposite strands), or implied fragment longer than 10 kb.
            tlen = max(R1_m.aend, R2_m.aend) - min(R1_m.pos, R2_m.pos)
            discordant = (R1_m.tid != R2_m.tid) or (R1_strand) == (R2_strand) or (tlen > 10000)
            if discordant:
                num_discordant += 1
                continue

            # Reminder: the protocol produces anti-sense reads.
            if R1_strand == '-':
                if R1_m.pos < R2_m.pos:
                    num_disoriented += 1
                    continue
            elif R1_strand == '+':
                if R2_m.pos < R1_m.pos:
                    num_disoriented += 1
                    continue

            combined_read = paired_end.combine_paired_mappings(R1_m, R2_m)

            tlens[tlen] += 1

            # combine_paired_mappings may return a falsy value; only truthy
            # results are written and counted as concordant.
            if combined_read:
                # Flip combined_read back to the sense strand.
                if combined_read.is_reverse:
                    combined_read.is_reverse = False
                else:
                    combined_read.is_reverse = True

                trim.set_nongenomic_length(combined_read, 0)
                alignment_sorter.write(combined_read)
                num_concordant += 1

    self.summary.extend([('Unmapped', num_unmapped),
                         ('R1 unmapped', num_R1_unmapped),
                         ('R2 unmapped', num_R2_unmapped),
                         ('Nonunique', num_nonunique),
                         ('Discordant', num_discordant),
                         ('Unexpected orientation', num_disoriented),
                         ('Concordant', num_concordant),
                        ],
                       )

    tlens = utilities.counts_to_array(tlens)
    self.write_file('tlens', tlens)