def classify_read_pairs(pe_reads, max_isize,
                        library_type, tid_genome_map,
                        tid_tx_cluster_map):
    """
    Work out how the two mates of a single fragment can be paired.

    pe_reads holds the alignments of the first mate at index 0 and of
    the second mate at index 1.  Every accepted (r1,r2) combination is
    built from copies of the alignments, which are handed to pair_reads()
    together with a DISCORDANT_TAG_NAME tag carrying a DiscordantTags
    value.

    Pairing is attempted in stages:
    1) mates aligned to the same reference.  A reference listed in
       tid_genome_map always yields a pair (TX tags); any other
       reference is handled as a genomic hit and yields a pair only
       when the span covered by both alignments is <= max_isize
       (GENOME tags)
    2) mates aligned to the same transcript cluster (GENE tags); tried
       only if stage 1 produced no pair at all
    3) find_discordant_pairs() on the best mismatch strata of each
       mate; tried only if the earlier stages produced nothing

    Pairs whose strands agree with library_type are preferred over pairs
    that violate it, and same-transcript pairs are preferred over
    same-cluster pairs.

    returns a tuple with three items:
    1) list of pairs (r1,r2) aligning to genes (pairs may be discordant);
       when stage 3 only finds "combo" pairs they are returned here
    2) list of pairs (r1,r2) aligning to genome (pairs may be discordant)
    3) unpaired reads: an empty list, or the strata-filtered reads when
       no pairing could be found at all
    """
    def _register(read1, read2, tag_value, bucket):
        # duplicate both alignments, file the pair, then tag and mate them
        mate1 = copy_read(read1)
        mate2 = copy_read(read2)
        bucket.append((mate1, mate2))
        pair_reads(mate1, mate2, [(DISCORDANT_TAG_NAME, tag_value)])

    # one bucket per (reference kind, strand agreement) combination
    tx_ok, tx_bad = [], []
    cluster_ok, cluster_bad = [], []
    genome_ok, genome_bad = [], []
    # the library type dictates whether mates must share a strand
    want_same_strand = LibraryTypes.same_strand(library_type)
    by_reference, by_cluster = map_reads_to_references(pe_reads,
                                                       tid_tx_cluster_map)
    #
    # stage 1: mates that landed on the same reference
    #
    paired_any = False
    for tid, hits in by_reference.iteritems():
        mates1, mates2 = hits[0], hits[1]
        if not (len(mates1) > 0 and len(mates2) > 0):
            # only one of the mates aligned here, nothing to pair
            continue
        # references missing from tid_genome_map are genomic hits
        is_genomic = tid not in tid_genome_map
        for r1 in mates1:
            for r2 in mates2:
                strands_ok = (want_same_strand ==
                              (r1.is_reverse == r2.is_reverse))
                if is_genomic:
                    # span runs from the start of the leftmost mate to
                    # the end of the mate that starts further right
                    if r1.pos > r2.pos:
                        span = r1.aend - r2.pos
                    else:
                        span = r2.aend - r1.pos
                    if span <= max_isize:
                        paired_any = True
                        if strands_ok:
                            _register(r1, r2,
                                      DiscordantTags.CONCORDANT_GENOME,
                                      genome_ok)
                        else:
                            _register(r1, r2,
                                      DiscordantTags.DISCORDANT_STRAND_GENOME,
                                      genome_bad)
                else:
                    # same transcript: always pairable, the strand check
                    # only decides which tag the pair receives
                    paired_any = True
                    if strands_ok:
                        _register(r1, r2,
                                  DiscordantTags.CONCORDANT_TX,
                                  tx_ok)
                    else:
                        _register(r1, r2,
                                  DiscordantTags.DISCORDANT_STRAND_TX,
                                  tx_bad)
    #
    # stage 2: nothing paired so far, so widen the search to mates that
    # share a transcript cluster
    #
    if not paired_any:
        for cluster_id, hits in by_cluster.iteritems():
            mates1, mates2 = hits[0], hits[1]
            if not (len(mates1) > 0 and len(mates2) > 0):
                continue
            for r1 in mates1:
                for r2 in mates2:
                    strands_ok = (want_same_strand ==
                                  (r1.is_reverse == r2.is_reverse))
                    if strands_ok:
                        _register(r1, r2,
                                  DiscordantTags.CONCORDANT_GENE,
                                  cluster_ok)
                    else:
                        _register(r1, r2,
                                  DiscordantTags.DISCORDANT_STRAND_GENE,
                                  cluster_bad)
    # concordant pairs take precedence: when any exist, the strand
    # violating pairs are not reported at all
    gene_pairs = tx_ok or cluster_ok
    if gene_pairs or genome_ok:
        return gene_pairs, genome_ok, []
    # otherwise fall back to pairs that break the strand rule but are
    # still colocalized on one gene/chromosome
    gene_pairs = tx_bad or cluster_bad
    if gene_pairs or genome_bad:
        return gene_pairs, genome_bad, []
    #
    # stage 3: no pairing exists, so the fragment is treated as
    # discordant
    #
    # TODO: only the best mismatch strata are kept from here on; it is
    # an open question whether discarding the other mappings is the
    # right thing to do
    #
    best_reads = (select_best_mismatch_strata(pe_reads[0]),
                  select_best_mismatch_strata(pe_reads[1]))
    # enumerate the valid read1/read2 combinations as putative
    # discordant pairs
    gene_pairs, genome_pairs, combo_pairs = \
        find_discordant_pairs(best_reads, tid_genome_map, library_type)
    if len(gene_pairs) > 0 or len(genome_pairs) > 0:
        return gene_pairs, genome_pairs, []
    if len(combo_pairs) > 0:
        return combo_pairs, [], []
    # last resort: the mappings are complex, cannot be explained, and
    # warrant further investigation
    return [], [], best_reads
Example #2
0
def classify_read_pairs(pe_reads, max_isize, library_type, tid_tx_map):
    """
    Work out how the two mates of a single fragment can be paired.

    Every accepted (r1,r2) combination is built from copies of the
    alignments, which are handed to pair_reads() together with a
    DISCORDANT_TAG_NAME tag carrying a DiscordantTags value.

    Pairing is attempted in stages:
    1) mates aligned to the same transcript (TX tags)
    2) mates aligned to the same transcript cluster (GENE tags); tried
       only if stage 1 produced no pair at all
    3) find_discordant_pairs() followed by select_best_scoring_pairs();
       tried only if the earlier stages produced nothing

    Pairs whose strands agree with library_type are preferred over pairs
    that violate it, and same-transcript pairs are preferred over
    same-cluster pairs.

    max_isize is accepted but not used by this function.

    returns a tuple containing 3 items:
    1) list of (r1,r2) pairs found in stage 1 or 2 (strand-violating
       pairs are returned here only when no concordant pair exists)
    2) list of discordant (r1,r2) pairs found in stage 3
    3) unpaired reads: an empty list, or pe_reads itself when no valid
       pair could be formed
    """
    def _register(read1, read2, tag_value, bucket):
        # duplicate both alignments, file the pair, then tag and mate them
        mate1 = copy_read(read1)
        mate2 = copy_read(read2)
        bucket.append((mate1, mate2))
        pair_reads(mate1, mate2, [(DISCORDANT_TAG_NAME, tag_value)])

    # one bucket per (reference kind, strand agreement) combination
    tx_ok, tx_bad = [], []
    cluster_ok, cluster_bad = [], []
    # the library type dictates whether mates must share a strand
    want_same_strand = LibraryTypes.same_strand(library_type)
    by_reference, by_cluster = map_reads_to_references(pe_reads, tid_tx_map)
    #
    # stage 1: mates that landed on the same transcript
    #
    paired_any = False
    for tid, hits in by_reference.iteritems():
        mates1, mates2 = hits[0], hits[1]
        if not (len(mates1) > 0 and len(mates2) > 0):
            # only one of the mates aligned here, nothing to pair
            continue
        for r1 in mates1:
            for r2 in mates2:
                # always pairable; the strand check only decides which
                # tag the pair receives
                paired_any = True
                if want_same_strand == (r1.is_reverse == r2.is_reverse):
                    _register(r1, r2,
                              DiscordantTags.CONCORDANT_TX,
                              tx_ok)
                else:
                    _register(r1, r2,
                              DiscordantTags.DISCORDANT_STRAND_TX,
                              tx_bad)
    #
    # stage 2: nothing paired so far, so widen the search to mates that
    # share a transcript cluster
    #
    if not paired_any:
        for cluster_id, hits in by_cluster.iteritems():
            mates1, mates2 = hits[0], hits[1]
            if not (len(mates1) > 0 and len(mates2) > 0):
                continue
            for r1 in mates1:
                for r2 in mates2:
                    if want_same_strand == (r1.is_reverse == r2.is_reverse):
                        _register(r1, r2,
                                  DiscordantTags.CONCORDANT_GENE,
                                  cluster_ok)
                    else:
                        _register(r1, r2,
                                  DiscordantTags.DISCORDANT_STRAND_GENE,
                                  cluster_bad)
    # concordant pairs take precedence: when any exist, the strand
    # violating pairs are not reported at all
    gene_pairs = tx_ok or cluster_ok
    if gene_pairs:
        return gene_pairs, [], []
    # otherwise fall back to pairs that break the strand rule but are
    # still colocalized on one transcript/cluster
    gene_pairs = tx_bad or cluster_bad
    if gene_pairs:
        return gene_pairs, [], []
    #
    # stage 3: no pairing exists, so the fragment is treated as
    # discordant; enumerate the valid read1/read2 combinations and keep
    # only the best scoring ones
    #
    candidates = find_discordant_pairs(pe_reads, library_type)
    if len(candidates) > 0:
        return [], select_best_scoring_pairs(candidates), []
    # nothing valid: the alignments are either artifacts or the current
    # transcript annotations do not support this pair
    return [], [], pe_reads
Example #3
0
 def get_argument_parser():
     """
     Build the command-line parser.

     The parser takes four positional arguments (index_dir, read1, read2,
     output_dir), a set of general options, and a 'Filtering options'
     argument group.

     returns the configured argparse.ArgumentParser
     """
     parser = argparse.ArgumentParser(usage="%(prog)s [options] <index> "
                                      "<mate1.fq> <mate2.fq> <output_dir>")
     # required options
     parser.add_argument("index_dir", default=None,
                         help="Location of chimerascan index directory")
     parser.add_argument("read1", default=None,
                         help="Path to read1 FASTQ file")
     parser.add_argument("read2", default=None,
                         help="Path to read2 FASTQ file")
     parser.add_argument("output_dir", default=None,
                         help="Location of output files")
     # standard options
     parser.add_argument('--version', action='version',
                         version='%s' % __version__)
     parser.add_argument("--config-file", dest="config_file",
                         help="Load parameters from a XML file "
                         "generated during a previous run ",
                         default=None)
     parser.add_argument("-v", "--verbose", dest="verbose",
                         action="store_true", default=False,
                         help="enable verbose logging output "
                         "[default=%(default)s]")
     parser.add_argument("-p", "--processors", dest="num_processors",
                         type=int, default=DEFAULT_NUM_PROCESSORS,
                         help="Number of processor cores to allocate to "
                         "chimerascan [default=%(default)s]")
     # --keep-tmp and --rm-tmp write to the same destination, so the
     # last one given on the command line wins
     parser.add_argument("--keep-tmp", dest="keep_tmp",
                         action="store_true",
                         default=DEFAULT_KEEP_TMP,
                         help="DO NOT delete intermediate files after "
                         "run [default=%(default)s]")
     parser.add_argument("--rm-tmp", dest="keep_tmp",
                         action="store_false",
                         help="Delete intermediate files after run "
                         "[default=%s]" % str(not DEFAULT_KEEP_TMP))
     parser.add_argument("--quals", dest="quals",
                         choices=FASTQ_QUAL_FORMATS,
                         default=DEFAULT_FASTQ_QUAL_FORMAT, metavar="FMT",
                         help="FASTQ quality score format "
                         "[default=%(default)s]")
     parser.add_argument('--library-type', dest="library_type",
                         choices=LibraryTypes.choices(),
                         default=DEFAULT_LIBRARY_TYPE,
                         help="Library type [default=%(default)s]")
     parser.add_argument("--isize-mean", dest="isize_mean", type=int,
                         default=DEFAULT_ISIZE_MEAN, metavar="N",
                         help="Mean insert size to sample from when "
                         "insert size distribution cannot be determined "
                         "empirically [default=%(default)s]")
     parser.add_argument("--isize-stdev", dest="isize_stdev", type=float,
                         default=DEFAULT_ISIZE_STDEV, metavar="N",
                         help="Insert size standard deviation to sample "
                         "from when insert size distribution cannot be "
                         "determined empirically [default=%(default)s]")
     parser.add_argument("--trim5", type=int, dest="trim5",
                         default=DEFAULT_TRIM5, metavar="N",
                         help="Trim N bases from 5' end of read")
     parser.add_argument("--trim3", type=int, dest="trim3",
                         default=DEFAULT_TRIM3, metavar="N",
                         help="Trim N bases from 3' end of read")
     parser.add_argument("--min-fragment-length", type=int,
                         dest="min_fragment_length",
                         default=config.DEFAULT_MIN_FRAG_LENGTH,
                         help="Smallest expected fragment length "
                         "[default=%(default)s]")
     parser.add_argument("--max-fragment-length", type=int,
                         dest="max_fragment_length",
                         default=config.DEFAULT_MAX_FRAG_LENGTH,
                         help="Largest expected fragment length (reads "
                         "less than this fragment length are assumed to "
                         "be unspliced and contiguous) "
                         "[default=%(default)s]")
     parser.add_argument("--segment-length", type=int,
                         dest="segment_length",
                         default=DEFAULT_SEGMENT_LENGTH,
                         metavar="N",
                         help="Override size of soft-clipped read "
                         "segments during discordant alignment phase "
                         "(determined empirically by default)")
     parser.add_argument("--multihits", type=int,
                         dest="max_multihits",
                         default=config.DEFAULT_MAX_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     # NOTE(review): this help text is identical to the one used for
     # --multihits; confirm whether it should describe a different
     # alignment step
     parser.add_argument("--local-multihits", type=int,
                         dest="local_multihits",
                         default=config.DEFAULT_LOCAL_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     parser.add_argument("--local-anchor-length", type=int,
                         dest="local_anchor_length",
                         default=config.DEFAULT_LOCAL_ANCHOR_LENGTH,
                         metavar="N",
                         help="Number of bases that read must span "
                         "on each side of a chimera to be considered "
                         "a valid breakpoint read")
     # filtering options
     group = parser.add_argument_group('Filtering options')
     group.add_argument("--filter-num-frags", type=float,
                        default=config.DEFAULT_FILTER_FRAGS,
                        dest="filter_num_frags", metavar="N",
                        help="Filter chimeras with less than N "
                        "aligned fragments [default=%(default)s]")
     # the closing bracket after the default was missing, which left the
     # --help output with an unbalanced "[default=..."
     group.add_argument("--filter-allele-fraction", type=float,
                        default=config.DEFAULT_FILTER_ALLELE_FRACTION,
                        dest="filter_allele_fraction", metavar="X",
                        help="Filter chimeras with expression less than "
                        "the specified fraction of the total expression "
                        "level [default=%(default)s]")
     group.add_argument("--mask-biotypes-file", default="",
                        dest="mask_biotypes_file",
                        help="File containing list of gene biotypes "
                        "to ignore (ex. pseudogenes, rRNA)")
     group.add_argument("--mask-rnames-file", default="",
                        dest="mask_rnames_file",
                        help="File containing list of reference names "
                        "to ignore (ex. MT or chrM)")
     return parser
 def get_argument_parser():
     """
     Build the command-line parser.

     The parser takes four positional arguments (index_dir, read1, read2,
     output_dir), a set of general options, and a 'Filtering options'
     argument group.

     returns the configured argparse.ArgumentParser
     """
     parser = argparse.ArgumentParser(usage="%(prog)s [options] <index> "
                                      "<mate1.fq> <mate2.fq> <output_dir>")
     # required options
     parser.add_argument("index_dir",
                         default=None,
                         help="Location of chimerascan index directory")
     parser.add_argument("read1",
                         default=None,
                         help="Path to read1 FASTQ file")
     parser.add_argument("read2",
                         default=None,
                         help="Path to read2 FASTQ file")
     parser.add_argument("output_dir",
                         default=None,
                         help="Location of output files")
     # standard options
     parser.add_argument('--version',
                         action='version',
                         version='%s' % __version__)
     parser.add_argument("--config-file",
                         dest="config_file",
                         help="Load parameters from a XML file "
                         "generated during a previous run ",
                         default=None)
     parser.add_argument("-v",
                         "--verbose",
                         dest="verbose",
                         action="store_true",
                         default=False,
                         help="enable verbose logging output "
                         "[default=%(default)s]")
     parser.add_argument("-p",
                         "--processors",
                         dest="num_processors",
                         type=int,
                         default=DEFAULT_NUM_PROCESSORS,
                         help="Number of processor cores to allocate to "
                         "chimerascan [default=%(default)s]")
     # --keep-tmp and --rm-tmp write to the same destination, so the
     # last one given on the command line wins
     parser.add_argument("--keep-tmp",
                         dest="keep_tmp",
                         action="store_true",
                         default=DEFAULT_KEEP_TMP,
                         help="DO NOT delete intermediate files after "
                         "run [default=%(default)s]")
     parser.add_argument("--rm-tmp",
                         dest="keep_tmp",
                         action="store_false",
                         help="Delete intermediate files after run "
                         "[default=%s]" % str(not DEFAULT_KEEP_TMP))
     parser.add_argument("--quals",
                         dest="quals",
                         choices=FASTQ_QUAL_FORMATS,
                         default=DEFAULT_FASTQ_QUAL_FORMAT,
                         metavar="FMT",
                         help="FASTQ quality score format "
                         "[default=%(default)s]")
     parser.add_argument('--library-type',
                         dest="library_type",
                         choices=LibraryTypes.choices(),
                         default=DEFAULT_LIBRARY_TYPE,
                         help="Library type [default=%(default)s]")
     parser.add_argument("--isize-mean",
                         dest="isize_mean",
                         type=int,
                         default=DEFAULT_ISIZE_MEAN,
                         metavar="N",
                         help="Mean insert size to sample from when "
                         "insert size distribution cannot be determined "
                         "empirically [default=%(default)s]")
     parser.add_argument("--isize-stdev",
                         dest="isize_stdev",
                         type=float,
                         default=DEFAULT_ISIZE_STDEV,
                         metavar="N",
                         help="Insert size standard deviation to sample "
                         "from when insert size distribution cannot be "
                         "determined empirically [default=%(default)s]")
     parser.add_argument("--trim5",
                         type=int,
                         dest="trim5",
                         default=DEFAULT_TRIM5,
                         metavar="N",
                         help="Trim N bases from 5' end of read")
     parser.add_argument("--trim3",
                         type=int,
                         dest="trim3",
                         default=DEFAULT_TRIM3,
                         metavar="N",
                         help="Trim N bases from 3' end of read")
     parser.add_argument("--min-fragment-length",
                         type=int,
                         dest="min_fragment_length",
                         default=config.DEFAULT_MIN_FRAG_LENGTH,
                         help="Smallest expected fragment length "
                         "[default=%(default)s]")
     parser.add_argument("--max-fragment-length",
                         type=int,
                         dest="max_fragment_length",
                         default=config.DEFAULT_MAX_FRAG_LENGTH,
                         help="Largest expected fragment length (reads "
                         "less than this fragment length are assumed to "
                         "be unspliced and contiguous) "
                         "[default=%(default)s]")
     parser.add_argument("--segment-length",
                         type=int,
                         dest="segment_length",
                         default=DEFAULT_SEGMENT_LENGTH,
                         metavar="N",
                         help="Override size of soft-clipped read "
                         "segments during discordant alignment phase "
                         "(determined empirically by default)")
     parser.add_argument("--multihits",
                         type=int,
                         dest="max_multihits",
                         default=config.DEFAULT_MAX_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     # NOTE(review): this help text is identical to the one used for
     # --multihits; confirm whether it should describe a different
     # alignment step
     parser.add_argument("--local-multihits",
                         type=int,
                         dest="local_multihits",
                         default=config.DEFAULT_LOCAL_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     parser.add_argument("--local-anchor-length",
                         type=int,
                         dest="local_anchor_length",
                         default=config.DEFAULT_LOCAL_ANCHOR_LENGTH,
                         metavar="N",
                         help="Number of bases that read must span "
                         "on each side of a chimera to be considered "
                         "a valid breakpoint read")
     # filtering options
     group = parser.add_argument_group('Filtering options')
     group.add_argument("--filter-num-frags",
                        type=float,
                        default=config.DEFAULT_FILTER_FRAGS,
                        dest="filter_num_frags",
                        metavar="N",
                        help="Filter chimeras with less than N "
                        "aligned fragments [default=%(default)s]")
     # the closing bracket after the default was missing, which left the
     # --help output with an unbalanced "[default=..."
     group.add_argument("--filter-allele-fraction",
                        type=float,
                        default=config.DEFAULT_FILTER_ALLELE_FRACTION,
                        dest="filter_allele_fraction",
                        metavar="X",
                        help="Filter chimeras with expression less than "
                        "the specified fraction of the total expression "
                        "level [default=%(default)s]")
     group.add_argument("--mask-biotypes-file",
                        default="",
                        dest="mask_biotypes_file",
                        help="File containing list of gene biotypes "
                        "to ignore (ex. pseudogenes, rRNA)")
     group.add_argument("--mask-rnames-file",
                        default="",
                        dest="mask_rnames_file",
                        help="File containing list of reference names "
                        "to ignore (ex. MT or chrM)")
     return parser
def classify_read_pairs(pe_reads, max_isize,
                        library_type, 
                        tid_tx_map):
    """
    Decide how the alignments of the two mates of one fragment pair up.

    Each accepted (r1,r2) combination consists of copies of the two
    alignments; the copies are given to pair_reads() along with a
    DISCORDANT_TAG_NAME tag whose value comes from DiscordantTags.

    The search proceeds as follows:
    1) mates that share a transcript (TX tags)
    2) mates that share a transcript cluster (GENE tags), but only when
       step 1 did not pair anything
    3) find_discordant_pairs() filtered by select_best_scoring_pairs(),
       but only when steps 1 and 2 returned nothing

    Pairs whose strands satisfy library_type are returned in preference
    to pairs that violate it, and transcript-level pairs in preference
    to cluster-level pairs.

    max_isize is part of the signature but is not read here.

    returns a tuple containing 3 items:
    1) list of (r1,r2) pairs from step 1 or 2 (strand-violating pairs
       appear here only when there is no concordant pair)
    2) list of discordant (r1,r2) pairs from step 3
    3) unpaired reads: an empty list, or pe_reads itself when no valid
       pair exists
    """
    same_strand_expected = LibraryTypes.same_strand(library_type)

    def _pair_within(groups, good_tag, bad_tag, good_pairs, bad_pairs):
        # pair every read1/read2 combination inside each group and report
        # whether at least one pair was produced
        produced = False
        for _key, members in groups.iteritems():
            firsts = members[0]
            seconds = members[1]
            if len(firsts) == 0 or len(seconds) == 0:
                # one of the mates has no alignment in this group
                continue
            for r1 in firsts:
                for r2 in seconds:
                    agrees = (same_strand_expected ==
                              (r1.is_reverse == r2.is_reverse))
                    produced = True
                    dup1 = copy_read(r1)
                    dup2 = copy_read(r2)
                    if agrees:
                        chosen_tag = good_tag
                        good_pairs.append((dup1, dup2))
                    else:
                        # same group but wrong strand orientation
                        chosen_tag = bad_tag
                        bad_pairs.append((dup1, dup2))
                    pair_reads(dup1, dup2,
                               [(DISCORDANT_TAG_NAME, chosen_tag)])
        return produced

    refdict, clusterdict = map_reads_to_references(pe_reads, tid_tx_map)
    concordant_tx, discordant_tx = [], []
    concordant_cluster, discordant_cluster = [], []
    # step 1: same transcript
    found = _pair_within(refdict,
                         DiscordantTags.CONCORDANT_TX,
                         DiscordantTags.DISCORDANT_STRAND_TX,
                         concordant_tx, discordant_tx)
    # step 2: same transcript cluster, only as a fallback
    if not found:
        _pair_within(clusterdict,
                     DiscordantTags.CONCORDANT_GENE,
                     DiscordantTags.DISCORDANT_STRAND_GENE,
                     concordant_cluster, discordant_cluster)
    # report the first non-empty candidate list, concordant lists first
    # and transcript-level lists before cluster-level lists
    for candidate in (concordant_tx, concordant_cluster,
                      discordant_tx, discordant_cluster):
        if len(candidate) > 0:
            return candidate, [], []
    # step 3: no colocalized pairing exists, so treat the fragment as
    # discordant and keep the best scoring read1/read2 combinations
    pairs = find_discordant_pairs(pe_reads, library_type)
    if len(pairs) > 0:
        return [], select_best_scoring_pairs(pairs), []
    # no valid pair: the alignments are either artifacts or are not
    # supported by the current transcript annotations
    return [], [], pe_reads