Python LibraryTypes Examples

Programming Language: Python

Namespace/Package Name: chimerascan.lib.base

Class/Type: LibraryTypes

Examples at hotexamples.com: 5

Python LibraryTypes - 5 examples found. These are the top-rated real-world Python examples of chimerascan.lib.base.LibraryTypes, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

same_strand(2)

choices(1)

Example #1

Show file

File: find_discordant_reads_v0.4.4.py Project: marcopavoni/chimerascan

def classify_read_pairs(pe_reads, max_isize,
                        library_type, tid_genome_map,
                        tid_tx_cluster_map):
    """
    Pair up the alignments of the two mates of a single fragment.

    Every pair produced by the search in this function is built from
    copies of the alignments (copy_read) and is handed to pair_reads()
    with a (DISCORDANT_TAG_NAME, DiscordantTags.<value>) tag recording
    how the pair was formed.

    The first stage that yields anything decides the return value:
    1) strand-concordant pairs (same reference, otherwise same cluster,
       plus genomic pairs whose span is at most max_isize)
    2) strand-discordant pairs found by that same search
    3) the pairs proposed by find_discordant_pairs() for the output of
       select_best_mismatch_strata() on each mate

    returns a tuple with the following lists:
    1) pairs (r1,r2) aligning to genes (pairs may be discordant)
    2) pairs (r1,r2) aligning to genome (pairs may be discordant)
    3) unpaired reads; only non-empty when no pairing was found at all
    """
    def _emit(aln1, aln2, tag_value, bucket):
        # copy both alignments, file the copies under `bucket`, then let
        # pair_reads() annotate the copies with the classification tag
        mate1 = copy_read(aln1)
        mate2 = copy_read(aln2)
        bucket.append((mate1, mate2))
        pair_reads(mate1, mate2, [(DISCORDANT_TAG_NAME, tag_value)])

    # one (concordant, discordant) bucket pair per pairing level
    tx_ok, tx_bad = [], []
    cluster_ok, cluster_bad = [], []
    genome_ok, genome_bad = [], []
    # the library type dictates whether mates must share a strand
    same_strand = LibraryTypes.same_strand(library_type)
    refdict, clusterdict = map_reads_to_references(pe_reads,
                                                   tid_tx_cluster_map)
    found_pair = False
    #
    # stage one: mates hitting the very same reference
    #
    for tid, tid_pe_reads in refdict.iteritems():
        # both mates need at least one alignment on this reference
        mate1_alns = tid_pe_reads[0]
        if len(mate1_alns) == 0:
            continue
        mate2_alns = tid_pe_reads[1]
        if len(mate2_alns) == 0:
            continue
        for r1 in mate1_alns:
            for r2 in mate2_alns:
                strand_match = (same_strand ==
                                (r1.is_reverse == r2.is_reverse))
                if tid in tid_genome_map:
                    # hit to the same transcript: always pairable, the
                    # strand check only decides which tag it receives
                    found_pair = True
                    if strand_match:
                        _emit(r1, r2, DiscordantTags.CONCORDANT_TX, tx_ok)
                    else:
                        # same transcript but unexpected strands
                        _emit(r1, r2, DiscordantTags.DISCORDANT_STRAND_TX,
                              tx_bad)
                    continue
                # NOTE(review): references absent from tid_genome_map are
                # treated as genomic hits here -- confirm the map's
                # semantics against the caller
                if r1.pos > r2.pos:
                    isize = r1.aend - r2.pos
                else:
                    isize = r2.aend - r1.pos
                if isize <= max_isize:
                    # close enough on the same reference to be paired
                    found_pair = True
                    if strand_match:
                        _emit(r1, r2, DiscordantTags.CONCORDANT_GENOME,
                              genome_ok)
                    else:
                        _emit(r1, r2,
                              DiscordantTags.DISCORDANT_STRAND_GENOME,
                              genome_bad)
    #
    # stage two: nothing paired so far, so widen the search to mates
    # that fall into the same transcript cluster
    #
    if not found_pair:
        for cluster_id, cluster_pe_reads in clusterdict.iteritems():
            mate1_alns = cluster_pe_reads[0]
            if len(mate1_alns) == 0:
                continue
            mate2_alns = cluster_pe_reads[1]
            if len(mate2_alns) == 0:
                continue
            for r1 in mate1_alns:
                for r2 in mate2_alns:
                    strand_match = (same_strand ==
                                    (r1.is_reverse == r2.is_reverse))
                    found_pair = True
                    if strand_match:
                        _emit(r1, r2, DiscordantTags.CONCORDANT_GENE,
                              cluster_ok)
                    else:
                        _emit(r1, r2,
                              DiscordantTags.DISCORDANT_STRAND_GENE,
                              cluster_bad)
    # concordant pairs win outright; transcript-level pairs take
    # precedence over cluster-level ones
    gene_pairs = tx_ok if tx_ok else cluster_ok
    if gene_pairs or genome_ok:
        return gene_pairs, genome_ok, []
    # otherwise fall back to pairs that violate the strand requirement
    # but are still colocalized on the same gene/chromosome
    gene_pairs = tx_bad if tx_bad else cluster_bad
    if gene_pairs or genome_bad:
        return gene_pairs, genome_bad, []
    #
    # no pairing was found, so the fragment is assumed to be discordant.
    #
    # TODO: now that we know that the reads are discordant, no reason
    # to keep all the mappings hanging around if there is a small subset
    # with a small number of mismatches.  is this the right thing to do
    # here?
    #
    pe_reads = (select_best_mismatch_strata(pe_reads[0]),
                select_best_mismatch_strata(pe_reads[1]))
    # enumerate read1/read2 combinations as putative discordant pairs
    gene_pairs, genome_pairs, combo_pairs = find_discordant_pairs(
        pe_reads, tid_genome_map, library_type)
    if len(gene_pairs) > 0 or len(genome_pairs) > 0:
        return gene_pairs, genome_pairs, []
    if len(combo_pairs) > 0:
        return combo_pairs, [], []
    # last resort: the mappings cannot be explained and are handed back
    # unpaired for further investigation
    return [], [], pe_reads

Example #2

Show file

def classify_read_pairs(pe_reads, max_isize, library_type, tid_tx_map):
    """
    Pair up the alignments of the two mates of a single fragment.

    Each pair formed by the search in this function is made of copies of
    the alignments (copy_read) and is passed to pair_reads() with a
    (DISCORDANT_TAG_NAME, DiscordantTags.<value>) tag.

    max_isize is accepted but not used in this function.

    returns a tuple containing 3 lists, only one of which is non-empty:
    1) (r1,r2) pairs colocalized on one transcript or transcript
       cluster; concordant pairs are preferred, strand-discordant ones
       are returned only when no concordant pair exists
    2) best scoring pairs from find_discordant_pairs(), used when no
       colocalized pair exists
    3) the untouched pe_reads when no pair could be formed
    """
    def _emit(aln1, aln2, tag_value, bucket):
        # copy both alignments, file the copies under `bucket`, then let
        # pair_reads() annotate the copies with the classification tag
        mate1 = copy_read(aln1)
        mate2 = copy_read(aln2)
        bucket.append((mate1, mate2))
        pair_reads(mate1, mate2, [(DISCORDANT_TAG_NAME, tag_value)])

    tx_ok, tx_bad = [], []
    cluster_ok, cluster_bad = [], []
    # the library type dictates whether mates must share a strand
    same_strand = LibraryTypes.same_strand(library_type)
    refdict, clusterdict = map_reads_to_references(pe_reads, tid_tx_map)
    found_pair = False
    #
    # stage one: mates aligned to the very same transcript
    #
    for tid, tid_pe_reads in refdict.iteritems():
        # both mates need at least one alignment on this reference
        mate1_alns = tid_pe_reads[0]
        if len(mate1_alns) == 0:
            continue
        mate2_alns = tid_pe_reads[1]
        if len(mate2_alns) == 0:
            continue
        for r1 in mate1_alns:
            for r2 in mate2_alns:
                strand_match = (same_strand ==
                                (r1.is_reverse == r2.is_reverse))
                # any two mates on the same transcript can be paired;
                # the strand check only decides the tag
                found_pair = True
                if strand_match:
                    _emit(r1, r2, DiscordantTags.CONCORDANT_TX, tx_ok)
                else:
                    # same transcript but unexpected strands
                    _emit(r1, r2, DiscordantTags.DISCORDANT_STRAND_TX,
                          tx_bad)
    #
    # stage two: nothing paired so far, so widen the search to mates
    # that fall into the same transcript cluster
    #
    if not found_pair:
        for cluster_id, cluster_pe_reads in clusterdict.iteritems():
            mate1_alns = cluster_pe_reads[0]
            if len(mate1_alns) == 0:
                continue
            mate2_alns = cluster_pe_reads[1]
            if len(mate2_alns) == 0:
                continue
            for r1 in mate1_alns:
                for r2 in mate2_alns:
                    strand_match = (same_strand ==
                                    (r1.is_reverse == r2.is_reverse))
                    found_pair = True
                    if strand_match:
                        _emit(r1, r2, DiscordantTags.CONCORDANT_GENE,
                              cluster_ok)
                    else:
                        _emit(r1, r2,
                              DiscordantTags.DISCORDANT_STRAND_GENE,
                              cluster_bad)
    # return the first non-empty bucket in priority order: concordant
    # before strand-discordant, transcript level before cluster level
    for gene_pairs in (tx_ok, cluster_ok, tx_bad, cluster_bad):
        if gene_pairs:
            return gene_pairs, [], []
    #
    # no colocalized pairing exists, so the fragment is assumed to be
    # discordant; enumerate read1/read2 combinations as putative
    # discordant pairs
    #
    pairs = find_discordant_pairs(pe_reads, library_type)
    if len(pairs) > 0:
        # keep only the best scoring candidates
        return [], select_best_scoring_pairs(pairs), []
    #
    # no valid pairs could be found suggesting that these alignments are
    # either artifacts or that the current transcript annotations do not
    # support this pair
    #
    return [], [], pe_reads

Example #3

Show file

File: chimerascan_run.py Project: BioXiao/chimerascan

 def get_argument_parser():
     """
     Build the command line parser for a chimerascan run.

     The parser takes four positional arguments (index_dir, read1, read2,
     output_dir), a set of general options, and a 'Filtering options'
     argument group.  Option defaults come from module-level constants
     and from the config module.

     returns the configured argparse.ArgumentParser
     """
     parser = argparse.ArgumentParser(usage="%(prog)s [options] <index> "
                                      "<mate1.fq> <mate2.fq> <output_dir>")
     # required options
     parser.add_argument("index_dir", default=None,
                         help="Location of chimerascan index directory")
     parser.add_argument("read1", default=None,
                         help="Path to read1 FASTQ file")
     parser.add_argument("read2", default=None,
                         help="Path to read2 FASTQ file")
     parser.add_argument("output_dir", default=None,
                         help="Location of output files")
     # standard options
     parser.add_argument('--version', action='version',
                         version='%s' % __version__)
     parser.add_argument("--config-file", dest="config_file",
                         help="Load parameters from a XML file "
                         "generated during a previous run ",
                         default=None)
     parser.add_argument("-v", "--verbose", dest="verbose",
                         action="store_true", default=False,
                         help="enable verbose logging output "
                         "[default=%(default)s]")
     parser.add_argument("-p", "--processors", dest="num_processors",
                         type=int, default=DEFAULT_NUM_PROCESSORS,
                         help="Number of processor cores to allocate to "
                         "chimerascan [default=%(default)s]")
     # --keep-tmp and --rm-tmp write to the same destination; the default
     # is declared once, on --keep-tmp
     parser.add_argument("--keep-tmp", dest="keep_tmp",
                         action="store_true",
                         default=DEFAULT_KEEP_TMP,
                         help="DO NOT delete intermediate files after "
                         "run [default=%(default)s]")
     parser.add_argument("--rm-tmp", dest="keep_tmp",
                         action="store_false",
                         help="Delete intermediate files after run "
                         "[default=%s]" % str(not DEFAULT_KEEP_TMP))
     parser.add_argument("--quals", dest="quals",
                         choices=FASTQ_QUAL_FORMATS,
                         default=DEFAULT_FASTQ_QUAL_FORMAT, metavar="FMT",
                         help="FASTQ quality score format "
                         "[default=%(default)s]")
     parser.add_argument('--library-type', dest="library_type",
                         choices=LibraryTypes.choices(),
                         default=DEFAULT_LIBRARY_TYPE,
                         help="Library type [default=%(default)s]")
     parser.add_argument("--isize-mean", dest="isize_mean", type=int,
                         default=DEFAULT_ISIZE_MEAN, metavar="N",
                         help="Mean insert size to sample from when "
                         "insert size distribution cannot be determined "
                         "empirically [default=%(default)s]")
     parser.add_argument("--isize-stdev", dest="isize_stdev", type=float,
                         default=DEFAULT_ISIZE_STDEV, metavar="N",
                         help="Insert size standard deviation to sample "
                         "from when insert size distribution cannot be "
                         "determined empirically [default=%(default)s]")
     parser.add_argument("--trim5", type=int, dest="trim5",
                         default=DEFAULT_TRIM5, metavar="N",
                         help="Trim N bases from 5' end of read")
     parser.add_argument("--trim3", type=int, dest="trim3",
                         default=DEFAULT_TRIM3, metavar="N",
                         help="Trim N bases from 3' end of read")
     parser.add_argument("--min-fragment-length", type=int,
                         dest="min_fragment_length",
                         default=config.DEFAULT_MIN_FRAG_LENGTH,
                         help="Smallest expected fragment length "
                         "[default=%(default)s]")
     parser.add_argument("--max-fragment-length", type=int,
                         dest="max_fragment_length",
                         default=config.DEFAULT_MAX_FRAG_LENGTH,
                         help="Largest expected fragment length (reads "
                         "less than this fragment length are assumed to "
                         "be unspliced and contiguous) "
                         "[default=%(default)s]")
     parser.add_argument("--segment-length", type=int,
                         dest="segment_length",
                         default=DEFAULT_SEGMENT_LENGTH,
                         metavar="N",
                         help="Override size of soft-clipped read "
                         "segments during discordant alignment phase "
                         "(determined empirically by default)")
     parser.add_argument("--multihits", type=int,
                         dest="max_multihits",
                         default=config.DEFAULT_MAX_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     # NOTE(review): this help text is identical to the --multihits one
     # and looks copy-pasted; confirm the intended wording
     parser.add_argument("--local-multihits", type=int,
                         dest="local_multihits",
                         default=config.DEFAULT_LOCAL_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     parser.add_argument("--local-anchor-length", type=int,
                         dest="local_anchor_length",
                         default=config.DEFAULT_LOCAL_ANCHOR_LENGTH,
                         metavar="N",
                         help="Number of bases that read must span "
                         "on each side of a chimera to be considered "
                         "a valid breakpoint read")
     # filtering options
     group = parser.add_argument_group('Filtering options')
     group.add_argument("--filter-num-frags", type=float,
                        default=config.DEFAULT_FILTER_FRAGS,
                        dest="filter_num_frags", metavar="N",
                        help="Filter chimeras with less than N "
                        "aligned fragments [default=%(default)s]")
     # the closing bracket after %(default)s was missing here, which left
     # an unbalanced "[default=..." in the --help output
     group.add_argument("--filter-allele-fraction", type=float,
                        default=config.DEFAULT_FILTER_ALLELE_FRACTION,
                        dest="filter_allele_fraction", metavar="X",
                        help="Filter chimeras with expression less than "
                        "the specified fraction of the total expression "
                        "level [default=%(default)s]")
     group.add_argument("--mask-biotypes-file", default="",
                        dest="mask_biotypes_file",
                        help="File containing list of gene biotypes "
                        "to ignore (ex. pseudogenes, rRNA)")
     group.add_argument("--mask-rnames-file", default="",
                        dest="mask_rnames_file",
                        help="File containing list of reference names "
                        "to ignore (ex. MT or chrM)")
     return parser

Example #4

Show file

File: chimerascan_run.py Project: marcopavoni/chimerascan

 def get_argument_parser():
     """
     Build the command line parser for a chimerascan run.

     The parser takes four positional arguments (index_dir, read1, read2,
     output_dir), a set of general options, and a 'Filtering options'
     argument group.  Option defaults come from module-level constants
     and from the config module.

     returns the configured argparse.ArgumentParser
     """
     parser = argparse.ArgumentParser(usage="%(prog)s [options] <index> "
                                      "<mate1.fq> <mate2.fq> <output_dir>")
     # required options
     parser.add_argument("index_dir",
                         default=None,
                         help="Location of chimerascan index directory")
     parser.add_argument("read1",
                         default=None,
                         help="Path to read1 FASTQ file")
     parser.add_argument("read2",
                         default=None,
                         help="Path to read2 FASTQ file")
     parser.add_argument("output_dir",
                         default=None,
                         help="Location of output files")
     # standard options
     parser.add_argument('--version',
                         action='version',
                         version='%s' % __version__)
     parser.add_argument("--config-file",
                         dest="config_file",
                         help="Load parameters from a XML file "
                         "generated during a previous run ",
                         default=None)
     parser.add_argument("-v",
                         "--verbose",
                         dest="verbose",
                         action="store_true",
                         default=False,
                         help="enable verbose logging output "
                         "[default=%(default)s]")
     parser.add_argument("-p",
                         "--processors",
                         dest="num_processors",
                         type=int,
                         default=DEFAULT_NUM_PROCESSORS,
                         help="Number of processor cores to allocate to "
                         "chimerascan [default=%(default)s]")
     # --keep-tmp and --rm-tmp write to the same destination; the default
     # is declared once, on --keep-tmp
     parser.add_argument("--keep-tmp",
                         dest="keep_tmp",
                         action="store_true",
                         default=DEFAULT_KEEP_TMP,
                         help="DO NOT delete intermediate files after "
                         "run [default=%(default)s]")
     parser.add_argument("--rm-tmp",
                         dest="keep_tmp",
                         action="store_false",
                         help="Delete intermediate files after run "
                         "[default=%s]" % str(not DEFAULT_KEEP_TMP))
     parser.add_argument("--quals",
                         dest="quals",
                         choices=FASTQ_QUAL_FORMATS,
                         default=DEFAULT_FASTQ_QUAL_FORMAT,
                         metavar="FMT",
                         help="FASTQ quality score format "
                         "[default=%(default)s]")
     parser.add_argument('--library-type',
                         dest="library_type",
                         choices=LibraryTypes.choices(),
                         default=DEFAULT_LIBRARY_TYPE,
                         help="Library type [default=%(default)s]")
     parser.add_argument("--isize-mean",
                         dest="isize_mean",
                         type=int,
                         default=DEFAULT_ISIZE_MEAN,
                         metavar="N",
                         help="Mean insert size to sample from when "
                         "insert size distribution cannot be determined "
                         "empirically [default=%(default)s]")
     parser.add_argument("--isize-stdev",
                         dest="isize_stdev",
                         type=float,
                         default=DEFAULT_ISIZE_STDEV,
                         metavar="N",
                         help="Insert size standard deviation to sample "
                         "from when insert size distribution cannot be "
                         "determined empirically [default=%(default)s]")
     parser.add_argument("--trim5",
                         type=int,
                         dest="trim5",
                         default=DEFAULT_TRIM5,
                         metavar="N",
                         help="Trim N bases from 5' end of read")
     parser.add_argument("--trim3",
                         type=int,
                         dest="trim3",
                         default=DEFAULT_TRIM3,
                         metavar="N",
                         help="Trim N bases from 3' end of read")
     parser.add_argument("--min-fragment-length",
                         type=int,
                         dest="min_fragment_length",
                         default=config.DEFAULT_MIN_FRAG_LENGTH,
                         help="Smallest expected fragment length "
                         "[default=%(default)s]")
     parser.add_argument("--max-fragment-length",
                         type=int,
                         dest="max_fragment_length",
                         default=config.DEFAULT_MAX_FRAG_LENGTH,
                         help="Largest expected fragment length (reads "
                         "less than this fragment length are assumed to "
                         "be unspliced and contiguous) "
                         "[default=%(default)s]")
     parser.add_argument("--segment-length",
                         type=int,
                         dest="segment_length",
                         default=DEFAULT_SEGMENT_LENGTH,
                         metavar="N",
                         help="Override size of soft-clipped read "
                         "segments during discordant alignment phase "
                         "(determined empirically by default)")
     parser.add_argument("--multihits",
                         type=int,
                         dest="max_multihits",
                         default=config.DEFAULT_MAX_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     # NOTE(review): this help text is identical to the --multihits one
     # and looks copy-pasted; confirm the intended wording
     parser.add_argument("--local-multihits",
                         type=int,
                         dest="local_multihits",
                         default=config.DEFAULT_LOCAL_MULTIHITS,
                         metavar="N",
                         help="Maximum alignments allowed for each "
                         "discordant read")
     parser.add_argument("--local-anchor-length",
                         type=int,
                         dest="local_anchor_length",
                         default=config.DEFAULT_LOCAL_ANCHOR_LENGTH,
                         metavar="N",
                         help="Number of bases that read must span "
                         "on each side of a chimera to be considered "
                         "a valid breakpoint read")
     # filtering options
     group = parser.add_argument_group('Filtering options')
     group.add_argument("--filter-num-frags",
                        type=float,
                        default=config.DEFAULT_FILTER_FRAGS,
                        dest="filter_num_frags",
                        metavar="N",
                        help="Filter chimeras with less than N "
                        "aligned fragments [default=%(default)s]")
     # the closing bracket after %(default)s was missing here, which left
     # an unbalanced "[default=..." in the --help output
     group.add_argument("--filter-allele-fraction",
                        type=float,
                        default=config.DEFAULT_FILTER_ALLELE_FRACTION,
                        dest="filter_allele_fraction",
                        metavar="X",
                        help="Filter chimeras with expression less than "
                        "the specified fraction of the total expression "
                        "level [default=%(default)s]")
     group.add_argument("--mask-biotypes-file",
                        default="",
                        dest="mask_biotypes_file",
                        help="File containing list of gene biotypes "
                        "to ignore (ex. pseudogenes, rRNA)")
     group.add_argument("--mask-rnames-file",
                        default="",
                        dest="mask_rnames_file",
                        help="File containing list of reference names "
                        "to ignore (ex. MT or chrM)")
     return parser

Example #5

Show file

File: find_discordant_reads.py Project: BioXiao/chimerascan

def classify_read_pairs(pe_reads, max_isize,
                        library_type,
                        tid_tx_map):
    """
    Pair up the alignments of the two mates of a single fragment.

    Each pair formed by the search in this function is made of copies of
    the alignments (copy_read) and is passed to pair_reads() with a
    (DISCORDANT_TAG_NAME, DiscordantTags.<value>) tag.

    max_isize is accepted but not used in this function.

    returns a tuple containing 3 lists, only one of which is non-empty:
    1) (r1,r2) pairs colocalized on one transcript or transcript
       cluster; concordant pairs are preferred, strand-discordant ones
       are returned only when no concordant pair exists
    2) best scoring pairs from find_discordant_pairs(), used when no
       colocalized pair exists
    3) the untouched pe_reads when no pair could be formed
    """
    # the library type dictates whether mates must share a strand
    same_strand = LibraryTypes.same_strand(library_type)
    refdict, clusterdict = map_reads_to_references(pe_reads, tid_tx_map)
    tx_concordant = []
    tx_discordant = []
    cluster_concordant = []
    cluster_discordant = []
    paired_any = False
    #
    # stage one: mates aligned to the very same transcript
    #
    for tid, alns_by_mate in refdict.iteritems():
        # both mates need at least one alignment on this reference
        if len(alns_by_mate[0]) == 0 or len(alns_by_mate[1]) == 0:
            continue
        for first in alns_by_mate[0]:
            for second in alns_by_mate[1]:
                agrees = (same_strand ==
                          (first.is_reverse == second.is_reverse))
                # any two mates on the same transcript can be paired;
                # the strand check only decides tag and destination
                paired_any = True
                first_copy = copy_read(first)
                second_copy = copy_read(second)
                if agrees:
                    tag_value = DiscordantTags.CONCORDANT_TX
                    bucket = tx_concordant
                else:
                    # same transcript but unexpected strands
                    tag_value = DiscordantTags.DISCORDANT_STRAND_TX
                    bucket = tx_discordant
                bucket.append((first_copy, second_copy))
                pair_reads(first_copy, second_copy,
                           [(DISCORDANT_TAG_NAME, tag_value)])
    #
    # stage two: nothing paired so far, so widen the search to mates
    # that fall into the same transcript cluster
    #
    if not paired_any:
        for cluster_id, alns_by_mate in clusterdict.iteritems():
            if len(alns_by_mate[0]) == 0 or len(alns_by_mate[1]) == 0:
                continue
            for first in alns_by_mate[0]:
                for second in alns_by_mate[1]:
                    agrees = (same_strand ==
                              (first.is_reverse == second.is_reverse))
                    paired_any = True
                    first_copy = copy_read(first)
                    second_copy = copy_read(second)
                    if agrees:
                        tag_value = DiscordantTags.CONCORDANT_GENE
                        bucket = cluster_concordant
                    else:
                        tag_value = DiscordantTags.DISCORDANT_STRAND_GENE
                        bucket = cluster_discordant
                    bucket.append((first_copy, second_copy))
                    pair_reads(first_copy, second_copy,
                               [(DISCORDANT_TAG_NAME, tag_value)])
    # concordant pairs win outright; transcript level before cluster
    if tx_concordant:
        return tx_concordant, [], []
    if cluster_concordant:
        return cluster_concordant, [], []
    # otherwise fall back to pairs that violate the strand requirement
    # but still remain colocalized on the same gene/chromosome
    if tx_discordant:
        return tx_discordant, [], []
    if cluster_discordant:
        return cluster_discordant, [], []
    #
    # no colocalized pairing exists, so the fragment is assumed to be
    # discordant; enumerate read1/read2 combinations as putative
    # discordant pairs
    #
    candidates = find_discordant_pairs(pe_reads, library_type)
    if len(candidates) > 0:
        # keep only the best scoring candidates
        return [], select_best_scoring_pairs(candidates), []
    #
    # no valid pairs could be found suggesting that these alignments are
    # either artifacts or that the current transcript annotations do not
    # support this pair
    #
    return [], [], pe_reads