Beispiel #1
0
def getReadStartEnd(read_str):
    """
    Extract the start and end position from a read coordinate string.

    The string is parsed with convertCoordStr; items 1 and 2 of the parsed
    result are returned as a (start, end) pair.
    """
    parsed_coord = convertCoordStr(read_str)
    read_start = parsed_coord[1]
    read_end = parsed_coord[2]

    return read_start, read_end
Beispiel #2
0
def getRegionCoord(region_coord_str):
    """
    Parse a region coordinate string.

    Delegates to convertCoordStr and returns its result unchanged.
    """
    return convertCoordStr(region_coord_str)
def getRegionCoord(region_coord_str):
    """
    Parse a region coordinate string.

    Delegates to convertCoordStr and returns its result unchanged.
    """
    return convertCoordStr(region_coord_str)
def getReadStartEnd(read_str):
    """
    Extract the start and end position from a read coordinate string.

    The string is parsed with convertCoordStr; items 1 and 2 of the parsed
    result are returned as a (start, end) pair.
    """
    parsed_coord = convertCoordStr(read_str)
    read_start = parsed_coord[1]
    read_end = parsed_coord[2]

    return read_start, read_end
Beispiel #5
0
def getSSOrder(alt_start_or_end, inclusion_start, inclusion_end,
               exclusion_str_list):
    """
    Rank the inclusion splice site among the exclusion splice sites.

    When alt_start_or_end equals "alt_start" the start positions are
    compared; for any other value the end positions are compared.

    Returns a tuple (positions, idx):
      positions - sorted list holding the inclusion position plus one
                  position per exclusion string (parsed with
                  convertCoordStr)
      idx       - index of the first occurrence of the inclusion position
                  in that sorted list
    """
    use_start = alt_start_or_end == "alt_start"
    inclusion_pos = inclusion_start if use_start else inclusion_end

    positions = [inclusion_pos]
    for exclusion_str in exclusion_str_list:
        _, excl_start, excl_end = convertCoordStr(exclusion_str)
        positions.append(excl_start if use_start else excl_end)

    positions = sorted(positions)

    return positions, positions.index(inclusion_pos)
def getSSOrder(alt_start_or_end, inclusion_start, inclusion_end,
               exclusion_str_list):
    """
    Rank the inclusion splice site among the exclusion splice sites.

    When alt_start_or_end equals "alt_start" the start positions are
    compared; for any other value the end positions are compared.

    Returns a tuple (positions, idx):
      positions - sorted list holding the inclusion position plus one
                  position per exclusion string (parsed with
                  convertCoordStr)
      idx       - index of the first occurrence of the inclusion position
                  in that sorted list
    """
    use_start = alt_start_or_end == "alt_start"
    inclusion_pos = inclusion_start if use_start else inclusion_end

    positions = [inclusion_pos]
    for exclusion_str in exclusion_str_list:
        _, excl_start, excl_end = convertCoordStr(exclusion_str)
        positions.append(excl_start if use_start else excl_end)

    positions = sorted(positions)

    return positions, positions.index(inclusion_pos)
Beispiel #7
0
def getAA_ADInclIsoformLen(event_str, jcn_seq_len):
    """
    Return the length of the inclusion isoform of one event record.

    event_str is a tab-delimited record: column 5 holds the ";"-separated
    exclusion junctions and column 6 the inclusion junction coordinate.
    The inclusion junction is ranked among the exclusion junctions with
    getSSOrder, and the matching entry of the list returned by
    getAD_AA_isoform_lengths is returned.
    """
    fields = event_str.split("\t")
    exclusion_jcns = fields[5].split(";")
    _, inclusion_start, inclusion_end = convertCoordStr(fields[6])

    start_or_end = determineAltStartOrEnd(inclusion_start, inclusion_end,
                                          exclusion_jcns)
    sorted_positions, inclusion_idx = getSSOrder(start_or_end,
                                                 inclusion_start,
                                                 inclusion_end,
                                                 exclusion_jcns)
    lengths = getAD_AA_isoform_lengths(start_or_end, sorted_positions,
                                       jcn_seq_len)

    return lengths[inclusion_idx]
def getAA_ADInclIsoformLen(event_str, jcn_seq_len):
    """
    Return the length of the inclusion isoform of one event record.

    event_str is a tab-delimited record: column 5 holds the ";"-separated
    exclusion junctions and column 6 the inclusion junction coordinate.
    The inclusion junction is ranked among the exclusion junctions with
    getSSOrder, and the matching entry of the list returned by
    getAD_AA_isoform_lengths is returned.
    """
    fields = event_str.split("\t")
    exclusion_jcns = fields[5].split(";")
    _, inclusion_start, inclusion_end = convertCoordStr(fields[6])

    start_or_end = determineAltStartOrEnd(inclusion_start, inclusion_end,
                                          exclusion_jcns)
    sorted_positions, inclusion_idx = getSSOrder(start_or_end,
                                                 inclusion_start,
                                                 inclusion_end,
                                                 exclusion_jcns)
    lengths = getAD_AA_isoform_lengths(start_or_end, sorted_positions,
                                       jcn_seq_len)

    return lengths[inclusion_idx]
Beispiel #9
0
def parse_jcn_str(jcn_str):
    """
    Turn a junction (intron) coordinate string into a padded region.

    Returns the tuple
        (chr, chrStart, chrEnd, blockLens, secondBlockStart)
    where
        chrStart         = intron start - DEF_JCN_OVERHANG - 1
        chrEnd           = intron end + DEF_JCN_OVERHANG
        blockLens        = DEF_JCN_OVERHANG
        secondBlockStart = (chrEnd - chrStart) - blockLens
    """
    chrom, start_field, end_field = convertCoordStr(jcn_str)

    first_intron_pos = int(start_field)
    last_intron_pos = int(end_field)

    overhang = DEF_JCN_OVERHANG
    region_start = first_intron_pos - overhang - 1
    region_end = last_intron_pos + overhang

    # Offset of the second block measured from region_start
    second_block_start = (region_end - region_start) - overhang

    return chrom, region_start, region_end, overhang, second_block_start
def parse_jcn_str(jcn_str):
    """
    Turn a junction (intron) coordinate string into a padded region.

    Returns the tuple
        (chr, chrStart, chrEnd, blockLens, secondBlockStart)
    where
        chrStart         = intron start - DEF_JCN_OVERHANG - 1
        chrEnd           = intron end + DEF_JCN_OVERHANG
        blockLens        = DEF_JCN_OVERHANG
        secondBlockStart = (chrEnd - chrStart) - blockLens
    """
    chrom, start_field, end_field = convertCoordStr(jcn_str)

    first_intron_pos = int(start_field)
    last_intron_pos = int(end_field)

    overhang = DEF_JCN_OVERHANG
    region_start = first_intron_pos - overhang - 1
    region_end = last_intron_pos + overhang

    # Offset of the second block measured from region_start
    second_block_start = (region_end - region_start) - overhang

    return chrom, region_start, region_end, overhang, second_block_start
Beispiel #11
0
def getInclIsoformLen(event_str, jcn_seq_len):
    """
    Compute the inclusion isoform length of a tab-delimited event record.

    Junctions are taken from columns 6 and 9 and each contributes
    jcn_seq_len. Exons are taken from column 8 and each contributes
    end - start + 1; entries that are "" or "None" are skipped.
    """
    fields = event_str.split("\t")

    junctions = elems_split(fields[6])
    junctions.extend(elems_split(fields[9]))

    total_exon_len = 0
    for exon_str in elems_split(fields[8]):
        if exon_str not in ("", "None"):
            _, first_pos, last_pos = convertCoordStr(exon_str)
            total_exon_len += last_pos - first_pos + 1

    return total_exon_len + (jcn_seq_len * len(junctions))
def getInclIsoformLen(event_str, jcn_seq_len):
    """
    Compute the inclusion isoform length of a tab-delimited event record.

    Junctions are taken from columns 6 and 9 and each contributes
    jcn_seq_len. Exons are taken from column 8 and each contributes
    end - start + 1; entries that are "" or "None" are skipped.
    """
    fields = event_str.split("\t")

    junctions = elems_split(fields[6])
    junctions.extend(elems_split(fields[9]))

    total_exon_len = 0
    for exon_str in elems_split(fields[8]):
        if exon_str not in ("", "None"):
            _, first_pos, last_pos = convertCoordStr(exon_str)
            total_exon_len += last_pos - first_pos + 1

    return total_exon_len + (jcn_seq_len * len(junctions))
Beispiel #13
0
def findLargestRegion(coords_string):
    """
    Find the outermost positions spanned by a set of coordinates.

    coords_string holds coordinate strings separated by ";" and/or ",".
    Each one is parsed with convertCoordStr.

    Returns (leftmost, rightmost): the smallest start and the largest end
    seen. leftmost starts at INFINITY and rightmost at -1, so those values
    are returned when no coordinate improves on them.
    """
    leftmost = INFINITY
    rightmost = -1

    for group in coords_string.split(";"):
        for coord in group.split(","):
            _, start, end = convertCoordStr(coord)

            # min/max keep the current value on ties, like a strict </>
            leftmost = min(leftmost, start)
            rightmost = max(rightmost, end)

    return leftmost, rightmost
Beispiel #14
0
def findLargestRegion(coords_string):
    """
    Find the outermost positions spanned by a set of coordinates.

    coords_string holds coordinate strings separated by ";" and/or ",".
    Each one is parsed with convertCoordStr.

    Returns (leftmost, rightmost): the smallest start and the largest end
    seen. leftmost starts at INFINITY and rightmost at -1, so those values
    are returned when no coordinate improves on them.
    """
    leftmost = INFINITY
    rightmost = -1

    for group in coords_string.split(";"):
        for coord in group.split(","):
            _, start, end = convertCoordStr(coord)

            # min/max keep the current value on ties, like a strict </>
            leftmost = min(leftmost, start)
            rightmost = max(rightmost, end)

    return leftmost, rightmost
def disambiguateJcnStr(chr_seq, line_list, majority_rules):
    """
    Will use splice site sequence to infer strand

    The intron coordinate is read from line_list[3], the intron sequence is
    taken as chr_seq[start - 1:end] and its first/last two bases are
    compared with known splice site dinucleotides. On a match line_list[5]
    is overwritten in place with "+" or "-".

    Parameters:
      chr_seq        - sequence of the chromosome the junction lies on
      line_list      - fields of one junction line; modified in place
      majority_rules - when true, no message is printed for a junction
                       whose strand cannot be inferred

    Returns None. Exits the process with status 1 when the coordinate in
    line_list[3] cannot be parsed.
    """
    try:
        _, start, end = convertCoordStr(line_list[3])
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must not be reported as
        # a malformed input file.
        print("Junction BED file must have intron position in 4th column.")
        sys.exit(1)

    intron_seq = chr_seq[start - 1:end]

    # (intron start, intron end, strand); None means "any". The order is
    # the priority order: the first matching rule wins.
    strand_rules = (
        ("GT", "AG", "+"),
        ("CT", "AC", "-"),
        # Other common splice site sequence
        ("GC", "AG", "+"),
        ("CT", "GC", "-"),
        # minor spliceosome
        ("AT", "AC", "+"),
        ("GT", "AT", "-"),
        # Priority to 5' splice site since there is more information
        # there
        ("GT", None, "+"),
        (None, "AC", "-"),
        (None, "AG", "+"),
        ("CT", None, "-"),
    )

    for prefix, suffix, strand in strand_rules:
        if prefix is not None and not intron_seq.startswith(prefix):
            continue
        if suffix is not None and not intron_seq.endswith(suffix):
            continue
        line_list[5] = strand
        break
    else:
        if not majority_rules:  # Strand will resolved later if majority_rules
            print("Cannot find strand for %s" % line_list[3])
Beispiel #16
0
def disambiguateJcnStr(chr_seq, line_list, majority_rules):
    """
    Will use splice site sequence to infer strand

    The intron coordinate is read from line_list[3], the intron sequence is
    taken as chr_seq[start - 1:end] and its first/last two bases are
    compared with known splice site dinucleotides. On a match line_list[5]
    is overwritten in place with "+" or "-".

    Parameters:
      chr_seq        - sequence of the chromosome the junction lies on
      line_list      - fields of one junction line; modified in place
      majority_rules - when true, no message is printed for a junction
                       whose strand cannot be inferred

    Returns None. Exits the process with status 1 when the coordinate in
    line_list[3] cannot be parsed.
    """
    try:
        _, start, end = convertCoordStr(line_list[3])
    except Exception:
        # Not a bare except: Ctrl-C and SystemExit must not be reported as
        # a malformed input file.
        print("Junction BED file must have intron position in 4th column.")
        sys.exit(1)

    intron_seq = chr_seq[start - 1:end]

    # (intron start, intron end, strand); None means "any". The order is
    # the priority order: the first matching rule wins.
    strand_rules = (
        ("GT", "AG", "+"),
        ("CT", "AC", "-"),
        # Other common splice site sequence
        ("GC", "AG", "+"),
        ("CT", "GC", "-"),
        # minor spliceosome
        ("AT", "AC", "+"),
        ("GT", "AT", "-"),
        # Priority to 5' splice site since there is more information
        # there
        ("GT", None, "+"),
        (None, "AC", "-"),
        (None, "AG", "+"),
        ("CT", None, "-"),
    )

    for prefix, suffix, strand in strand_rules:
        if prefix is not None and not intron_seq.startswith(prefix):
            continue
        if suffix is not None and not intron_seq.endswith(suffix):
            continue
        line_list[5] = strand
        break
    else:
        if not majority_rules:  # Strand will resolved later if majority_rules
            print("Cannot find strand for %s" % line_list[3])
Beispiel #17
0
def main():
    """
    Count reads that overlap the exon/intron junctions of a set of introns.

    Steps:
      1. Parse and validate the command line options.
      2. Read the intron coordinates (-i or -b) and build, for each intron,
         a region of flanking_dist bases around its left and right
         boundary.
      3. Write the regions to a file and run COUNTING_SCRIPT on them to
         associate reads with regions.
      4. For every intron, tally the distinct read end positions seen in
         its left and right region. A side is reported only when it has at
         least `offsets` distinct positions.

    Files written to --out_dir (all prefixed with --out_prefix):
      _intron_exon_junction_coords.out         regions passed to the script
      _intron_exon_junction_coords_w_read.out  written by COUNTING_SCRIPT
      _intron_exon_junction_counts.txt         intron, left and right count
      _confident_ie.txt                        junctions that passed

    Always terminates through sys.exit: status 0 on success, 1 on invalid
    options or a missing output directory.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option("-i",
                          dest="intron_coords",
                          type="string",
                          help="""File of intron coordinates.  Format:
                                  type, chr, strand, start, end""",
                          default=None)
    opt_parser.add_option("-b",
                          dest="bed_intron_coords",
                          type="string",
                          help="BED file of intron coordinates.",
                          default=None)
    opt_parser.add_option("-a",
                          dest="read_alignments",
                          type="string",
                          help="""File of alignments to genome. 
                                  Format:
                                  chr, start, strand""",
                          default=None)
    opt_parser.add_option("-f",
                          dest="flanking_dist",
                          type="int",
                          help="""Distance away from exon intron junction to
                                  check for reads in.""",
                          default=None)
    opt_parser.add_option("-o",
                          dest="offsets",
                          type="int",
                          help="""Minimum number of offsets required at each
                                  exon/intron junction. Default=1""",
                          default=1)
    opt_parser.add_option("-l",
                          dest="read_length",
                          type="int",
                          help="Length of the reads.",
                          default=1)
    opt_parser.add_option("--out_dir",
                          dest="out_dir",
                          type="string",
                          help="Output files are put here.",
                          default=None)
    opt_parser.add_option("--out_prefix",
                          dest="prefix",
                          type="string",
                          help="Prefix attached to all output files.",
                          default=None)

    (options, _) = opt_parser.parse_args()

    # validate the command line arguments
    for required_flag in ("-a", "-f", "-l", "--out_dir", "--out_prefix"):
        opt_parser.check_required(required_flag)

    # Check that the COUNTING_SCRIPT path is valid
    if not os.path.exists(COUNTING_SCRIPT):
        print("Please change COUNTING_SCRIPT path.")
        opt_parser.print_help()
        sys.exit(1)

    if options.intron_coords and options.bed_intron_coords:
        print("Only one type of intron coord can be used as input.")
        opt_parser.print_help()
        sys.exit(1)

    if (not options.intron_coords) and (not options.bed_intron_coords):
        print(" Need to specify intron coordinates. See options -i or -b")
        opt_parser.print_help()
        sys.exit(1)

    # Exactly one of the two inputs is set at this point.
    isBedFormat = bool(options.bed_intron_coords)
    if isBedFormat:
        intron_coords = open(options.bed_intron_coords)
    else:
        intron_coords = open(options.intron_coords)

    read_alignments = options.read_alignments
    read_length = options.read_length

    flanking_dist = options.flanking_dist
    offsets = options.offsets

    prefix = options.prefix
    out_dir = options.out_dir

    if not out_dir.endswith("/"):
        out_dir += "/"

    if not os.path.exists(out_dir):
        print("Output directory does not exist")
        sys.exit(1)

    # Intermediate Output Files
    out_coords_file = out_dir + prefix + "_intron_exon_junction_coords.out"
    out_coords = open(out_coords_file, "w")

    out_read_assoc_file = (out_dir + prefix +
                           "_intron_exon_junction_coords_w_read.out")

    # Final output
    out_file_name = out_dir + prefix + "_intron_exon_junction_counts.txt"
    out_file = open(out_file_name, "w")

    confident_ie_name = out_dir + prefix + "_confident_ie.txt"
    confident_ie_file = open(confident_ie_name, "w")

    # {(chr, start, end): [intron_coord_str, ...]}
    # Maps the region around the left / right exon/intron junction back to
    # the introns that produced it.
    left_region_coord2intron = {}
    right_region_coord2intron = {}

    # {intron_coord_str:{"left":{pos:count},
    #                    "right":{pos:count}}
    intron_dict = {}

    regions_set = set()

    for line in intron_coords:
        line = formatLine(line)

        if isBedFormat:
            if line.startswith("track"):
                continue
            chrom, start_str, end_str = parseBEDLine(line)
        else:
            # Columns: type, chr, strand, start, end
            _, chrom, _, start_str, end_str = line.split("\t")

        if chrom.startswith("chr"):
            chrom = chrom.replace("chr", "")

        intron_coord_str = "%s:%s-%s" % (chrom, start_str, end_str)
        intron_dict.setdefault(intron_coord_str, {"left": {}, "right": {}})

        start = int(start_str)
        end = int(end_str)

        left_coord = (chrom, start - flanking_dist, start + flanking_dist - 1)
        right_coord = (chrom, end - flanking_dist + 1, end + flanking_dist)

        updateDictOfLists(left_region_coord2intron, left_coord,
                          intron_coord_str)
        updateDictOfLists(right_region_coord2intron, right_coord,
                          intron_coord_str)

        regions_set.add(left_coord)
        regions_set.add(right_coord)

    intron_coords.close()

    # Print out regions out_coords
    for region_coord in regions_set:
        out_coords.write("%s\t%d\t%d\n" % (region_coord[0], region_coord[1],
                                           region_coord[2]))

    out_coords.close()

    # Used to make unique name for tmp file in case a shared directory is being
    # used for runs.
    rand_num = random.randrange(1, 100000)
    tmp_file_name = "%stmp%d.txt" % (out_dir, rand_num)

    # Get Read Counts
    print("Getting Counts in Region")
    # NOTE(review): the paths are interpolated unquoted into a shell
    # command, so paths containing spaces or shell metacharacters break it.
    cmd = "python %s --reads %s -l %d --coords %s -o %s --read_assoc %s" % (
        COUNTING_SCRIPT, read_alignments, read_length, out_coords_file,
        tmp_file_name, out_read_assoc_file)
    print(cmd)
    os.system(cmd)

    # Remove the tmp file. Best effort: a missing file is not an error.
    try:
        os.remove(tmp_file_name)
    except OSError:
        pass

    print("Getting Left and Right Counts")
    # Parse read_assoc_file to get information
    with open(out_read_assoc_file) as read_assoc_file:
        for line in read_assoc_file:
            line_list = formatLine(line).split("\t")

            _, read_end = getReadStartEnd(line_list[1])
            region_coord = getRegionCoord(line_list[2])

            # The result is not used; the call is kept in case the helper
            # validates region_coord - TODO confirm and drop if it is pure.
            getIntronStartEnds(left_region_coord2intron,
                               right_region_coord2intron,
                               region_coord)

            # Both sides are keyed by the read end position.
            for side, region2intron in (("left", left_region_coord2intron),
                                        ("right", right_region_coord2intron)):
                for intron_str in region2intron.get(region_coord, ()):
                    pos2count = intron_dict[intron_str][side]
                    pos2count[read_end] = pos2count.get(read_end, 0) + 1

    # Print output
    confident_ie_set = set()
    for intron_str in intron_dict:
        chrom, intron_start, intron_end = convertCoordStr(intron_str)
        pos_counts = intron_dict[intron_str]

        # Get left_counts
        if len(pos_counts["left"]) >= offsets:
            left_count = getTotalCounts(pos_counts["left"])
            confident_ie_set.add("%s:%d-%d" % (chrom, intron_start - 1,
                                               intron_start))
        else:
            left_count = 0

        # Get right counts
        if len(pos_counts["right"]) >= offsets:
            right_count = getTotalCounts(pos_counts["right"])
            confident_ie_set.add("%s:%d-%d" % (chrom, intron_end,
                                               intron_end + 1))
        else:
            right_count = 0

        if left_count == 0 and right_count == 0:
            continue

        out_file.write("%s\t%d\t%d\n" % (intron_str, left_count, right_count))

    out_file.close()

    # Now print out confident set of ie
    for ie in confident_ie_set:
        confident_ie_file.write("%s\n" % ie)

    confident_ie_file.close()

    sys.exit(0)
def main():
    """
    Count reads that overlap the exon/intron junctions of a set of introns.

    Steps:
      1. Parse and validate the command line options.
      2. Read the intron coordinates (-i or -b) and build, for each intron,
         a region of flanking_dist bases around its left and right
         boundary.
      3. Write the regions to a file and run COUNTING_SCRIPT on them to
         associate reads with regions.
      4. For every intron, tally the distinct read end positions seen in
         its left and right region. A side is reported only when it has at
         least `offsets` distinct positions.

    Files written to --out_dir (all prefixed with --out_prefix):
      _intron_exon_junction_coords.out         regions passed to the script
      _intron_exon_junction_coords_w_read.out  written by COUNTING_SCRIPT
      _intron_exon_junction_counts.txt         intron, left and right count
      _confident_ie.txt                        junctions that passed

    Always terminates through sys.exit: status 0 on success, 1 on invalid
    options or a missing output directory.

    All print calls take a single argument, so the function runs under both
    Python 2 and Python 3.
    """
    opt_parser = OptionParser()

    # Add Options. Required options should have default=None
    opt_parser.add_option("-i",
                          dest="intron_coords",
                          type="string",
                          help="""File of intron coordinates.  Format:
                                  type, chr, strand, start, end""",
                          default=None)
    opt_parser.add_option("-b",
                          dest="bed_intron_coords",
                          type="string",
                          help="BED file of intron coordinates.",
                          default=None)
    opt_parser.add_option("-a",
                          dest="read_alignments",
                          type="string",
                          help="""File of alignments to genome. 
                                  Format:
                                  chr, start, strand""",
                          default=None)
    opt_parser.add_option("-f",
                          dest="flanking_dist",
                          type="int",
                          help="""Distance away from exon intron junction to
                                  check for reads in.""",
                          default=None)
    opt_parser.add_option("-o",
                          dest="offsets",
                          type="int",
                          help="""Minimum number of offsets required at each
                                  exon/intron junction. Default=1""",
                          default=1)
    opt_parser.add_option("-l",
                          dest="read_length",
                          type="int",
                          help="Length of the reads.",
                          default=1)
    opt_parser.add_option("--out_dir",
                          dest="out_dir",
                          type="string",
                          help="Output files are put here.",
                          default=None)
    opt_parser.add_option("--out_prefix",
                          dest="prefix",
                          type="string",
                          help="Prefix attached to all output files.",
                          default=None)

    (options, _) = opt_parser.parse_args()

    # validate the command line arguments
    for required_flag in ("-a", "-f", "-l", "--out_dir", "--out_prefix"):
        opt_parser.check_required(required_flag)

    # Check that the COUNTING_SCRIPT path is valid
    if not os.path.exists(COUNTING_SCRIPT):
        print("Please change COUNTING_SCRIPT path.")
        opt_parser.print_help()
        sys.exit(1)

    if options.intron_coords and options.bed_intron_coords:
        print("Only one type of intron coord can be used as input.")
        opt_parser.print_help()
        sys.exit(1)

    if (not options.intron_coords) and (not options.bed_intron_coords):
        print(" Need to specify intron coordinates. See options -i or -b")
        opt_parser.print_help()
        sys.exit(1)

    # Exactly one of the two inputs is set at this point.
    isBedFormat = bool(options.bed_intron_coords)
    if isBedFormat:
        intron_coords = open(options.bed_intron_coords)
    else:
        intron_coords = open(options.intron_coords)

    read_alignments = options.read_alignments
    read_length = options.read_length

    flanking_dist = options.flanking_dist
    offsets = options.offsets

    prefix = options.prefix
    out_dir = options.out_dir

    if not out_dir.endswith("/"):
        out_dir += "/"

    if not os.path.exists(out_dir):
        print("Output directory does not exist")
        sys.exit(1)

    # Intermediate Output Files
    out_coords_file = out_dir + prefix + "_intron_exon_junction_coords.out"
    out_coords = open(out_coords_file, "w")

    out_read_assoc_file = (out_dir + prefix +
                           "_intron_exon_junction_coords_w_read.out")

    # Final output
    out_file_name = out_dir + prefix + "_intron_exon_junction_counts.txt"
    out_file = open(out_file_name, "w")

    confident_ie_name = out_dir + prefix + "_confident_ie.txt"
    confident_ie_file = open(confident_ie_name, "w")

    # {(chr, start, end): [intron_coord_str, ...]}
    # Maps the region around the left / right exon/intron junction back to
    # the introns that produced it.
    left_region_coord2intron = {}
    right_region_coord2intron = {}

    # {intron_coord_str:{"left":{pos:count},
    #                    "right":{pos:count}}
    intron_dict = {}

    regions_set = set()

    for line in intron_coords:
        line = formatLine(line)

        if isBedFormat:
            if line.startswith("track"):
                continue
            chrom, start_str, end_str = parseBEDLine(line)
        else:
            # Columns: type, chr, strand, start, end
            _, chrom, _, start_str, end_str = line.split("\t")

        if chrom.startswith("chr"):
            chrom = chrom.replace("chr", "")

        intron_coord_str = "%s:%s-%s" % (chrom, start_str, end_str)
        intron_dict.setdefault(intron_coord_str, {"left": {}, "right": {}})

        start = int(start_str)
        end = int(end_str)

        left_coord = (chrom, start - flanking_dist, start + flanking_dist - 1)
        right_coord = (chrom, end - flanking_dist + 1, end + flanking_dist)

        updateDictOfLists(left_region_coord2intron, left_coord,
                          intron_coord_str)
        updateDictOfLists(right_region_coord2intron, right_coord,
                          intron_coord_str)

        regions_set.add(left_coord)
        regions_set.add(right_coord)

    intron_coords.close()

    # Print out regions out_coords
    for region_coord in regions_set:
        out_coords.write("%s\t%d\t%d\n" % (region_coord[0], region_coord[1],
                                           region_coord[2]))

    out_coords.close()

    # Used to make unique name for tmp file in case a shared directory is being
    # used for runs.
    rand_num = random.randrange(1, 100000)
    tmp_file_name = "%stmp%d.txt" % (out_dir, rand_num)

    # Get Read Counts
    print("Getting Counts in Region")
    # NOTE(review): the paths are interpolated unquoted into a shell
    # command, so paths containing spaces or shell metacharacters break it.
    cmd = "python %s --reads %s -l %d --coords %s -o %s --read_assoc %s" % (
        COUNTING_SCRIPT, read_alignments, read_length, out_coords_file,
        tmp_file_name, out_read_assoc_file)
    print(cmd)
    os.system(cmd)

    # Remove the tmp file. Best effort: a missing file is not an error.
    try:
        os.remove(tmp_file_name)
    except OSError:
        pass

    print("Getting Left and Right Counts")
    # Parse read_assoc_file to get information
    with open(out_read_assoc_file) as read_assoc_file:
        for line in read_assoc_file:
            line_list = formatLine(line).split("\t")

            _, read_end = getReadStartEnd(line_list[1])
            region_coord = getRegionCoord(line_list[2])

            # The result is not used; the call is kept in case the helper
            # validates region_coord - TODO confirm and drop if it is pure.
            getIntronStartEnds(left_region_coord2intron,
                               right_region_coord2intron,
                               region_coord)

            # Both sides are keyed by the read end position.
            for side, region2intron in (("left", left_region_coord2intron),
                                        ("right", right_region_coord2intron)):
                for intron_str in region2intron.get(region_coord, ()):
                    pos2count = intron_dict[intron_str][side]
                    pos2count[read_end] = pos2count.get(read_end, 0) + 1

    # Print output
    confident_ie_set = set()
    for intron_str in intron_dict:
        chrom, intron_start, intron_end = convertCoordStr(intron_str)
        pos_counts = intron_dict[intron_str]

        # Get left_counts
        if len(pos_counts["left"]) >= offsets:
            left_count = getTotalCounts(pos_counts["left"])
            confident_ie_set.add("%s:%d-%d" % (chrom, intron_start - 1,
                                               intron_start))
        else:
            left_count = 0

        # Get right counts
        if len(pos_counts["right"]) >= offsets:
            right_count = getTotalCounts(pos_counts["right"])
            confident_ie_set.add("%s:%d-%d" % (chrom, intron_end,
                                               intron_end + 1))
        else:
            right_count = 0

        if left_count == 0 and right_count == 0:
            continue

        out_file.write("%s\t%d\t%d\n" % (intron_str, left_count, right_count))

    out_file.close()

    # Now print out confident set of ie
    for ie in confident_ie_set:
        confident_ie_file.write("%s\n" % ie)

    confident_ie_file.close()

    sys.exit(0)