def filter_encompassing_chimeras(input_file, output_file, gene_file,
                                 max_multimap=1,
                                 multimap_cov_ratio=0.10,
                                 max_isize=1000,
                                 strand_pval=0.01,
                                 keep_overlap=False):
    '''
    Filter the chimeras in `input_file` and write the survivors, annotated
    with per-mate junction fractions, to `output_file`.

    The work is done in two passes:

    1. Each record parsed by `Chimera.parse` is run through
       `filter_multimapping`, `filter_insert_size`, `filter_overlapping`
       (skipped when `keep_overlap` is true) and `filter_strand_balance`.
       Records that pass every filter are written to a temporary file
       created next to `output_file`.
    2. A gene/genome map is built from `gene_file`, permiscuity statistics
       are collected over the temporary file, and each surviving record
       gets `mate5p.frac` / `mate3p.frac` assigned before being written
       to `output_file`.

    input_file: path of the chimera file to read
    output_file: path of the filtered file to write
    gene_file: path handed (as an open file) to `build_gene_to_genome_map`
    max_multimap: threshold forwarded to `filter_multimapping`
    multimap_cov_ratio: ratio forwarded to `filter_multimapping`
    max_isize: threshold forwarded to `filter_insert_size`
    strand_pval: threshold forwarded to `filter_strand_balance`
    keep_overlap: when true, `filter_overlapping` is not applied

    Every file opened here is closed, and the temporary file is removed,
    even when one of the steps raises.
    '''
    logging.debug("Filtering chimeras")
    logging.debug("Must have a read with <= %d multimaps", max_multimap)
    logging.debug("Coverage to reads ratio >= %f", multimap_cov_ratio)
    logging.debug("Insert size < %d", max_isize)
    logging.debug("Strand balance p-value > %f", strand_pval)
    tmpfile1 = make_temp(base_dir=os.path.dirname(output_file),
                         suffix='.bedpe')
    try:
        # first perform basic filtering
        with open(input_file) as infh:
            with open(tmpfile1, "w") as fh:
                for c in Chimera.parse(infh):
                    # guard clauses keep the original short-circuit order:
                    # a later filter only runs when the earlier ones passed
                    if not filter_multimapping(
                            c, max_multimap=max_multimap,
                            multimap_cov_ratio=multimap_cov_ratio):
                        continue
                    if not filter_insert_size(c, max_isize):
                        continue
                    if not keep_overlap and not filter_overlapping(c):
                        continue
                    if not filter_strand_balance(c, strand_pval):
                        continue
                    fh.write('\t'.join(map(str, c.to_list())) + '\n')
        logging.debug("Building gene/genome index")
        # NOTE(review): assumes the map is fully built before the call
        # returns, so the gene file can be closed right away -- confirm
        with open(gene_file) as genefh:
            ggmap = build_gene_to_genome_map(genefh)
        logging.debug("Finding junction permiscuity")
        juncmap5p, juncmap3p = collect_permiscuity_stats(tmpfile1, ggmap)
        with open(tmpfile1) as tmpfh:
            with open(output_file, "w") as fh:
                for c in Chimera.parse(tmpfh):
                    frac5p, frac3p = calc_permiscuity(c, juncmap5p,
                                                      juncmap3p, ggmap)
                    c.mate5p.frac = frac5p
                    c.mate3p.frac = frac3p
                    fh.write('\t'.join(map(str, c.to_list())) + '\n')
    finally:
        # delete tmp files, also on failure so nothing is left behind
        if os.path.exists(tmpfile1):
            os.remove(tmpfile1)
def filter_spanning_chimeras(input_file, output_file, gene_file, mate_pval,
                             max_isize):
    '''
    processes chimera isoforms and chooses the one with the
    highest coverage and omits the rest

    Records parsed by `SpanningChimera.parse` from `input_file` are first
    restricted to those accepted by `filter_insert_size`; the survivors
    are written to a temporary file created next to `output_file`.  The
    records yielded by `choose_highest_coverage_chimeras` for that
    temporary file are then written to `output_file`.

    input_file: path of the spanning chimera file to read
    output_file: path of the filtered file to write
    gene_file: path handed (as an open file) to `build_gene_to_genome_map`
    mate_pval: accepted for interface compatibility; not used here
    max_isize: threshold forwarded to `filter_insert_size`

    Every file opened here is closed, and the temporary file is removed,
    even when one of the steps raises.
    '''
    tmpfile = make_temp(os.path.dirname(output_file), suffix='.bedpe')
    try:
        # apply more filters
        with open(input_file) as infh:
            with open(tmpfile, "w") as fh:
                for c in SpanningChimera.parse(infh):
                    if filter_insert_size(c, max_isize):
                        fh.write('\t'.join(map(str, c.to_list())) + '\n')
        # choose best isoform from remaining isoforms
        logging.debug("Building gene/genome index")
        # NOTE(review): assumes the map is fully built before the call
        # returns, so the gene file can be closed right away -- confirm
        with open(gene_file) as genefh:
            ggmap = build_gene_to_genome_map(genefh)
        logging.debug("Choosing highest coverage chimeras")
        with open(output_file, "w") as fh:
            for c in choose_highest_coverage_chimeras(tmpfile, ggmap):
                fh.write('\t'.join(map(str, c.to_list())) + '\n')
    finally:
        # remove temporary file, also on failure so nothing is left behind
        if os.path.exists(tmpfile):
            os.remove(tmpfile)