コード例 #1
0
ファイル: predict.py プロジェクト: marcasriv/ribotish
def _pred_gene(ps):  ### trans
    '''Predict ORFs for every transcript of one gene.

    Worker called once per gene, either directly or through a process pool.
    All settings (bam paths, read offsets, thresholds, option flags) are read
    from module level globals.

    Parameters
    ----------
    ps : tuple
        ``(g, candidates, pf)`` where

        * ``g`` is the gene object whose ``g.trans`` are processed;
        * ``candidates`` is None (test every possible ORF) or a dict mapping
          a transcript id to a list of ``(tis, stop)`` pairs, both being
          positions on the transcript sequence;
        * ``pf`` maps a transcript id to a ``(tispf, ribopf)`` pair of saved
          count profiles.  The bam files are not read when ``pf`` holds at
          least as many entries as ``candidates`` or as ``g.trans``.

    Returns
    -------
    tuple
        ``(es, j, tpfs, g)``: the list of results built by getResult() for
        the ORFs that pass all thresholds, the counters
        ``j = [tested ORFs, tested TISs]``, the dict of transcript profile
        strings (filled only when ``transprofile`` is set) and the input gene.
    '''
    g, candidates, pf = ps
    es, j = [], [0, 0]
    tpfs = {}  # transcript profiles, keyed by "gid\ttid\tsymbol"
    genome = fa.Fa(genomefapath)
    has_tis = len(tisbampaths) > 0

    # Decide whether read counts come from the bam files or from pf.
    load = True
    if candidates is not None:
        if len(candidates) == 0:
            return es, j, tpfs, g
        if len(pf) >= len(candidates):
            load = False
    if len(pf) >= len(g.trans):
        load = False
    if load:
        tismbl = ribo.multiRiboGene(g,
                                    tisbampaths,
                                    offdict=tisoffdict,
                                    compatible=compatible,
                                    mis=compatiblemis,
                                    paired=paired)
        ribombl = ribo.multiRiboGene(g,
                                     ribobampaths,
                                     offdict=riboffdict,
                                     compatible=compatible,
                                     mis=compatiblemis,
                                     paired=paired)

    for t in g.trans:
        tl = t.cdna_length()
        if tl < ribo.minTransLen:
            continue
        if load:
            ttis = ribo.Ribo(t,
                             bamload=tismbl,
                             compatible=compatible,
                             mis=compatiblemis)
            tribo = ribo.Ribo(t,
                              bamload=ribombl,
                              compatible=compatible,
                              mis=compatiblemis)
        elif t.id in pf:
            tispf, ribopf = pf[t.id]
            ttis = ribo.Ribo(t)
            tribo = ribo.Ribo(t)
            if has_tis:
                ttis.dict2cnts(tispf)
            tribo.dict2cnts(ribopf)
        else:
            print(
                'Warning: transcript {} {} {} not in input trans profile! '
                .format(t.gid, t.id, t.symbol))
            continue

        score = ttis.abdscore()
        ip = ribo.pidx(score, slp)  # index into paras (TIS background group)
        if verbose >= 2:
            print(io.tabjoin(g.id, t.id, ttis.total, tribo.total))
        cds1 = t.cds_start(cdna=True)
        cds2 = t.cds_stop(cdna=True)
        tsq = genome.transSeq(t)

        # Record the profiles before the TIS counts are merged into tribo,
        # so that the saved ribo profile holds the ribo data only.
        if transprofile is not None:
            tid = '{}\t{}\t{}'.format(t.gid, t.id, t.symbol)
            tpfs[tid] = '{}\t{}'.format(ttis.cnts_dict_str(),
                                        tribo.cnts_dict_str())

        if has_tis and tis2ribo:
            tribo.merge(ttis)

        if candidates is not None:
            # user provided candidates
            if t.id not in candidates:
                continue
            for tis, stop in candidates[t.id]:
                j[0] += 1
                j[1] += 1
                if has_tis:
                    tp = ttis.tis_test(tis, paras[ip][0], paras[ip][1])
                else:
                    tp = None
                if enrichtest:
                    rp = tribo.enrich_test(tis, stop)
                else:
                    rp = tribo.frame_test(tis, stop)
                if tp is not None and tp > tpth:
                    continue
                if rp > fpth:
                    continue
                minp = rp
                if tp is not None and tp < minp:
                    minp = tp
                if minp > minpth:
                    continue
                fsp, _ = stat.fisher_method([tp, rp])
                if fsp > fspth:
                    continue
                has_stop = tsq[stop - 3:stop] in orf.cstop
                e = getResult(t, tis, stop, cds1, cds2, tsq,
                              [ip, ttis.cnts[tis], tp, rp, 'N', fsp], has_stop)
                es.append(e)

        else:
            # all possible ORFs
            orfs = orf.orflist(tsq, minaalen=minaalen, tail=tl)
            for o in orfs:
                # Work on a copy: extending and sorting must not modify the
                # ORF object's own start list.
                starts = list(o.starts)
                if alt:
                    starts.extend(o.altstarts)
                starts.sort()
                if longest and not has_tis:
                    starts = starts[0:1]
                ol = len(starts)
                if ol == 0:
                    continue
                tps = [None] * ol
                rps = [1] * ol
                # Skip ORFs with no in-frame reads at all in either data set.
                if has_tis:
                    allz_tis = max(ttis.cnts[starts[0]:o.stop:3]) == 0
                else:
                    allz_tis = True
                allz_ribo = max(tribo.cnts[starts[0]:o.stop:3]) == 0
                if allz_tis and allz_ribo:
                    continue
                for i, tis in enumerate(starts):
                    if has_tis:
                        tps[i] = ttis.tis_test(tis, paras[ip][0], paras[ip][1])
                    # starts are sorted, so once the in-frame counts from one
                    # start to the stop are all zero they stay zero for every
                    # later start and the ribo test is skipped (p stays 1).
                    if not allz_ribo:
                        allz_ribo = max(tribo.cnts[tis:o.stop:3]) == 0
                    if not allz_ribo:
                        if enrichtest:
                            rps[i] = tribo.enrich_test(tis, o.stop)
                        else:
                            rps[i] = tribo.frame_test(tis, o.stop)
                rst = pvalStatus(rps)
                for i, tis in enumerate(starts):
                    if tps[i] is not None and tps[i] > tpth:
                        continue
                    if rps[i] > fpth:
                        continue
                    minp = rps[i]
                    if tps[i] is not None and tps[i] < minp:
                        minp = tps[i]
                    if minp > minpth:
                        continue
                    # Without a TIS p value within minpth, the start has to
                    # be selected by the longest / frame-best rules.
                    if tps[i] is None or tps[i] > minpth:
                        if longest:
                            if i > 0:
                                continue
                        else:
                            if framelocalbest and rst[i] == 'N':
                                continue
                            if framebest and rst[i][0] != 'T':
                                continue
                    fsp, _ = stat.fisher_method([tps[i], rps[i]])
                    if fsp > fspth:
                        continue
                    e = getResult(
                        t, tis, o.stop, cds1, cds2, tsq,
                        [ip, ttis.cnts[tis], tps[i], rps[i], rst[i], fsp],
                        o.has_stop_codon)
                    es.append(e)
                j[1] += ol
                j[0] += 1

    return es, j, tpfs, g
コード例 #2
0
def run(args):
    '''Main function for differential TIS

    Steps, driven by the parsed command line options in ``args``:

    1. Store the options needed by the worker functions in module globals.
    2. Optionally load RNA-seq counts (``args.rnaseq``).
    3. Load the TIS tables of both conditions (``args.tis1paths`` and
       ``args.tis2paths``) and collect the significant TISs.
    4. Fetch from the bam files the counts of TISs that are missing in some
       table.
    5. If there is more than one table in a condition, or ``args.export`` is
       set, write the count table and return.
    6. Otherwise estimate (or take from ``args``) the scale factors, test
       each TIS, apply BH correction, write ``args.output`` and optionally
       draw the scatter plot (``args.plotout``) and MA plot (``args.plotma``).

    Exits with status 1 on missing bam input or invalid RNA-seq values.
    '''
    global ipth, iqth, tis1bampaths, tis2bampaths, tis1offdict, tis2offdict, compatible, compatiblemis, paired
    ipth, iqth = args.ipth, args.iqth
    tis1bampaths = args.tis1bampaths
    tis2bampaths = args.tis2bampaths
    ribo.maxNH, ribo.minMapQ, ribo.secondary = args.maxNH, args.minMapQ, args.secondary
    compatible = not args.nocompatible
    compatiblemis = args.compatiblemis
    paired = args.paired
    if len(tis1bampaths) < len(args.tis1paths) or len(tis2bampaths) < len(
            args.tis2paths):
        print('Missing bam file input!')
        sys.exit(1)
    if args.chrmap is not None:
        # Two-way chromosome name mapping shared by the bam and fasta readers.
        chrmap = {}
        for lst in io.splitIter(args.chrmap):
            chrmap[lst[0]] = lst[1]
            chrmap[lst[1]] = lst[0]
        bam.chrmap = chrmap
        fa.chrmap = chrmap

    global tis1bampathslist, tis2bampathslist, tis1offdictlist, tis2offdictlist
    tis1bampathslist = [s.split(';') for s in args.tis1bampaths]
    tis2bampathslist = [s.split(';') for s in args.tis2bampaths]
    if args.tis1para is None:
        tis1paralist = [None] * len(args.tis1paths)
    else:
        tis1paralist = [s.split(';') for s in args.tis1para]
    if args.tis2para is None:
        tis2paralist = [None] * len(args.tis2paths)
    else:
        tis2paralist = [s.split(';') for s in args.tis2para]

    tis1offdictlist = [
        find_offset(bampaths, para)
        for bampaths, para in zip(tis1bampathslist, tis1paralist)
    ]
    tis2offdictlist = [
        find_offset(bampaths, para)
        for bampaths, para in zip(tis2bampathslist, tis2paralist)
    ]

    # Samples without an explicit label are labelled by their file path.
    args.tis1labels.extend(args.tis1paths[len(args.tis1labels):])
    args.tis2labels.extend(args.tis2paths[len(args.tis2labels):])
    title = args.tis1labels + args.tis2labels
    tis_title = ['TIS_' + lab for lab in title]
    rna_title = ['RNA_' + lab for lab in title]
    l = len(title)

    if args.rnaseq is not None:
        if args.verbose: print("Loading RNASeq data...")
        rna_profile = exp.Profile()
        for lst in io.splitIter(args.rnaseq):
            # Build real lists: a lazy map() would neither raise here nor
            # survive the min() below on Python 3.
            try:
                values = [int(v) for v in lst[1:]]
            except ValueError:
                try:
                    [float(v) for v in lst[1:]]
                except ValueError:
                    continue  # not numeric at all (e.g. header line): skip
                print('Error: RNASeq data should be integers {}.'.format(
                    lst))
                sys.exit(1)
            if min(values) < 0:
                print('Error: RNASeq data should be non-negative integers {}.'.
                      format(lst))
                sys.exit(1)
            rna_profile.add_exp(exp.Exp(lst[0], values))
        if args.verbose: print("{} genes.".format(len(rna_profile)))

    if args.verbose: print("Loading {} TIS data...".format(l))
    gname, gpos, gsig = {}, {}, {}  # per TIS: info string, genome pos, 0/1 per sample
    tall = []  # per sample: {tis: (count, pvalue, qvalue)}
    gid = {}  # transcript id -> gene id
    anno = {}  # annotated TIS
    for i, fname in enumerate(args.tis1paths + args.tis2paths):
        n = 0
        tdata = {}
        for lst in io.splitIter(fname):
            try:
                tis = (lst[1], int(lst[6]))
            except (IndexError, ValueError):
                continue  # header or malformed line
            gid[lst[1]] = lst[0]
            cnt, pval, qval = int(lst[10]), float(lst[11]), float(lst[args.qi])
            tdata[tis] = cnt, pval, qval
            lst[4] = get_tis(lst[4])
            if lst[8] == 'Annotated': anno[lst[4]] = 1  # genome position
            if sig(tdata[tis]):
                n += 1
                gname[tis] = '\t'.join(lst[:9])  # information for the TIS
                gpos[tis] = lst[4]
                if tis not in gsig: gsig[tis] = [0] * len(tis_title)
                gsig[tis][i] = 1
        if args.verbose: print("{} TISs in {}.".format(n, fname))
        tall.append(tdata)

    profile = exp.Profile()
    profile2 = exp.Profile()  # uniq TISs for TMM
    trans_for_bam = {}  # TIS genes need to be analyzed
    es = {}
    uniq_gpos = {}
    for tis in gname:
        values = []
        for i, tdata in enumerate(tall):
            if tis not in tdata:
                # Not reported for this sample: placeholder now, the count is
                # read from the bam files later.
                if tis[0] not in trans_for_bam:
                    trans_for_bam[tis[0]] = [{} for _ in range(l)]
                trans_for_bam[tis[0]][i][tis[1]] = None
                values.append(None)
            else:
                values.append(tdata[tis][0])
        if args.rnaseq is not None:
            if tis[0] in rna_profile.exps:
                values += rna_profile.exps[tis[0]].data  # trans level
            elif gid[tis[0]] in rna_profile.exps:
                values += rna_profile.exps[gid[tis[0]]].data  # gene level
            else:
                print('Warning: transcript {} {} is not found in RNA file!'.
                      format(gid[tis[0]], tis[0]))
                values += [0] * l

        e = exp.Exp(gname[tis], values)
        e.tis = tis
        es[tis] = e
        profile.add_exp(e)
        if gpos[tis] not in uniq_gpos:
            if args.normanno and gpos[tis] not in anno: continue
            if args.normcomm:
                # Normalize only with TISs significant in every sample.
                for tdata in tall:
                    if tis not in tdata or not sig(tdata[tis]): break
                else: profile2.add_exp(e)
            else:
                profile2.add_exp(e)
            uniq_gpos[gpos[tis]] = 1

    if args.verbose: print("{} TISs in total.".format(len(profile)))
    if not args.normcomm:
        # Keep, for each sample, the same number (m) of top ranked TISs.
        uns = [
            sum(1 for e in profile2 if gsig[e.tis][i] == 1)
            for i in range(len(tis_title))
        ]
        m = min(uns)
        elst = list(profile2.exps.values())
        profile3 = exp.Profile()
        for i in range(len(tis_title)):
            for e in elst:
                # NOTE(review): presumably e.value is what exp.Exp compares
                # on when sorting - confirm against the exp module.
                e.value[0:2] = [gsig[e.tis][i], e.data[i]]
            elst.sort(reverse=True)
            for e in elst[:m]:
                if e.id not in profile3.exps: profile3.add_exp(e)
        profile2 = profile3

    if args.verbose: print("Reading bams...")
    trans_iter = io.transIter(args.genepath,
                              fileType=args.geneformat,
                              verbose=args.verbose,
                              filt=trans_for_bam)
    para_iter = transPara(trans_iter, trans_for_bam)
    pool = None
    if args.numProc <= 1:
        # itertools.imap only exists on Python 2; map is its lazy equivalent
        # on Python 3.
        imap = getattr(itertools, 'imap', map)
        pred_iter = imap(_get_tis, para_iter)
    else:
        from multiprocessing import Pool
        pool = Pool(processes=args.numProc - 1)
        pred_iter = pool.imap_unordered(_get_tis, para_iter, chunksize=5)
    for tid, pos_cnt in pred_iter:
        for i, pc in enumerate(pos_cnt):
            for pos in pc:
                es[(tid, pos)].data[i] = pc[pos]
    if pool is not None:
        pool.close()
        pool.join()

    if len(args.tis1paths) > 1 or len(
            args.tis2paths) > 1 or args.export is not None:
        # With replicates only the count table is exported; no test is run.
        if args.export is None: args.export = 'tisdiff_export.txt'
        if args.verbose:
            print('Export TIS counts table to {}.'.format(args.export))
        with open(args.export, 'w') as exfile:
            if args.rnaseq is None:
                exfile.write(io.tabjoin('TIS', tis_title) + '\n')
            else:
                exfile.write(io.tabjoin('TIS', tis_title, rna_title) + '\n')
            for tis in gname:
                s = '{}_{}_{}\t'.format(tis[0], tis[1], gpos[tis])
                s += io.tabjoin(es[tis].data)
                exfile.write(s + '\n')
        return

    # log2 TMM factors; they stay None when the scale is given by the user
    # and are then derived from the scale for plotting.
    f = fr = None
    if args.scalefactor is not None:
        scale = args.scalefactor
    else:
        if args.verbose: print('Estimate scale factor...')
        f = profile2.TMM(i1=0, i2=1)
        if args.verbose: print('TIS TMM log2 f = {}'.format(f))
        scale = 2**(-f)
    if args.rnaseq is not None:
        if args.rnascale is not None:
            scale_r = args.rnascale
        else:
            fr = rna_profile.TMM(i1=0, i2=1)  # for only one replicate
            if args.verbose: print('RNASeq TMM log2 f = {}'.format(fr))
            scale_r = 2**(-fr)

    if args.verbose: print('Diff test...')

    exps = profile.exps.values()
    for e in exps:
        if args.rnaseq is None:
            p = 1.0 / (scale + 1)  # 1.0: no floor division for an int scale
            x, y = e.data[0], e.data[1]
            n = x + y
            if x == 0: fc, alt = 'INF', 'less'
            elif y == 0: fc, alt = 0, 'greater'
            else:
                fc = y / (1.0 * x * scale)
                if scale * x <= y: alt = 'less'
                else: alt = 'greater'
            pv = stat.binom_test(n, x, p=p, alt=alt)
        else:
            x, y, r1, r2 = e.data[0:4]
            if x == 0: fc, alt = 'INF', 'less'
            elif y == 0 or r1 == 0: fc, alt = 0, 'greater'
            elif r2 == 0: fc, alt = 'INF', 'less'
            else:
                fc = y / (1.0 * x * scale) / (r2 / (1.0 * r1 * scale_r))
                if fc >= 1: alt = 'less'  # test x
                else: alt = 'greater'
            if args.chi2:
                pv = ribo.TIStest_chi2(x, y, r1, r2, scale, scale_r, alt=alt)
            elif args.betabinom:
                pv = ribo.TIStest_betaBinom(x,
                                            y,
                                            r1,
                                            r2,
                                            scale,
                                            scale_r,
                                            alt=alt)
            else:
                pv = ribo.TIStest_FisherExact(x,
                                              y,
                                              r1,
                                              r2,
                                              scale,
                                              scale_r,
                                              alt=alt)
        pv *= 2  # two tailed
        if pv > 1: pv = 1
        e.data.append(fc)
        e.data.append(pv)

    # Appends the q value after the p value (last column) of every entry.
    profile.BHcorrection(-1, append=True)

    if args.verbose: print('Output...')
    s = "Gid\tTid\tSymbol\tGeneType\tGenomePos\tStartCodon\tStart\tStop\tTisType\t"
    s += '\t'.join(tis_title)
    if args.rnaseq is not None: s += '\t' + '\t'.join(rna_title)
    s += '\tFoldChange\tDiffPvalue\tDiffQvalue\n'
    with open(args.output, 'w') as outfile:
        outfile.write(s)
        for e in profile:
            # e.data ends with fold change, p value, q value.
            fc = e.data[-3]
            e.is_q = e.is_fc = True
            if fc != 'INF' and fc != 0 and max(fc, 1.0 / fc) < args.foldchange:
                e.is_fc = False
            if e.data[-2] > args.opth or e.data[-1] > args.oqth:
                e.is_q = False
            if e.is_q and e.is_fc:
                outfile.write(str(e) + '\n')

    # Plot
    if args.plotout is not None:
        if args.verbose: print("Ploting...")
        from zbio import plot
        plot.figure(figsize=args.figsize)

        if args.rnaseq is not None:
            # log2 TIS count minus log2 RNA count, pseudocount 1
            def log_xy(e):
                return (math.log(e.data[0] + 1, 2) - math.log(e.data[2] + 1, 2),
                        math.log(e.data[1] + 1, 2) - math.log(e.data[3] + 1, 2))
        else:
            def log_xy(e):
                return (math.log(e.data[0] + 1, 2), math.log(e.data[1] + 1, 2))

        def split_xy(selected):
            pts = [log_xy(e) for e in selected]
            return [pt[0] for pt in pts], [pt[1] for pt in pts]

        qd1, qd2 = split_xy(e for e in exps if e.is_q and e.is_fc)
        pd1, pd2 = split_xy(e for e in exps if e.is_q and not e.is_fc)
        nd1, nd2 = split_xy(e for e in exps if not e.is_q)

        plot.scatter(qd1,
                     qd2,
                     alpha=0.1,
                     edgecolors='none',
                     color='r',
                     label='q < {} & FC > {}'.format(args.oqth,
                                                     args.foldchange))
        plot.scatter(pd1,
                     pd2,
                     alpha=0.1,
                     edgecolors='none',
                     color='y',
                     label='q < {} & FC <= {}'.format(args.oqth,
                                                      args.foldchange))
        plot.scatter(nd1,
                     nd2,
                     alpha=0.1,
                     edgecolors='none',
                     color='g',
                     label='q >= {}'.format(args.oqth))
        plot.legend(loc='upper left', frameon=False)
        plot.xlabel(title[0])
        plot.ylabel(title[1])
        # With user supplied scale factors no TMM was run; recover the log2
        # factors from the scales (scale = 2**(-f)) so the guide lines can
        # still be drawn.
        if f is None:
            f = -math.log(scale, 2)
        if args.rnaseq is not None:
            if fr is None:
                fr = -math.log(scale_r, 2)
            d = (fr - f) / 2
        else:
            d = -f / 2
        m1 = max(min(qd1 + pd1 + nd1), min(qd2 + pd2 + nd2))
        m2 = min(max(qd1 + pd1 + nd1), max(qd2 + pd2 + nd2))
        plot.plot([m1 - d, m2 - d], [m1 + d, m2 + d], color='k', linestyle=':')
        d2 = d - math.log(args.foldchange, 2) / 2
        plot.plot([m1 - d2, m2 - d2], [m1 + d2, m2 + d2],
                  color='r',
                  linestyle=':')
        d2 = d + math.log(args.foldchange, 2) / 2
        plot.plot([m1 - d2, m2 - d2], [m1 + d2, m2 + d2],
                  color='r',
                  linestyle=':')
        plot.save(args.plotout)

        # NOTE(review): the MA plot is only drawn when plotout is also given;
        # kept as in the original - confirm that this nesting is intended.
        if args.plotma is not None:
            exps = profile2.exps.values()
            plot.figure(figsize=args.figsize)
            plot.axhline(f)
            ms = [e.M for e in exps if hasattr(e, 'select') and e.select]
            aa = [e.A for e in exps if hasattr(e, 'select') and e.select]
            plot.scatter(aa, ms, alpha=0.1, edgecolors='none', color='r')
            ms = [e.M for e in exps if hasattr(e, 'select') and not e.select]
            aa = [e.A for e in exps if hasattr(e, 'select') and not e.select]
            plot.scatter(aa, ms, alpha=0.1, edgecolors='none', color='b')
            plot.save(args.plotma)
コード例 #3
0
ファイル: predict.py プロジェクト: marcasriv/ribotish
def run(args):
    '''Main function for ORF finding

    Steps, driven by the parsed command line options in ``args``:

    1. Store the options needed by the worker function in module globals.
    2. Estimate the TIS background parameters, or load them from
       ``args.inestpath``; fixed defaults are used without TIS data.
    3. Optionally load the CDS annotation (``args.agenepath``), candidate
       ORFs (``args.input``) and saved transcript profiles
       (``args.inprofile``).
    4. Run _pred_gene on every gene, serially or in a process pool, and
       optionally write the transcript profiles.
    5. Mark results that are known TISs or overlap annotated CDS in frame,
       apply BH correction and write ``args.output`` (results with
       q <= ``args.fsqth``) and, unless disabled, the table of all results.

    Exits with status 1 when no bam file is given.
    '''
    # prepare
    global tisbampaths, tisoffdict, ribobampaths, riboffdict, genomefapath, compatible, compatiblemis
    global minaalen, enrichtest, slp, paras, verbose, alt, title, tis2ribo, gfilter
    global tpth, fpth, minpth, fspth, framebest, framelocalbest, longest, transprofile, TIS_types  #fspth
    global paired, seq, aaseq, blocks  # showtime
    paired, seq, aaseq, blocks = args.paired, args.seq, args.aaseq, args.blocks
    ribo.maxNH, ribo.minMapQ, ribo.secondary = args.maxNH, args.minMapQ, args.secondary
    tisbampaths = args.tisbampaths
    ribobampaths = args.ribobampaths
    if len(tisbampaths) == 0 and len(ribobampaths) == 0:
        print('No bam file input!')
        exit(1)
    genomefapath = args.genomefapath
    compatible = not args.nocompatible
    compatiblemis = args.compatiblemis
    minaalen = args.minaalen
    enrichtest = args.enrichtest
    transprofile = args.transprofile
    harrwidth = None
    TIS_types = [
        'Annotated', 'Truncated', 'Extended', "5'UTR", "3'UTR", 'Internal',
        'Novel'
    ]
    if args.chrmap is not None:
        # Two-way chromosome name mapping shared by the bam and fasta readers.
        chrmap = {}
        for lst in io.splitIter(args.chrmap):
            chrmap[lst[0]] = lst[1]
            chrmap[lst[1]] = lst[0]
        bam.chrmap = chrmap
        fa.chrmap = chrmap
    if args.harrwidth is not None: harrwidth = args.harrwidth
    elif args.harr: harrwidth = 15
    verbose = args.verbose
    alt = args.alt
    if args.altcodons is not None:
        alt = True
        if args.altcodons[0].upper() == 'ALL': orf.cstartlike = orf.allcodons
        else: orf.cstartlike = [c.upper() for c in args.altcodons]
    tpth, fpth, minpth, framebest, framelocalbest = args.tpth, args.fpth, args.minpth, args.framebest, args.framelocalbest
    fspth = args.fspth
    longest = args.longest
    tis2ribo = args.tis2ribo
    parts = [0.1 * (i + 1) for i in range(args.nparts)]
    gfilter = None
    if args.genefilter is not None:
        gfilter = {}
        for gid in args.genefilter:
            gfilter[gid] = 1
    tisoffdict = find_offset(args.tisbampaths, args.tispara)
    riboffdict = find_offset(args.ribobampaths, args.ribopara)
    if len(args.ribobampaths) == 0:
        print(
            'No regular RiboSeq data input. TIS data will also be used as regular RiboSeq data.'
        )
        tis2ribo = True
    if len(args.tisbampaths) == 1:
        # Default location of the background estimate: next to the bam file.
        # Reuse it when present, otherwise write it there.
        if args.inestpath is None:
            path = args.tisbampaths[0] + '.bgest.txt'
            if isfile(path): args.inestpath = path
            else: args.estpath = path
    if args.agenepath is None: args.agenepath = args.genepath

    # load genome, fasta file indexing
    if args.verbose: print("{} Loading genome...".format(time.ctime()))
    genome = fa.Fa(args.genomefapath, verbose=args.verbose)

    # TIS background estimation
    if len(args.tisbampaths) == 0:
        print('No input TIS data!')
        paras, slp = [(1, 0.5)], [1]  # No TIS input
    elif args.inestpath is None:
        print("{} Estimating TIS background parameters...".format(
            time.ctime()))
        if args.verbose:
            print(
                "TIS background estimation result will be saved to {}".format(
                    args.estpath))
        est_kwds = {
            'parts': parts,
            'offdict': tisoffdict,
            'verbose': args.verbose,
            'geneformat': args.geneformat,
            'harrwidth': harrwidth,
            'paired': paired
        }
        if args.numProc > 1:
            from multiprocessing import Process
            import multiprocessing.pool

            class NoDaemonProcess(Process):
                # make 'daemon' attribute always return False
                def _get_daemon(self):
                    return False

                def _set_daemon(self, value):
                    pass

                daemon = property(_get_daemon, _set_daemon)

            class MyPool(multiprocessing.pool.Pool):
                Process = NoDaemonProcess

            est_kwds['numProc'] = args.numProc
            est_pool = MyPool(1)  # This is for memory efficiency
            paras, slp, data = est_pool.apply(ribo.estimateTISbg,
                                              args=(args.agenepath,
                                                    args.tisbampaths,
                                                    args.genomefapath),
                                              kwds=est_kwds)
            est_pool.close()
        else:
            # Uses the annotation gene file like the multi-process branch, so
            # the estimate does not depend on numProc.
            est_kwds['numProc'] = 1
            paras, slp, data = ribo.estimateTISbg(args.agenepath,
                                                  args.tisbampaths,
                                                  args.genomefapath,
                                                  **est_kwds)
        with open(args.estpath, 'w') as estfile:
            for i in range(len(parts)):
                estfile.write("{}\t{}\t{}\t{}\t{}\n".format(
                    paras[i][0], paras[i][1], parts[i], slp[i], data[i]))

    else:
        paras, slp = [], []
        with open(args.inestpath, 'r') as inestfile:
            for line in inestfile:
                lst = line.strip().split('\t')
                paras.append((float(lst[0]), float(lst[1])))
                # SECURITY NOTE(review): eval() executes whatever the
                # estimate file contains; only load files from trusted
                # sources.
                slp.append(eval(lst[3]))

    if args.inprofile is not None and not isfile(args.inprofile):
        print('inprofile {} not found!'.format(args.inprofile))
        # The missing input profile is created by this run instead.
        if args.transprofile is None:
            transprofile = args.inprofile

    pool = None
    if args.numProc > 1:
        from multiprocessing import Pool
        pool = Pool(processes=args.numProc - 1)

    cds_regions = {}  # chr -> strand -> one Interval per frame
    known_tis = {}  # chr -> strand -> {genome position of annotated TIS: 1}

    def add_gene_cds(g):
        # Register the CDS regions and annotated TISs of one gene.
        if g.chr not in cds_regions:
            cds_regions[g.chr] = {
                '+': [interval.Interval() for _ in range(3)],
                '-': [interval.Interval() for _ in range(3)]
            }
            known_tis[g.chr] = {'+': {}, '-': {}}
        for t in g.trans:
            cr = interval.cds_region_trans(t)
            for i in range(3):
                cds_regions[t.chr][t.strand][i].lst += cr[i].lst
            tis = t.cds_start(cdna=False)
            if tis is not None: known_tis[t.chr][t.strand][tis] = 1

    if args.agenepath != args.genepath:
        if verbose: print('Loading CDS annotation...')
        for g in io.geneIter(args.agenepath,
                             fileType=args.geneformat,
                             chrs=genome.idx,
                             verbose=args.verbose):
            add_gene_cds(g)

    inorf = None
    if args.input is not None:
        if verbose: print('Loading candidates...')
        inorf = {}
        with open(args.input, 'r') as infile:
            for line in infile:
                lst = line.strip().split()
                if not lst: continue  # skip blank lines
                tid, tis, stop = lst[0], int(lst[1]), int(lst[2])
                if tid not in inorf: inorf[tid] = []
                inorf[tid].append([tis, stop])
    inprofile = None
    if args.inprofile is not None:
        if isfile(args.inprofile):
            if verbose: print('Loading transcript profile...')
            inprofile = {}
            for lst in io.splitIter(args.inprofile):
                # SECURITY NOTE(review): eval() executes whatever the profile
                # file contains; only load files from trusted sources.
                # Lines that do not evaluate (e.g. the header) are skipped.
                try:
                    gid, tid, tispf, ribopf = lst[0], lst[1], eval(
                        lst[3]), eval(lst[4])
                except Exception:
                    continue
                if gid not in inprofile: inprofile[gid] = {}
                inprofile[gid][tid] = tispf, ribopf

    print("{} Predicting...".format(time.ctime()))
    profile = exp.Profile()
    title = ['TISGroup', 'TISCounts', 'TISPvalue', 'RiboPvalue', 'RiboPStatus']
    j = [0, 0]  # total number of ORF/TIS for BH correction
    gene_iter = io.geneIter(args.genepath,
                            fileType=args.geneformat,
                            chrs=genome.idx,
                            verbose=args.verbose)
    para_iter = genePara(gene_iter, inorf, inprofile)
    if pool is None:
        # itertools.imap only exists on Python 2; map is its lazy equivalent
        # on Python 3.
        imap = getattr(itertools, 'imap', map)
        pred_iter = imap(_pred_gene, para_iter)
    else:
        pred_iter = pool.imap_unordered(_pred_gene, para_iter, chunksize=5)
    tpfile = None
    if transprofile is not None:
        tpfile = open(transprofile, 'w')
        tpfile.write('Gid\tTid\tSymbol\tTISProf\tRiboProf\n')

    try:
        for es, ji, tpfs, g in pred_iter:
            j[0] += ji[0]
            j[1] += ji[1]
            for e in es:
                profile.add_exp(e)
                if verbose >= 2: print('{} {}'.format(time.ctime(), str(e)))
            if tpfile is not None:
                for tid in tpfs:
                    tpfile.write(io.tabjoin(tid, tpfs[tid]) + '\n')
            add_gene_cds(g)
    finally:
        if tpfile is not None:
            tpfile.close()
    if pool is not None:
        pool.close()
        pool.join()

    for chrom in cds_regions:
        for strand in cds_regions[chrom]:
            for i in range(3):
                cds_regions[chrom][strand][i].check()
    print("{} Checking overlap with known CDS..".format(time.ctime()))
    for e in profile:
        if e.tistype == 0: continue
        elif e.gtis in known_tis[e.chr][e.strand]: e.id += ':Known'
        elif e.tistype > 1:
            # Flag ORFs that share a reading frame with an annotated CDS.
            for i in range(3):
                its = cds_regions[e.chr][e.strand][i].intersect(e.cr[i])
                if its.rlen() > 0:
                    e.id += ':CDSFrameOverlap'
                    break

    print("{} BH correcting...".format(time.ctime()))
    profile.BHcorrection(2, total=j[1],
                         append=True)  # Calculate BH FDR of TIS p value
    profile.BHcorrection(3, total=j[0], append=True)  # Frame p value
    # Without TIS data the number of tested ORFs is used as the total.
    fisher_total = j[0] if len(tisbampaths) == 0 else j[1]
    profile.BHcorrection(5, total=fisher_total,
                         append=True)  # Calculate BH FDR for Fisher's p value

    s = "Gid\tTid\tSymbol\tGeneType\tGenomePos\tStartCodon\tStart\tStop\tTisType\t"
    s += '\t'.join(title)
    s += '\tFisherPvalue\tTISQvalue\tFrameQvalue\tFisherQvalue\tAALen'
    if seq: s += '\tSeq'
    if aaseq: s += '\tAASeq'
    if blocks: s += '\tBlocks'
    s += '\n'

    outfile = open(args.output, 'w')
    allout = None
    try:
        outfile.write(s)

        # The table of all results is skipped when switched off, or when
        # fsqth == 1 (the main output is then not filtered by q value).
        if args.allresult is not None and args.allresult.upper() == 'OFF':
            allout = None
        elif args.fsqth == 1:
            allout = None
        else:
            if args.allresult is None:
                lst = args.output.split('.')
                if lst[-1] == 'txt': args.allresult = args.output[:-4] + '_all.txt'
                else: args.allresult = args.output + '_all.txt'
            allout = open(args.allresult, 'w')
            allout.write(s)

        for e in profile:
            if len(tisbampaths) == 0:
                e.data[5], e.data[8] = None, None  # No Fisher's
            s = "%s\t%d" % (e, e.length)
            if seq: s += '\t' + e.sq
            if aaseq: s += '\t' + e.aa
            if blocks: s += '\t' + e.blocks
            s += '\n'
            if allout is not None: allout.write(s)
            if e.q <= args.fsqth:
                outfile.write(s)
    finally:
        outfile.close()
        if allout is not None:
            allout.close()