Esempio n. 1
0
File: roc.py Progetto: pwwang/ceQTL
def colselect(infile, col, cut, rev, tag):
    """Build the chain of processes that extracts one column from `infile`.

    The first column and `col` are selected, the result is optionally
    restricted by `cut`, and the selected values are optionally prefixed with
    a minus sign when `rev` is set.

    Args:
        infile: Input table handed to the first process of the chain.
        col: Column to select. If ``str(col)`` is all digits it is used as a
            column index; otherwise it is written to a column file that is
            passed to the column-select process.
        cut: Optional cutoff. Falsy values disable it. A value below 1 keeps
            rows whose `col` value is smaller than `cut`. A value above 1
            sorts on field 2 and keeps the first ``int(cut) + 1`` lines.
            A value of exactly 1 matches neither branch, so no cutoff step is
            added.
        rev: If truthy, field 2 is sorted without the reverse flag in the
            ``cut > 1`` case, and a final step prepends ``"-"`` to the `col`
            value of every row.
        tag: Tag passed to every process copy made here.

    Returns:
        tuple: ``(start, end)`` -- the first and the last process of the chain.
    """
    pTsvColSelect1 = pTsvColSelect.copy(tag=tag)
    if str(col).isdigit():
        # Numeric column: select by index directly from the input file.
        pTsvColSelect1.input = [infile]
        pTsvColSelect1.args.cols = [0, int(col)]
        start = end = pTsvColSelect1
    else:
        # Named column: build a small file holding the first header name and
        # `col`, then feed it to the column-select process alongside `infile`.
        pTsvHeader1 = pTsvHeader.copy(tag=tag)
        pTsvHeader1.input = [infile]

        pShell1 = pShell.copy(tag=tag)
        pShell1.depends = pTsvHeader1
        pShell1.args.cmd = """
        head -1 {{i.infile | quote}} > {{o.outfile | quote}}
        echo "%s" >> {{o.outfile | quote}}
        """ % col

        pTsvColSelect1.depends = pShell1
        # NOTE(review): two consecutive assignments are intentional here --
        # presumably the first declares the input keys and the second supplies
        # the channel transformation; confirm against the input setter before
        # merging them.
        pTsvColSelect1.input = 'infile:file, colfile:file'
        pTsvColSelect1.input = lambda ch: ch.insert(0, infile)
        start = pTsvHeader1
        end = pTsvColSelect1

    if cut and cut < 1:
        pTsv1 = pTsv.copy(tag=tag)
        pTsv1.depends = pTsvColSelect1
        # Embed the cutoff with repr() rather than "%f": "%f" keeps only six
        # decimals, so a cutoff such as 1e-8 would be written as 0.000000 and
        # filter out every row.
        # NOTE(review): the row is indexed with the original `col` value even
        # when `col` was numeric -- confirm the row object resolves that as
        # intended after column selection.
        pTsv1.args.row = 'lambda row: float(row[%r]) < %r' % (col, float(cut))
        end = pTsv1
    elif cut and cut > 1:
        pSort1 = pSort.copy(tag=tag)
        pSort1.depends = pTsvColSelect1
        pSort1.args.inopts.skip = 1
        # Sort key: field 2, general-numeric, reversed unless `rev` is set.
        pSort1.args.params.k = '2g%s' % ('' if rev else 'r')

        pShell2 = pShell.copy(tag=tag)
        pShell2.depends = pSort1
        # One line more than `cut` is kept -- presumably to account for a
        # header line; verify against the sort process output.
        pShell2.args.cmd = "head -n %d {{i.infile | quote}} > {{o.outfile | quote}}" % (
            int(cut) + 1)
        end = pShell2

    if rev:
        pTsv2 = pTsv.copy(tag=tag)
        pTsv2.depends = end
        # Prefix the selected value with "-" in place.
        pTsv2.args.row = 'lambda row: row.__setitem__({0!r}, "-" + row[{0!r}])'.format(
            col)
        end = pTsv2
    return start, end
Esempio n. 2
0
def colselect(infile, col, cut):
    """Build the chain of processes that extracts and filters one column.

    The first column and `col` are selected from `infile`, then rows are kept
    only when the value in the second column is smaller than `cut`.

    Args:
        infile: Input table. The part of its file name before the first dot
            is used as the tag of every process copy made here.
        col: Column to select. If ``str(col)`` is all digits it is used as a
            column index; otherwise it is written to a column file that the
            column-select process depends on.
        cut: Cutoff; must be convertible with ``float()``.

    Returns:
        tuple: ``([start], [end])`` -- one-element lists holding the first and
        the last process of the chain.
    """
    tag = Path(infile).stem.split(".")[0]
    pTsvColSelect1 = pTsvColSelect.copy(tag=tag)
    if str(col).isdigit():
        # Numeric column: select by index directly from the input file.
        pTsvColSelect1.input = [infile]
        pTsvColSelect1.args.cols = [0, int(col)]
        start = pTsvColSelect1
    else:
        # Named column: build a small file holding the first header name and
        # `col`, then feed it to the column-select process alongside `infile`.
        pTsvHeader1 = pTsvHeader.copy(tag=tag)
        pTsvHeader1.input = [infile]
        start = pTsvHeader1

        pShell1 = pShell.copy(tag=tag)
        pShell1.depends = pTsvHeader1
        pShell1.args.cmd = """
        head -1 {{i.infile | quote}} > {{o.outfile | quote}}
        echo "%s" >> {{o.outfile | quote}}
        """ % col

        pTsvColSelect1.depends = pShell1
        pTsvColSelect1.input = lambda ch: ch.insert(0, infile)

    pTsv1 = pTsv.copy(tag=tag)
    pTsv1.depends = pTsvColSelect1
    # Embed the cutoff with repr() rather than "%f": "%f" keeps only six
    # decimals, so a cutoff such as 1e-8 would be written as 0.000000 and
    # filter out every row.
    pTsv1.args.row = "lambda row: float(row[1]) < %r" % float(cut)
    return [start], [pTsv1]
Esempio n. 3
0
def ceQTL_filter_row(args, pCefile, hasAdjPval, starts):
    """Append a row-filtering process after `pCefile` when a filter is given.

    Args:
        args: Options object; `args.row` is the row filter and `args.helper`
            the helper code passed along with it.
        pCefile: Process whose output is to be filtered.
        hasAdjPval: Whether the input rows carry an ``AdjPval`` field that
            must be cast to float in addition to ``Pval``.
        starts: Not used by this function.

    Returns:
        `pCefile` unchanged when `args.row` is falsy, otherwise the newly
        created filtering process.
    """
    if not args.row:
        return pCefile

    # Input-row converters: cast the p-value fields to float before the user
    # filter sees the row.
    cast_pval_and_adj = 'lambda r: setattr(r, "Pval", float(r.Pval)) or setattr(r, "AdjPval", float(r.AdjPval)) or r'
    cast_pval_only = 'lambda r: setattr(r, "Pval", float(r.Pval)) or r'

    pFilterRow = pTsv.copy()
    pFilterRow.depends = pCefile
    pFilterRow.args.helper = args.helper
    pFilterRow.args.row = args.row
    pFilterRow.args.inopts.row = cast_pval_and_adj if hasAdjPval else cast_pval_only
    return pFilterRow
Esempio n. 4
0
def ceQTL_atsnp(args):
    """Configure and run the atSNP pipeline on a ceQTL result file.

    Two branches start from `args.cefile`: one converts it to a sorted,
    de-duplicated BED-like SNP file, the other sorts it by TF and joins it
    with `args.tflist`. Both feed `pAtSnp`. When `args.man` is set, the
    `pAtSnp` output is additionally converted, sorted and passed to
    `pManhattan`.

    The function configures process objects taken from the enclosing scope
    (`pGTMat2Bed`, `pSortByTF`, `pAtSnp`, `pBedSort`, `pManhattan`) in place
    and then starts the pipeline; it returns nothing.

    Args:
        args: Options object. Attributes read here: `cefile`, `tflist`,
            `motifdb`, `nthread`, `outfile`, `man`, `hifile`.
    """

    # Branch 1: ceQTL file -> BED-like SNP records.
    pGTMat2Bed.input = [args.cefile]
    pGTMat2Bed.args.inopts.cnames = False
    pGTMat2Bed.args.inopts.skip = 1
    pGTMat2Bed.args.inopts.delimit = '.'
    pGTMat2Bed.args.name = 'full'
    pGTMat2Bed.args.ncol = 8

    # Sort the SNP records and drop duplicates.
    pSortSnp = pSort.copy()
    pSortSnp.depends = pGTMat2Bed
    pSortSnp.args.unique = True

    # Branch 2: ceQTL file sorted by TF.
    pSortByTF.input = [args.cefile]

    # Join the TF list (inserted as the first input) with the sorted ceQTL
    # file; the two inputs use different skip counts and delimiters.
    pFilterTFs = pTsvJoin.copy()
    pFilterTFs.depends = pSortByTF
    pFilterTFs.input = lambda ch: [ch.insert(0, args.tflist).flatten()]
    pFilterTFs.args.inopts.cnames = False
    pFilterTFs.args.inopts.skip = [0, 1]
    pFilterTFs.args.inopts.delimit = ['\t', '.']
    pFilterTFs.args.outopts.cnames = False
    # Records are matched on their second field; only the record from the
    # first input is written out.
    pFilterTFs.args.match = 'lambda r1, r2: TsvJoin.compare(r1[1], r2[1])'
    pFilterTFs.args.do = 'lambda out, r1, r2: out.write(r1)'

    # atSNP itself, fed by the filtered TFs and the sorted SNPs.
    pAtSnp.depends = pFilterTFs, pSortSnp
    pAtSnp.args.tfmotifs = args.motifdb
    pAtSnp.args.fdr = False
    pAtSnp.args.plot = False
    pAtSnp.args.nthread = args.nthread
    setOutfile(pAtSnp, args.outfile)

    if args.man:
        # Optional Manhattan plot: convert atSNP rows, sort them, then plot.
        pToMan = pTsv.copy()
        pToMan.depends = pAtSnp
        pToMan.args.outopts.cnames = False
        # [chr1, 12496021, rs6541023, 0.04]
        # snprec turns the first three "_"-separated parts of r.Snp into a
        # six-field record: [x[0], int(x[1]) - 1, x[1], x[2], 0, "+"];
        # r.Pval_Diff is appended as a seventh field.
        pToMan.args.helper = 'snprec = lambda x: [x[0], int(x[1]) - 1, x[1], x[2], 0, "+"]'
        pToMan.args.row = 'lambda r: snprec(r.Snp.split("_")[:3]) + [r.Pval_Diff]'

        pBedSort.depends = pToMan
        pBedSort.args.chrorder = params.chrorder.value

        pManhattan.depends = pBedSort
        if args.hifile:
            # Bind the highlight file to the channel coming from pBedSort.
            pManhattan.input = lambda ch: ch.cbind(args.hifile)
        pManhattan.args.gsize = params.gsize.value
        setOutfile(pManhattan, args.man)

    # Both branches are entry points of the pipeline.
    PyPPL().start(pGTMat2Bed, pSortByTF).run()
Esempio n. 5
0
def pipeline(opts):
    """Assemble the processes for the hypergeometric test.

    The selected column of `opts.infile` is column-bound to a gold-standard
    table built from `opts.gold`, the bound table is recoded to 0/1 values,
    and the result is handed to `pHypergeom`.

    Args:
        opts: Options object. Attributes read here: `infile`, `col`, `cut`,
            `gold`, `bign`.

    Returns:
        list: ``[pTsvGold, start]`` where `start` is the first element
        returned by `colselect`.
    """
    select_start, select_end = colselect(opts.infile, opts.col, opts.cut)

    # Gold standard: keep the first column and add a constant 1 next to it.
    pTsvGold = pTsv.copy()
    pTsvGold.input = [opts.gold]
    pTsvGold.args.inopts.cnames = False
    pTsvGold.args.row = 'lambda row: [row[0], 1]'

    # Give the gold-standard table a header.
    pTsvReplaceHeader.depends = pTsvGold
    pTsvReplaceHeader.args.inopts.cnames = False
    pTsvReplaceHeader.args.cnames = ['ROWNAME', 'GOLD']

    # Bind the gold-standard table and the selected column side by side.
    pTsvCbind.depends = pTsvReplaceHeader, select_end
    pTsvCbind.input = lambda *chs: [[ch.get() for ch in chs]]

    # Recode for the test: column 1 becomes 1 only where it equals "1",
    # column 2 becomes 1 wherever it is not "NA" (presence), else 0.
    pPrepHG = pTsv.copy()
    pPrepHG.depends = pTsvCbind
    pPrepHG.args.row = ('lambda row: row.__setitem__(1, int(row[1] == "1")) '
                        'or row.__setitem__(2, int(row[2] != "NA"))')

    pHypergeom.depends = pPrepHG
    pHypergeom.args.intype = 'raw'

    # `opts.bign` is either the population size itself or something to be
    # counted with `wc -l`.
    population = opts.bign
    if not str(population).isdigit():
        population = cmdy.wc(l=population).strip().split()[0]
    pHypergeom.args.N = int(population)

    return [pTsvGold, select_start]
Esempio n. 6
0
File: roc.py Progetto: pwwang/ceQTL
def pipeline(opts):
    """Assemble the processes for the ROC analysis.

    One column-selection chain is built per input file. Their outputs are
    combined with a gold-standard table built from `opts.gold` and passed to
    `pROC`, either as one joined table (default) or as one table per input
    file (`opts.sep`).

    Args:
        opts: Options object. Attributes read here: `infiles`, `cols`, `cuts`,
            `rev`, `gold`, `sep`, `names`, `outdir`.

    Returns:
        list: The start processes -- one per input file, followed by the
        gold-standard process.
    """
    starts, ends = [], []
    for idx, infile in enumerate(opts.infiles):
        column = opts.cols[idx]
        cutoff = opts.cuts[idx] if opts.cuts else None
        first, last = colselect(infile, column, cutoff, opts.rev,
                                "in%s" % (idx + 1))
        starts.append(first)
        ends.append(last)

    # Gold standard: keep the first column and add a constant 1 next to it.
    pTsvGold = pTsv.copy()
    pTsvGold.input = [opts.gold]
    pTsvGold.args.inopts.cnames = False
    pTsvGold.args.row = 'lambda row: [row[0], 1]'
    starts.append(pTsvGold)

    pROC.args.params.bestCut = False
    pROC.args.ggs.theme_bw = {}

    pTsvCbind.args.inopts.dup = 'ignore'

    # Shared row filter: drop rows with an "NA" prediction, and set the gold
    # column to 0 where it is "NA" (a record that is not a hit).
    gold_false_row = ('lambda row: False '
                      'if any(r == "NA" for r in row[2:]) '
                      'else row.__setitem__(1, 0) '
                      'if row[1] == "NA" '
                      'else None')

    if opts.sep:
        # Separate mode: one bound table (and one ROC) per input file.
        pTsvReplaceHeader.depends = pTsvGold
        pTsvReplaceHeader.args.inopts.cnames = False
        pTsvReplaceHeader.args.cnames = ['ROWNAME', 'GOLD']

        # The gold-standard table goes first so the input lambda can pair it
        # with every prediction channel.
        ends.insert(0, pTsvReplaceHeader)
        pTsvCbind.depends = ends
        pTsvCbind.output = 'outfile:file:{{i.infiles | [-1] | stem2 }}.cbound.txt'
        pTsvCbind.input = (
            lambda ch_gold, *chs: [ch_gold.cbind(ch).flatten() for ch in chs])

        pTsvGoldFalse = pTsv.copy()
        pTsvGoldFalse.depends = pTsvCbind
        pTsvGoldFalse.config.export_dir = opts.outdir
        pTsvGoldFalse.args.row = gold_false_row

        pROC.depends = pTsvGoldFalse
        pROC.config.export_dir = opts.outdir
    else:
        # Joint mode: sort every prediction file, join them into one table,
        # then bind that table to the gold standard.
        pSort.depends = ends
        pSort.input = lambda *chs: [ch.get() for ch in chs]
        pSort.args.inopts.skip = 1

        pTsvJoin.depends = pSort
        pTsvJoin.input = lambda ch: [ch.flatten()]
        pTsvJoin.args.inopts.cnames = True
        pTsvJoin.args.do = "lambda writer, *rs: writer.write([rs[0][0]] + [r[1] for r in rs])"

        pTsvCbind.depends = pTsvGold, pTsvJoin
        pTsvCbind.input = lambda ch1, ch2: [ch1.cbind(ch2).flatten()]
        pTsvCbind.args.inopts.cnames = False
        pTsvCbind.args.inopts.rnames = True
        pTsvCbind.args.fill = True
        pTsvCbind.args.fn2cname = "NULL"

        pTsvGoldFalse = pTsv.copy()
        pTsvGoldFalse.depends = pTsvCbind
        pTsvGoldFalse.args.row = gold_false_row

        # Name the columns only after the table is complete.
        pTsvReplaceHeader.depends = pTsvGoldFalse
        pTsvReplaceHeader.args.cnames = ["ROWNAME", "GOLD"] + opts.names

        pROC.depends = pTsvReplaceHeader

    return starts