Python IOTools.iterate Examples, CGAT.IOTools.iterate Python Examples

Example #1

0

Show file

    def read():
        '''yield one DATA record per row read from options.stdin.

        The contig, start and end of a window are taken from the three
        groups that options.pattern_window captures at the beginning of
        the row's test_id. Numeric columns are converted to float/int,
        the name and status columns are passed through unchanged and
        the final field of every record is set to the constant 0.

        NOTE(review): no leading non-data lines are filtered here; a
        row whose test_id does not match the pattern raises an
        AttributeError because the match object is None.
        '''
        window_regex = re.compile(options.pattern_window)

        for row in IOTools.iterate(options.stdin):

            contig, start, end = window_regex.match(row.test_id).groups()
            start = int(start)
            end = int(end)

            fields = (
                row.test_id,
                contig,
                start,
                end,
                row.treatment_name,
                float(row.treatment_mean),
                float(row.treatment_std),
                row.control_name,
                float(row.control_mean),
                float(row.control_std),
                float(row.pvalue),
                float(row.qvalue),
                float(row.l2fold),
                float(row.fold),
                int(row.significant),
                row.status,
                0,
            )
            yield DATA._make(fields)

Example #2

0

Show file

File: PipelineWindows.py Project: BioinformaticsArchive/cgat

def outputAllWindows(infile, outfile):
    '''output all Windows as a bed file with the l2fold change
    as a score.

    Every row of `infile` becomes one tab-separated output line
    ``contig, start, end, l2fold`` with l2fold formatted as ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename. It is opened with IOTools.openFile and parsed
        with IOTools.iterate; rows must provide the attributes contig,
        start, end and l2fold.
    outfile : string
        Output filename.
    '''
    outf = IOTools.openFile(outfile, "w")
    try:
        inf = IOTools.openFile(infile)
        try:
            for line in IOTools.iterate(inf):
                score = "%6.4f" % float(line.l2fold)
                outf.write(
                    "\t".join((line.contig, line.start, line.end, score)) +
                    "\n")
        finally:
            # the input handle used to be left open
            inf.close()
    finally:
        # close the output even if a row fails to parse or write
        outf.close()

Example #3

0

Show file

File: PipelineWindows.py Project: yangjl/cgat

def outputAllWindows(infile, outfile):
    '''output all Windows as a bed file with the l2fold change
    as a score.

    Every row of `infile` becomes one tab-separated output line
    ``contig, start, end, l2fold`` with l2fold formatted as ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename. It is opened with IOTools.openFile and parsed
        with IOTools.iterate; rows must provide the attributes contig,
        start, end and l2fold.
    outfile : string
        Output filename.
    '''
    outf = IOTools.openFile(outfile, "w")
    try:
        inf = IOTools.openFile(infile)
        try:
            for line in IOTools.iterate(inf):
                score = "%6.4f" % float(line.l2fold)
                outf.write(
                    "\t".join((line.contig, line.start, line.end, score)) +
                    "\n")
        finally:
            # the input handle used to be left open
            inf.close()
    finally:
        # close the output even if a row fails to parse or write
        outf.close()

Example #4

0

Show file

File: PipelineWindows.py Project: yangjl/cgat

def buildDMRStats(infile, outfile, method):
    '''build dmr summary statistics.

    Rows of `infile` are grouped by (treatment_name, control_name).
    For each group the following is counted: the number of rows
    ("tested"), the number of rows per status value, the rows with
    l2fold > 0 / < 0 ("up"/"down"), the rows with l2fold > 1 / < -1
    ("l2fold_up"/"l2fold_down") and the same categories restricted to
    rows whose significant column equals "1".

    Arguments
    ---------
    infile : string
        Input filename. It is opened with IOTools.openFile and parsed
        with IOTools.iterate; rows must provide the attributes
        treatment_name, control_name, status, significant and l2fold.
    outfile : string
        Output filename. Receives a tab-separated table with the
        columns method, treatment, control followed by the sorted
        counter names and the sorted status values.
    method : string
        Method name, written into the first column of every row.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    inf = IOTools.openFile(infile)
    try:
        for line in IOTools.iterate(inf):
            key = (line.treatment_name, line.control_name)
            r, s = results[key], status[key]
            r["tested"] += 1
            s[line.status] += 1

            is_significant = line.significant == "1"
            # parse the fold change once instead of once per comparison
            l2fold = float(line.l2fold)
            up = l2fold > 0
            down = l2fold < 0
            fold2up = l2fold > 1
            fold2down = l2fold < -1

            if up:
                r["up"] += 1
            if down:
                r["down"] += 1
            if fold2up:
                r["l2fold_up"] += 1
            if fold2down:
                r["l2fold_down"] += 1

            if is_significant:
                r["significant"] += 1
                if up:
                    r["significant_up"] += 1
                if down:
                    r["significant_down"] += 1
                if fold2up or fold2down:
                    r["fold2"] += 1
                if fold2up:
                    r["significant_l2fold_up"] += 1
                if fold2down:
                    r["significant_l2fold_down"] += 1
    finally:
        # the input handle used to be left open
        inf.close()

    # columns are the union of all counter names / status values seen
    header = ["method", "treatment", "control"]
    header1 = sorted(set(
        name for counts in results.values() for name in counts))
    header2 = sorted(set(
        name for counts in status.values() for name in counts))

    outf = IOTools.openFile(outfile, "w")
    try:
        outf.write("\t".join(header + header1 + header2) + "\n")

        for key in results.keys():
            treatment, control = key
            r = results[key]
            s = status[key]
            # counters missing for a group default to 0
            outf.write("%s\t%s\t%s\t" % (method, treatment, control))
            outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
            outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
    finally:
        # the output file used to be left open
        outf.close()

Example #5

0

Show file

File: PipelineWindows.py Project: BioinformaticsArchive/cgat

def buildDMRStats(infile, outfile, method):
    '''build dmr summary statistics.

    Rows of `infile` are grouped by (treatment_name, control_name).
    For each group the following is counted: the number of rows
    ("tested"), the number of rows per status value, the rows with
    l2fold > 0 / < 0 ("up"/"down"), the rows with l2fold > 1 / < -1
    ("l2fold_up"/"l2fold_down") and the same categories restricted to
    rows whose significant column equals "1".

    Arguments
    ---------
    infile : string
        Input filename. It is opened with IOTools.openFile and parsed
        with IOTools.iterate; rows must provide the attributes
        treatment_name, control_name, status, significant and l2fold.
    outfile : string
        Output filename. Receives a tab-separated table with the
        columns method, treatment, control followed by the sorted
        counter names and the sorted status values.
    method : string
        Method name, written into the first column of every row.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    inf = IOTools.openFile(infile)
    try:
        for line in IOTools.iterate(inf):
            key = (line.treatment_name, line.control_name)
            r, s = results[key], status[key]
            r["tested"] += 1
            s[line.status] += 1

            is_significant = line.significant == "1"
            # parse the fold change once instead of once per comparison
            l2fold = float(line.l2fold)
            up = l2fold > 0
            down = l2fold < 0
            fold2up = l2fold > 1
            fold2down = l2fold < -1

            if up:
                r["up"] += 1
            if down:
                r["down"] += 1
            if fold2up:
                r["l2fold_up"] += 1
            if fold2down:
                r["l2fold_down"] += 1

            if is_significant:
                r["significant"] += 1
                if up:
                    r["significant_up"] += 1
                if down:
                    r["significant_down"] += 1
                if fold2up or fold2down:
                    r["fold2"] += 1
                if fold2up:
                    r["significant_l2fold_up"] += 1
                if fold2down:
                    r["significant_l2fold_down"] += 1
    finally:
        # the input handle used to be left open
        inf.close()

    # columns are the union of all counter names / status values seen
    header = ["method", "treatment", "control"]
    header1 = sorted(set(
        name for counts in results.values() for name in counts))
    header2 = sorted(set(
        name for counts in status.values() for name in counts))

    outf = IOTools.openFile(outfile, "w")
    try:
        outf.write("\t".join(header + header1 + header2) + "\n")

        for key in results.keys():
            treatment, control = key
            r = results[key]
            s = status[key]
            # counters missing for a group default to 0
            outf.write("%s\t%s\t%s\t" % (method, treatment, control))
            outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
            outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
    finally:
        # the output file used to be left open
        outf.close()

Example #6

0

Show file

File: PipelineWindows.py Project: Acribbs/CGATPipelines

def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change
    as a score.

    Every row of `infile` becomes one tab-separated output line
    ``contig, start, end, l2fold`` with l2fold formatted as ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename in :term:`tsv` format. Typically the output
        from :mod:`scripts/runExpression`. Rows must provide the
        attributes contig, start, end and l2fold.
    outfile : string
        Output filename in :term:`bed` format.
    '''
    outf = IOTools.openFile(outfile, "w")
    try:
        inf = IOTools.openFile(infile)
        try:
            for line in IOTools.iterate(inf):
                score = "%6.4f" % float(line.l2fold)
                outf.write(
                    "\t".join((line.contig, line.start, line.end, score)) +
                    "\n")
        finally:
            # the input handle used to be left open
            inf.close()
    finally:
        # close the output even if a row fails to parse or write
        outf.close()

Example #7

0

Show file

File: PipelineWindows.py Project: gjaime/CGATPipelines

def outputAllWindows(infile, outfile):
    '''output all windows as a bed file with the l2fold change
    as a score.

    Every row of `infile` becomes one tab-separated output line
    ``contig, start, end, l2fold`` with l2fold formatted as ``%6.4f``.

    Arguments
    ---------
    infile : string
        Input filename in :term:`tsv` format. Typically the output
        from :mod:`scripts/runExpression`. Rows must provide the
        attributes contig, start, end and l2fold.
    outfile : string
        Output filename in :term:`bed` format.
    '''
    outf = IOTools.openFile(outfile, "w")
    try:
        inf = IOTools.openFile(infile)
        try:
            for line in IOTools.iterate(inf):
                score = "%6.4f" % float(line.l2fold)
                outf.write(
                    "\t".join((line.contig, line.start, line.end, score)) +
                    "\n")
        finally:
            # the input handle used to be left open
            inf.close()
    finally:
        # close the output even if a row fails to parse or write
        outf.close()

Example #8

0

Show file

File: PipelineWindows.py Project: gjaime/CGATPipelines

def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips")

    Rows are grouped by (treatment_name, control_name). For each
    group the number of rows ("tested") and the number of rows per
    status value are counted. Rows with status "OK" are additionally
    counted as up/down (l2fold > 0 / < 0), l2fold_up/l2fold_down
    (l2fold > 1 / < -1) and, when the significant column equals "1",
    in the corresponding "significant" categories.

    Arguments
    ---------
    infiles : list
        List of filenames with DMR output. Each is opened with
        IOTools.openFile and parsed with IOTools.iterate.
    outfile : string
        Output filename. Tab separated file with the columns
        method, treatment, control followed by the sorted counter
        names and the sorted status values.
    method : string
        Method name, written into the first column of every row.
    fdr_threshold : float
        FDR threshold to apply. Currently unused.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    header = ["method", "treatment", "control"]
    header1, header2 = [], []

    outf = IOTools.openFile(outfile, "w")
    try:
        is_first = True
        for infile in infiles:

            inf = IOTools.openFile(infile)
            try:
                for line in IOTools.iterate(inf):
                    key = (line.treatment_name, line.control_name)

                    r, s = results[key], status[key]
                    r["tested"] += 1
                    ss = line.status
                    s[ss] += 1

                    # only rows with an OK status enter the fold counts
                    if ss != "OK":
                        continue

                    is_significant = line.significant == "1"
                    # parse the fold change once per row
                    l2fold = float(line.l2fold)
                    up = l2fold > 0
                    down = l2fold < 0
                    fold2up = l2fold > 1
                    fold2down = l2fold < -1

                    if up:
                        r["up"] += 1
                    if down:
                        r["down"] += 1
                    if fold2up:
                        r["l2fold_up"] += 1
                    if fold2down:
                        r["l2fold_down"] += 1

                    if is_significant:
                        r["significant"] += 1
                        if up:
                            r["significant_up"] += 1
                        if down:
                            r["significant_down"] += 1
                        if fold2up or fold2down:
                            r["fold2"] += 1
                        if fold2up:
                            r["significant_l2fold_up"] += 1
                        if fold2down:
                            r["significant_l2fold_down"] += 1
            finally:
                # the input handle used to be left open
                inf.close()

            if is_first:
                # The column layout is fixed by the counters and status
                # values seen in the first file; names that only occur
                # in later files get no column.
                is_first = False
                header1 = sorted(set(
                    name for counts in results.values() for name in counts))
                header2 = sorted(set(
                    name for counts in status.values() for name in counts))

                outf.write("\t".join(header + header1 + header2) + "\n")

            # NOTE(review): results/status accumulate across infiles and
            # every accumulated group is written after each file, so
            # groups from earlier files are emitted again - confirm
            # this is intended.
            for key in results.keys():
                treatment, control = key
                r = results[key]
                s = status[key]
                outf.write("%s\t%s\t%s\t" % (method, treatment, control))
                outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
                outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
    finally:
        # the output file used to be left open
        outf.close()

Example #9

0

Show file

File: PipelineWindows.py Project: dormeight/CGATPipelines

def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips")

    Rows are grouped by (treatment_name, control_name). For each
    group the number of rows ("tested") and the number of rows per
    status value are counted. Rows with status "OK" are additionally
    counted as up/down (l2fold > 0 / < 0), l2fold_up/l2fold_down
    (l2fold > 1 / < -1) and, when the significant column equals "1",
    in the corresponding "significant" categories.

    Arguments
    ---------
    infiles : list
        List of filenames with DMR output. Each is opened with
        IOTools.openFile and parsed with IOTools.iterate.
    outfile : string
        Output filename. Tab separated file with the columns
        method, treatment, control followed by the sorted counter
        names and the sorted status values.
    method : string
        Method name, written into the first column of every row.
    fdr_threshold : float
        FDR threshold to apply. Currently unused.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    header = ["method", "treatment", "control"]
    header1, header2 = [], []

    outf = IOTools.openFile(outfile, "w")
    try:
        is_first = True
        for infile in infiles:

            inf = IOTools.openFile(infile)
            try:
                for line in IOTools.iterate(inf):
                    key = (line.treatment_name, line.control_name)

                    r, s = results[key], status[key]
                    r["tested"] += 1
                    ss = line.status
                    s[ss] += 1

                    # only rows with an OK status enter the fold counts
                    if ss != "OK":
                        continue

                    is_significant = line.significant == "1"
                    # parse the fold change once per row
                    l2fold = float(line.l2fold)
                    up = l2fold > 0
                    down = l2fold < 0
                    fold2up = l2fold > 1
                    fold2down = l2fold < -1

                    if up:
                        r["up"] += 1
                    if down:
                        r["down"] += 1
                    if fold2up:
                        r["l2fold_up"] += 1
                    if fold2down:
                        r["l2fold_down"] += 1

                    if is_significant:
                        r["significant"] += 1
                        if up:
                            r["significant_up"] += 1
                        if down:
                            r["significant_down"] += 1
                        if fold2up or fold2down:
                            r["fold2"] += 1
                        if fold2up:
                            r["significant_l2fold_up"] += 1
                        if fold2down:
                            r["significant_l2fold_down"] += 1
            finally:
                # the input handle used to be left open
                inf.close()

            if is_first:
                # The column layout is fixed by the counters and status
                # values seen in the first file; names that only occur
                # in later files get no column.
                is_first = False
                header1 = sorted(set(
                    name for counts in results.values() for name in counts))
                header2 = sorted(set(
                    name for counts in status.values() for name in counts))

                outf.write("\t".join(header + header1 + header2) + "\n")

            # NOTE(review): results/status accumulate across infiles and
            # every accumulated group is written after each file, so
            # groups from earlier files are emitted again - confirm
            # this is intended.
            for key in results.keys():
                treatment, control = key
                r = results[key]
                s = status[key]
                outf.write("%s\t%s\t%s\t" % (method, treatment, control))
                outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
                outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
    finally:
        # the output file used to be left open
        outf.close()

Example #10

0

Show file

File: PipelineWindows.py Project: BioXiao/CGATPipelines

def buildDMRStats(infiles, outfile, method, fdr_threshold=None):
    '''build dmr summary statistics.

    This method works from output files created by Expression.py
    (method="deseq" or method="edger") or runMEDIPS (method="medips")

    Rows are grouped by (treatment_name, control_name). For each
    group the number of rows ("tested") and the number of rows per
    status value are counted. Rows with status "OK" are additionally
    counted as up/down (l2fold > 0 / < 0), l2fold_up/l2fold_down
    (l2fold > 1 / < -1) and, when the significant column equals "1",
    in the corresponding "significant" categories.

    Arguments
    ---------
    infiles : list
        List of filenames with DMR output. Each is opened with
        IOTools.openFile and parsed with IOTools.iterate.
    outfile : string
        Output filename. Tab separated file with the columns
        method, treatment, control followed by the sorted counter
        names and the sorted status values.
    method : string
        Method name, written into the first column of every row.
    fdr_threshold : float
        Not used by this function.
    '''
    results = collections.defaultdict(lambda: collections.defaultdict(int))
    status = collections.defaultdict(lambda: collections.defaultdict(int))

    header = ["method", "treatment", "control"]
    header1, header2 = [], []

    outf = IOTools.openFile(outfile, "w")
    try:
        is_first = True
        for infile in infiles:

            inf = IOTools.openFile(infile)
            try:
                for line in IOTools.iterate(inf):
                    key = (line.treatment_name, line.control_name)

                    r, s = results[key], status[key]
                    r["tested"] += 1
                    ss = line.status
                    s[ss] += 1

                    # only rows with an OK status enter the fold counts
                    if ss != "OK":
                        continue

                    is_significant = line.significant == "1"
                    # parse the fold change once per row
                    l2fold = float(line.l2fold)
                    up = l2fold > 0
                    down = l2fold < 0
                    fold2up = l2fold > 1
                    fold2down = l2fold < -1

                    if up:
                        r["up"] += 1
                    if down:
                        r["down"] += 1
                    if fold2up:
                        r["l2fold_up"] += 1
                    if fold2down:
                        r["l2fold_down"] += 1

                    if is_significant:
                        r["significant"] += 1
                        if up:
                            r["significant_up"] += 1
                        if down:
                            r["significant_down"] += 1
                        if fold2up or fold2down:
                            r["fold2"] += 1
                        if fold2up:
                            r["significant_l2fold_up"] += 1
                        if fold2down:
                            r["significant_l2fold_down"] += 1
            finally:
                # the input handle used to be left open
                inf.close()

            if is_first:
                # The column layout is fixed by the counters and status
                # values seen in the first file; names that only occur
                # in later files get no column.
                is_first = False
                header1 = sorted(set(
                    name for counts in results.values() for name in counts))
                header2 = sorted(set(
                    name for counts in status.values() for name in counts))

                outf.write("\t".join(header + header1 + header2) + "\n")

            # NOTE(review): results/status accumulate across infiles and
            # every accumulated group is written after each file, so
            # groups from earlier files are emitted again - confirm
            # this is intended.
            for key in results.keys():
                treatment, control = key
                r = results[key]
                s = status[key]
                outf.write("%s\t%s\t%s\t" % (method, treatment, control))
                outf.write("\t".join([str(r[x]) for x in header1]) + "\t")
                outf.write("\t".join([str(s[x]) for x in header2]) + "\n")
    finally:
        # the output file used to be left open
        outf.close()