Python IOTools.writeMatrix Examples

Programming Language: Python

Namespace/Package Name: CGAT

Class/Type: IOTools

Method/Function: writeMatrix

Examples at hotexamples.com: 6

Python IOTools.writeMatrix - 6 examples found. These are the top-rated real-world Python examples of CGAT.IOTools.writeMatrix, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

openFile(30)

ReadMap(23)

ReadList(21)

isEmpty(14)

writeLines(9)

readMap(9)

which(8)

getInvertedDictionary(7)

readList(7)

prettyPercent(7)

zapFile(6)

convertDictionary(6)

snip(5)

FilePool(5)

iterate(5)

getNumLines(4)

readTable(4)

flatten(4)

readMultiMap(3)

str2val(3)

touchFile(3)

writeMatrix(3)

isComplete(2)

getLastLine(2)

readMatrix(2)

val2str(2)

human2bytes(1)

force_str(1)

cloneFile(1)

prettyFloat(1)

Example #1

Show file

File: GO.py Project: wangdi2014/cgat

    def _output(section, subsection, valuef, dtype):
        """Write the matrix built from ``results`` to two output files.

        The first file (set ``<subsection>_all``) uses the row headers
        returned by ``buildMatrix``; the second (set
        ``<subsection>_alldesc``) labels every row as
        ``<header>:<description>``, where the description is taken from
        ``go2info[header].mDescription``.

        ``section`` and ``subsection`` select the output file via
        ``getFileName``; ``valuef`` and ``dtype`` are handed on to
        ``buildMatrix`` unchanged.
        """
        # fold change matrix
        values, categories = buildMatrix(results, valuef=valuef, dtype=dtype)

        # matrix labelled with the plain row headers
        plain_target = getFileName(
            options,
            go=test_ontology,
            section=section,
            set='%s_all' % subsection)
        IOTools.writeMatrix(
            plain_target, values, categories, col_headers,
            row_header="category")

        # same matrix, row labels extended with the description
        described_target = getFileName(
            options,
            go=test_ontology,
            section=section,
            set='%s_alldesc' % subsection)
        described = []
        for category in categories:
            described.append(
                "%s:%s" % (category, go2info[category].mDescription))
        IOTools.writeMatrix(
            described_target, values, described, col_headers,
            row_header="category")

Example #2

Show file

File: GO.py Project: CGATOxford/cgat

    def _output(section, subsection, valuef, dtype):
        """Emit the matrix derived from ``results`` in two flavours.

        One file is written for the set ``<subsection>_all`` with the row
        headers as returned by ``buildMatrix``, and one for the set
        ``<subsection>_alldesc`` in which each row header is followed by
        ``:`` and ``go2info[header].mDescription``.
        """

        def _target(set_name):
            # resolve the output file for this section and the given set
            return getFileName(options,
                               go=test_ontology,
                               section=section,
                               set=set_name)

        # fold change matrix
        built = buildMatrix(results, valuef=valuef, dtype=dtype)
        matrix, row_headers = built

        IOTools.writeMatrix(_target('%s_all' % subsection),
                            matrix,
                            row_headers,
                            col_headers,
                            row_header="category")

        destination = _target('%s_alldesc' % subsection)
        annotated_headers = [
            "%s:%s" % (header, go2info[header].mDescription)
            for header in row_headers
        ]
        IOTools.writeMatrix(destination,
                            matrix,
                            annotated_headers,
                            col_headers,
                            row_header="category")

Example #3

Show file

File: pipeline_genesets.py Project: gjaime/CGATPipelines

def buildGeneListMatrix(infiles, outfile):
    '''build a gene list matrix for simple pathway analysis
    based on hypergeometric test.

    A gene list is derived from a gene set by
    applying thresholds to the input data set. The
    thresholds are defined in the configuration file.

    For every input file a track name is derived from the file's
    basename (``P.snip(..., ".tsv.gz")``). The foreground and
    background sets of that track contain the index values of all rows
    whose value in the configured field lies within
    ``[min_threshold, max_threshold]`` (both bounds inclusive). Field
    and thresholds are looked up in ``PARAMS`` under
    ``<track>_foreground_*`` and ``<track>_background_*``.

    Arguments
    ---------
    infiles : list
        Tab-separated tables; the first column is used as index.
    outfile : string
        Receives the foreground sets. Further files are written to
        ``outfile + ".bg.tsv.gz"`` (background sets),
        ``outfile + ".matrix.gz"`` and ``outfile + ".bg.matrix.gz"``
        (union/intersection matrices of foreground and background).
    '''

    genesets = []
    backgrounds = []
    headers = []
    for infile in infiles:
        # Read inside a ``with`` block so the input handle is closed as
        # soon as the table has been loaded instead of leaking one
        # open file per input.
        with IOTools.openFile(infile) as inf:
            genelist = pandas.read_csv(
                inf,
                index_col=0,
                sep='\t')

        track = P.snip(os.path.basename(infile), ".tsv.gz")
        headers.append(track)

        # foreground: rows whose field value is within the thresholds
        field = PARAMS[P.matchParameter("%s_foreground_field" % track)]
        min_threshold = PARAMS[P.matchParameter(
            "%s_foreground_min_threshold" % track)]
        max_threshold = PARAMS[P.matchParameter(
            "%s_foreground_max_threshold" % track)]
        genesets.append(set(genelist[
            (genelist[field] >= min_threshold) &
            (genelist[field] <= max_threshold)].index))

        E.info('%s: foreground: %f <= %s <= %f' % (track,
                                                   min_threshold,
                                                   field,
                                                   max_threshold))

        # background: same selection with the background parameters
        field = PARAMS[P.matchParameter("%s_background_field" % track)]
        min_threshold = PARAMS[P.matchParameter(
            "%s_background_min_threshold" % track)]
        max_threshold = PARAMS[P.matchParameter(
            "%s_background_max_threshold" % track)]

        E.info('%s: background: %f <= %s <= %f' % (track,
                                                   min_threshold,
                                                   field,
                                                   max_threshold))
        backgrounds.append(set(genelist[
            (genelist[field] >= min_threshold) &
            (genelist[field] <= max_threshold)].index))

        E.info("%s: fg=%i, bg=%i" % (track,
                                     len(genesets[-1]),
                                     len(backgrounds[-1])))

    E.info("writing gene list matrix")
    with IOTools.openFile(outfile, "w") as outf:
        SetTools.writeSets(outf, genesets, labels=headers)
    with IOTools.openFile(outfile + ".bg.tsv.gz", "w") as outf:
        SetTools.writeSets(outf, backgrounds, labels=headers)

    E.info("writing intersection/union matrix")
    # build set intersection matrix
    matrix = SetTools.unionIntersectionMatrix(genesets)
    with IOTools.openFile(outfile + ".matrix.gz", "w") as outf:
        IOTools.writeMatrix(outf, matrix, headers, headers)
    matrix = SetTools.unionIntersectionMatrix(backgrounds)
    with IOTools.openFile(outfile + ".bg.matrix.gz", "w") as outf:
        IOTools.writeMatrix(outf, matrix, headers, headers)

Example #4

Show file

File: bam2peakshape.py Project: zpeng1989/cgat

def writeMatricesForSortOrder(features_per_interval, bins, foreground_track,
                              control_tracks, shifted, sort_order):
    '''output one or more matrices for each sort order.

    For each sort order output the foreground. If there
    are additional controls and a shifted section, output
    these as well.

    The files will be named:
    matrix_<track>_<sortorder>

    In addition, ``matrix_shift_<sortorder>`` is written if `shifted`
    is set and ``matrix_sidebyside_<sortorder>`` (foreground, controls
    and shifted counts concatenated per row) if there are controls or
    a shifted section. Any ``-`` in `sort_order` is replaced by ``_``
    in the file names.

    Arguments
    ---------
    features_per_interval : list
        Must not be empty; the first entry is inspected to decide
        whether intervals carry a ``name``. Each entry provides
        ``interval``, ``foreground.counts``, ``controls[i].counts``
        and, if `shifted` is set, ``shifted.counts``.
    bins : list
        Bin positions; formatted with ``%i`` to give column headers.
    foreground_track : string
        Track name used in the foreground file name.
    control_tracks : list
        Track names of the controls, in the order of ``controls``.
    shifted : bool
        If true, also output the shifted counts.
    sort_order : string
        Name of the sort order.
    '''
    if "name" in features_per_interval[0].interval:
        names = [x.interval.name for x in features_per_interval]
    else:
        # Build a real list: ``names`` is handed to several writeMatrix
        # calls, and a lazy ``map`` object (Python 3) could be consumed
        # by the first call, leaving later matrices without row headers.
        names = [str(x) for x in range(1, len(features_per_interval) + 1)]

    bins = ["%i" % x for x in bins]
    sort_order = re.sub("-", "_", sort_order)

    # write foreground
    IOTools.writeMatrix(E.openOutputFile("matrix_%s_%s.gz" %
                                         (foreground_track, sort_order)),
                        [x.foreground.counts for x in features_per_interval],
                        row_headers=names,
                        col_headers=bins,
                        row_header="name")

    # write controls
    for idx, track in enumerate(control_tracks):
        IOTools.writeMatrix(
            E.openOutputFile("matrix_%s_%s.gz" % (track, sort_order)),
            [x.controls[idx].counts for x in features_per_interval],
            row_headers=names,
            col_headers=bins,
            row_header="name")

    # write shifted matrix
    if shifted:
        IOTools.writeMatrix(E.openOutputFile("matrix_shift_%s.gz" %
                                             (sort_order)),
                            [x.shifted.counts for x in features_per_interval],
                            row_headers=names,
                            col_headers=bins,
                            row_header="name")

    # output a combined matrix
    if len(control_tracks) > 0 or shifted:
        rows = []
        for row in features_per_interval:
            # foreground first, then each control, then shifted
            parts = [row.foreground.counts]
            parts.extend(
                [row.controls[x].counts for x in range(len(control_tracks))])
            if shifted:
                parts.append(row.shifted.counts)
            rows.append(numpy.concatenate(parts))

        # number of side-by-side sections in each combined row
        n = 1 + len(control_tracks)
        if shifted:
            n += 1

        # make column names unique and make sure they can be sorted
        # lexicographically
        all_bins = []
        for x in range(n):
            all_bins.extend(["%i:%s" % (x, b) for b in bins])

        IOTools.writeMatrix(E.openOutputFile("matrix_sidebyside_%s.gz" %
                                             (sort_order)),
                            rows,
                            row_headers=names,
                            col_headers=all_bins,
                            row_header="name")

Example #5

Show file

def buildGeneListMatrix(infiles, outfile):
    '''build a gene list matrix for simple pathway analysis
    based on hypergeometric test.

    A gene list is derived from a gene set by
    applying thresholds to the input data set. The
    thresholds are defined in the configuration file.

    Each input file yields one track, named after the file's basename
    with ``.tsv.gz`` passed to ``P.snip``. For every track two sets of
    index values are collected, foreground and background, holding
    the rows whose configured field is ``>= min_threshold`` and
    ``<= max_threshold``. The field name and thresholds come from
    ``PARAMS`` (``<track>_foreground_*`` / ``<track>_background_*``).

    Arguments
    ---------
    infiles : list
        Tab-separated tables, read with the first column as index.
    outfile : string
        Output file for the foreground sets. The background sets go to
        ``outfile + ".bg.tsv.gz"`` and the union/intersection matrices
        to ``outfile + ".matrix.gz"`` and ``outfile + ".bg.matrix.gz"``.
    '''

    genesets = []
    backgrounds = []
    headers = []
    for infile in infiles:
        # Use a context manager so the input handle is released once
        # the table is in memory; previously it was never closed.
        with IOTools.openFile(infile) as inf:
            genelist = pandas.read_csv(inf,
                                       index_col=0,
                                       sep='\t')

        track = P.snip(os.path.basename(infile), ".tsv.gz")
        headers.append(track)

        # foreground selection (inclusive bounds)
        field = PARAMS[P.matchParameter("%s_foreground_field" % track)]
        min_threshold = PARAMS[P.matchParameter("%s_foreground_min_threshold" %
                                                track)]
        max_threshold = PARAMS[P.matchParameter("%s_foreground_max_threshold" %
                                                track)]
        genesets.append(
            set(genelist[(genelist[field] >= min_threshold)
                         & (genelist[field] <= max_threshold)].index))

        E.info('%s: foreground: %f <= %s <= %f' %
               (track, min_threshold, field, max_threshold))

        # background selection (inclusive bounds)
        field = PARAMS[P.matchParameter("%s_background_field" % track)]
        min_threshold = PARAMS[P.matchParameter("%s_background_min_threshold" %
                                                track)]
        max_threshold = PARAMS[P.matchParameter("%s_background_max_threshold" %
                                                track)]

        E.info('%s: background: %f <= %s <= %f' %
               (track, min_threshold, field, max_threshold))
        backgrounds.append(
            set(genelist[(genelist[field] >= min_threshold)
                         & (genelist[field] <= max_threshold)].index))

        E.info("%s: fg=%i, bg=%i" %
               (track, len(genesets[-1]), len(backgrounds[-1])))

    E.info("writing gene list matrix")
    with IOTools.openFile(outfile, "w") as outf:
        SetTools.writeSets(outf, genesets, labels=headers)
    with IOTools.openFile(outfile + ".bg.tsv.gz", "w") as outf:
        SetTools.writeSets(outf, backgrounds, labels=headers)

    E.info("writing intersection/union matrix")
    # build set intersection matrix
    matrix = SetTools.unionIntersectionMatrix(genesets)
    with IOTools.openFile(outfile + ".matrix.gz", "w") as outf:
        IOTools.writeMatrix(outf, matrix, headers, headers)
    matrix = SetTools.unionIntersectionMatrix(backgrounds)
    with IOTools.openFile(outfile + ".bg.matrix.gz", "w") as outf:
        IOTools.writeMatrix(outf, matrix, headers, headers)

Example #6

Show file

File: bam2peakshape.py Project: Q-KIM/cgat

def writeMatricesForSortOrder(features_per_interval,
                              bins,
                              foreground_track,
                              control_tracks,
                              shifted,
                              sort_order):
    '''output one or more matrices for each sort order.

    For each sort order output the foreground. If there
    are additional controls and a shifted section, output
    these as well.

    The files will be named:
    matrix_<track>_<sortorder>

    The shifted counts are written to ``matrix_shift_<sortorder>``
    and, when there are controls or a shifted section, a combined
    matrix with all sections side by side is written to
    ``matrix_sidebyside_<sortorder>``. ``-`` in `sort_order` is
    replaced by ``_`` before building file names.

    Arguments
    ---------
    features_per_interval : list
        One entry per interval; must contain at least one entry as
        the first one is checked for a ``name`` in its ``interval``.
    bins : list
        Bin positions, converted to column headers via ``%i``.
    foreground_track : string
        Track name for the foreground matrix file.
    control_tracks : list
        Control track names; position ``i`` corresponds to
        ``controls[i]`` of each feature entry.
    shifted : bool
        Whether to output the shifted counts as well.
    sort_order : string
        Name of the sort order.
    '''
    if "name" in features_per_interval[0].interval:
        names = [x.interval.name for x in features_per_interval]
    else:
        # ``names`` is reused for every matrix written below, so it has
        # to be a list. Under Python 3 ``map`` returns a one-shot
        # iterator that could be used up by the first writeMatrix call.
        names = [str(number)
                 for number in range(1, len(features_per_interval) + 1)]

    bins = ["%i" % x for x in bins]
    sort_order = re.sub("-", "_", sort_order)

    # write foreground
    IOTools.writeMatrix(
        E.openOutputFile("matrix_%s_%s.gz" % (foreground_track, sort_order)),
        [x.foreground.counts for x in features_per_interval],
        row_headers=names,
        col_headers=bins,
        row_header="name")

    # write controls
    for idx, track in enumerate(control_tracks):
        IOTools.writeMatrix(
            E.openOutputFile("matrix_%s_%s.gz" % (track, sort_order)),
            [x.controls[idx].counts for x in features_per_interval],
            row_headers=names,
            col_headers=bins,
            row_header="name")

    # write shifted matrix
    if shifted:
        IOTools.writeMatrix(
            E.openOutputFile("matrix_shift_%s.gz" % (sort_order)),
            [x.shifted.counts for x in features_per_interval],
            row_headers=names,
            col_headers=bins,
            row_header="name")

    # output a combined matrix
    if len(control_tracks) > 0 or shifted:
        rows = []
        for row in features_per_interval:
            # section order per row: foreground, controls, shifted
            sections = [row.foreground.counts]
            sections.extend([row.controls[x].counts for x in
                             range(len(control_tracks))])
            if shifted:
                sections.append(row.shifted.counts)
            rows.append(numpy.concatenate(sections))

        # count the sections so each gets its own column prefix
        n = 1 + len(control_tracks)
        if shifted:
            n += 1

        # make column names unique and make sure they can be sorted
        # lexicographically
        all_bins = []
        for x in range(n):
            all_bins.extend(["%i:%s" % (x, b) for b in bins])

        IOTools.writeMatrix(
            E.openOutputFile("matrix_sidebyside_%s.gz" % (sort_order)),
            rows,
            row_headers=names,
            col_headers=all_bins,
            row_header="name")