Esempio n. 1
0
 def __init__(
     self,
     path,
     lastcol=None,
     lastrow=None,
     colmeta=None,
     rowmeta=None,
     eps_noise=False,
 ):
     """Load a table and gather optional row / column metadata.

     Args:
         path: Passed to ``table`` to load the data.
         lastrow: If not None, handed to ``tbl.head(..., invert=True)``
             (presumably strips the leading metadata rows up to this
             header -- confirm against ``table.head``).
         lastcol: Same as ``lastrow`` but with ``transposed=True``.
         colmeta: Optional iterable of column-metadata sources. A source
             that is an existing file path is read with
             ``col2dict(value=1)``; anything else is looked up in the
             unstripped backup table via ``rowdict``.
         rowmeta: Optional iterable of row-metadata sources, resolved
             like ``colmeta`` but via ``coldict`` for non-file sources.
         eps_noise: If truthy, the table is converted with ``float()``
             and ``c_eps * random.random()`` is added to every entry.

     Sets ``tbl``, ``bak``, ``row``, ``col``, ``dat`` and the four
     ``colmeta`` / ``colmetaname`` / ``rowmeta`` / ``rowmetaname``
     attributes (the latter are None when no sources were given).
     """
     self.tbl = table(path)
     # keep an unstripped copy: in-table metadata is read from it below
     self.bak = self.tbl.copy()
     if lastrow is not None:
         self.tbl.head(lastrow, invert=True)
     if lastcol is not None:
         self.tbl.head(lastcol, invert=True, transposed=True)
     self.row = self.tbl.rowheads[:]
     self.col = self.tbl.colheads[:]
     if eps_noise:
         self.tbl.float()
         self.tbl.apply_entries(
             lambda entry: entry + c_eps * random.random())
     self.dat = self.tbl.table2array()
     # column metadata: one value list per source, aligned with self.col
     if colmeta is not None:
         self.colmeta = []
         self.colmetaname = []
         for source in colmeta:
             is_file = os.path.exists(source)
             if is_file:
                 warn("Loading col metadata from file:", source)
                 lookup = col2dict(source, value=1)
             else:
                 lookup = self.bak.rowdict(source)
             self.colmeta.append(
                 [lookup.get(head, c_str_none) for head in self.col])
             self.colmetaname.append(
                 path2name(source) if is_file else source)
     else:
         self.colmeta = None
         self.colmetaname = None
     # row metadata: one value list per source, aligned with self.row
     if rowmeta is not None:
         self.rowmeta = []
         self.rowmetaname = []
         for source in rowmeta:
             is_file = os.path.exists(source)
             if is_file:
                 warn("Loading row metadata from file:", source)
                 lookup = col2dict(source, value=1)
             else:
                 lookup = self.bak.coldict(source)
             self.rowmeta.append(
                 [lookup.get(head, c_str_none) for head in self.row])
             self.rowmetaname.append(
                 path2name(source) if is_file else source)
     else:
         self.rowmeta = None
         self.rowmetaname = None
Esempio n. 2
0
def main():
    """Run a Fisher enrichment analysis and write a tab-delimited report.

    Reads the gene list (and optional background) with ``col2dict``, the
    gene sets with ``polymap``, calls ``fisher_enrich`` and writes one row
    per result (preceded by the ``c_fisher_fields`` header) to
    ``args.outfile``, or to stdout when no outfile was given.

    Returns:
        None
    """
    args = get_args()
    # with linkage, column 1 is loaded as the value for each gene
    link_value = 1 if args.linked else None

    # load genes (accounting for linkage); unlinked genes map to themselves
    genes = col2dict(
        args.genes,
        value=link_value,
        headers=args.skip_headers,
    )
    genes = {g: (g if k is None else k) for g, k in genes.items()}

    # load background (accounting for linkage)
    background = None
    if args.background is not None:
        background = col2dict(
            args.background,
            value=link_value,
            headers=args.skip_headers,
        )
        background = {
            g: (g if k is None else k)
            for g, k in background.items()
        }

    # load gene sets
    gene_sets = polymap(
        args.gene_sets,
        reverse=args.reversed_mapping,
    )

    # run analysis
    results = fisher_enrich(
        genes,
        gene_sets,
        depletions=not args.exclude_depletions,
        background=background,
        intersect_background=args.intersect_background,
        intersect_annotated=args.intersect_annotated,
        fdr=args.fdr,
        min_expected_overlap=args.min_expected_overlap,
        verbose=False,
    )

    # write results; only a handle opened here is closed (never sys.stdout),
    # and it is closed even if writing a row raises
    to_stdout = args.outfile is None
    fh = sys.stdout if to_stdout else open(args.outfile, "w")
    try:
        writer = csv.writer(fh, dialect="excel-tab")
        writer.writerow(c_fisher_fields)
        for R in results:
            writer.writerow(R.row())
        # wrapup
        if len(results) == 0:
            say("# NO SIGNIFICANT ENRICHMENTS")
    finally:
        if not to_stdout:
            fh.close()
    return None
Esempio n. 3
0
def main():
    """Draw weighted random proteins and call ``rand_read`` for each draw.

    Proteins are loaded with ``load_faa`` and weighted by their effective
    length (``len(seq) - frag_len + 1``), optionally multiplied by
    per-protein weights from ``args.weights`` (missing proteins get 0).
    ``args.reads`` names are then drawn from a ``WeightedChooser`` and each
    is passed to ``rand_read`` with a 1-based read counter.
    """
    args = get_args()

    # a percent identity above 1 is taken to be a percentage
    pident = args.pident
    if pident > 1:
        print >> sys.stderr, "interpret", pident, "as percent;",
        pident /= 100.0
        print >> sys.stderr, "fractional value is", pident
    mut_rate = 1 - pident

    # a read of read_len covers ceil(read_len / 3) protein positions
    read_len = args.readlen
    frag_len = int(math.ceil(read_len / 3.0))

    # load proteins and weight them by effective length
    faa = load_faa(args.faa, frag_len)
    weights = {}
    for name, seq in faa.items():
        weights[name] = len(seq) - frag_len + 1

    # augment weights?
    if args.weights:
        extra = col2dict(args.weights, value=1, func=float)
        weights = {
            name: base * extra.get(name, 0)
            for name, base in weights.items()
        }

    # make the reads (weighting by protein length)
    chooser = WeightedChooser(weights)
    for read, prot_name in enumerate(chooser.iter_choice(args.reads), 1):
        rand_read(
            faa[prot_name], prot_name, read, read_len, frag_len, mut_rate)
Esempio n. 4
0
def metacolors(values, cmap):
    """Build a ``{value: color}`` mapping for a metadata track.

    Args:
        values: Iterable of metadata values (must be hashable).
        cmap: Either the path of an existing file, loaded with
            ``col2dict(value=1)`` as an explicit value-to-color table, or
            anything else, which is forwarded to ``mu.ncolors`` together
            with the number of distinct values.

    Returns:
        dict: One entry per distinct value. Values absent from a color
        file get ``c_none_color``; the special values ``c_str_other`` and
        ``c_str_none`` (when present) always get ``c_other_color`` and
        ``c_none_color`` respectively.
    """
    levels = set(values)
    # the two special levels are colored separately, after everything else
    has_other = c_str_other in levels
    levels.discard(c_str_other)
    has_none = c_str_none in levels
    levels.discard(c_str_none)

    if not os.path.exists(cmap):
        # generated palette, assigned to the levels in sorted order
        palette = mu.ncolors(len(levels), cmap)
        colors = dict(zip(sorted(levels), palette))
    else:
        loaded = col2dict(cmap, value=1)
        # keep only the colors for levels that actually occur ...
        colors = {
            label: color
            for label, color in loaded.items()
            if label in levels
        }
        # ... and fall back to the "none" color for unlisted levels
        for label in levels:
            if label not in colors:
                colors[label] = c_none_color

    if has_other:
        colors[c_str_other] = c_other_color
    if has_none:
        colors[c_str_none] = c_none_color
    return colors
Esempio n. 5
0
 def rowlabel(self, df, des, ax_label, scaled=False, path=None):
     """Write row labels, or just an axis title, on ``self.rownames``.

     Args:
         df: Object whose ``row`` attribute lists the row names.
         des: Object providing ``wmargin(name)``; the "heatmap" and
             "rownames" margins are compared to choose the text anchor.
         ax_label: Axis title; the row count is appended as "(n=...)".
         scaled: If truthy, individual labels are drawn even when there
             are more than ``c_max_lab`` rows.
         path: Optional file read with ``col2dict(value=1)`` that
             overrides the labels; rows missing from it get "".
     """
     n_rows = len(df.row)
     ax_label = "{} (n={:,})".format(ax_label, n_rows)
     axes = self.rownames
     # keyword arguments shared by both variants of the axis title
     title_kw = dict(
         size=c_font3,
         ha="center",
         rotation=90,
         rotation_mode="anchor",
     )

     # override row names from the table?
     mapping = {} if path is None else col2dict(path, value=1)
     for name in df.row:
         mapping.setdefault(name, name if path is None else "")

     # too many rows: only a centered title
     if n_rows > c_max_lab and not scaled:
         warn("Too many row labels.")
         cx = sum(axes.get_xlim()) / 2.0
         cy = sum(axes.get_ylim()) / 2.0
         axes.text(cx, cy, ax_label, va="center", **title_kw)
         return

     # labels start at one x-limit, the title sits at the other
     x_label, x_title = axes.get_xlim()
     ha = "left"
     if des.wmargin("heatmap") > des.wmargin("rownames"):
         ha = "right"
         x_label, x_title = x_title, x_label
     axes.set_ylim(0, n_rows)
     for i, name in enumerate(df.row):
         axes.text(
             x_label,
             i + 0.5,
             mapping[name],
             ha=ha,
             va="center",
             size=c_font1,
             clip_on=False,
         )
     cy = sum(axes.get_ylim()) / 2.0
     title_va = "bottom" if ha == "left" else "top"
     axes.text(x_title, cy, ax_label, va=title_va, **title_kw)
Esempio n. 6
0
 def collabel(self, df, des, ax_label, scaled=False, path=None):
     """Write column labels, or just an axis title, on ``self.colnames``.

     Args:
         df: Object whose ``col`` attribute lists the column names.
         des: Object providing ``hmargin(name)``; the "heatmap" and
             "colnames" margins are compared to choose the text anchor.
         ax_label: Axis title; the column count is appended as "(n=...)".
         scaled: If truthy, individual labels are drawn even when there
             are more than ``c_max_lab`` columns.
         path: Optional file read with ``col2dict(value=1)`` that
             overrides the labels; columns missing from it get "".
     """
     n_cols = len(df.col)
     ax_label = "{} (n={:,})".format(ax_label, n_cols)
     axes = self.colnames

     # override col names from the table?
     mapping = {} if path is None else col2dict(path, value=1)
     for name in df.col:
         mapping.setdefault(name, name if path is None else "")

     # too many columns: only a centered title
     if n_cols > c_max_lab and not scaled:
         warn("Too many column labels.")
         cx = sum(axes.get_xlim()) / 2.0
         cy = sum(axes.get_ylim()) / 2.0
         axes.text(cx, cy, ax_label, size=c_font3, ha="center", va="center")
         return

     # labels start at one y-limit, the title sits at the other
     y_label, y_title = axes.get_ylim()
     ha = "left"
     if des.hmargin("heatmap") < des.hmargin("colnames"):
         y_label, y_title = y_title, y_label
         ha = "right"
     axes.set_xlim(0, n_cols)
     for i, name in enumerate(df.col):
         axes.text(
             i + 0.5,
             y_label,
             mapping[name],
             rotation=45,
             rotation_mode="anchor",
             va="center",
             ha=ha,
             size=c_font1,
             clip_on=False,
         )
     title_va = "top" if ha == "left" else "bottom"
     axes.text(0, y_title, ax_label, size=c_font3, ha="left", va=title_va)
Esempio n. 7
0
 def rowlabel(self, df, ax_label, scaled=False, path=None):
     """Write row labels, or just an axis title, on ``self.rownames``.

     Args:
         df: Object whose ``row`` attribute lists the row names.
         ax_label: Axis title; the row count is appended as "(n=...)".
         scaled: If truthy, individual labels are drawn even when there
             are more than ``c_max_lab`` rows.
         path: Optional file read with ``col2dict(value=1)`` that
             overrides the labels; rows missing from it get "".
     """
     n_rows = len(df.row)
     ax_label = "{} (n={:,})".format(ax_label, n_rows)
     axes = self.rownames
     # keyword arguments shared by both variants of the axis title
     title_kw = dict(
         size=c_font3,
         ha="center",
         rotation=90,
         rotation_mode="anchor",
     )

     # override row names from the table?
     mapping = {} if path is None else col2dict(path, value=1)
     for name in df.row:
         mapping.setdefault(name, name if path is None else "")

     # too many rows: only a centered title
     if n_rows > c_max_lab and not scaled:
         warn("Too many row labels.")
         cx = sum(axes.get_xlim()) / 2.0
         cy = sum(axes.get_ylim()) / 2.0
         axes.text(cx, cy, ax_label, va="center", **title_kw)
         return

     # one label per row, centered on the row, starting at x = 0
     axes.set_ylim(0, n_rows)
     for i, name in enumerate(df.row):
         axes.text(
             0,
             i + 0.5,
             mapping[name],
             va="center",
             size=c_font1,
             clip_on=True,
         )
     # title at the upper x-limit, vertically centered
     cx = axes.get_xlim()[1]
     cy = sum(axes.get_ylim()) / 2.0
     axes.text(cx, cy, ax_label, va="bottom", **title_kw)
Esempio n. 8
0
def main():
    """Run a rank-based enrichment analysis and write a tab-delimited report.

    Loads per-key values with ``col2dict`` (each row turned into a
    ``Link``), the key sets with ``polymap``, calls ``rank_enrich`` and
    writes one row per result (preceded by the ``c_rank_fields`` header) to
    ``args.outfile``, or to stdout when no outfile was given.

    Returns:
        None
    """
    args = get_args()

    # load key values
    def make_link(row):
        # with linkage the key comes from column 1, otherwise column 0;
        # the numeric value is always the last column
        key = row[1] if args.linked else row[0]
        return Link(key, float(row[-1]))

    values = col2dict(
        args.values,
        func=make_link,
        headers=args.skip_headers,
    )

    # load key sets
    gene_sets = polymap(
        args.gene_sets,
        reverse=args.reversed_mapping,
    )

    # perform analysis
    results = rank_enrich(
        values,
        gene_sets,
        depletions=not args.exclude_depletions,
        intersect_annotated=args.intersect_annotated,
        fdr=args.fdr,
        min_overlap=args.min_overlap,
        verbose=True,
    )

    # write results; only a handle opened here is closed (never sys.stdout),
    # and it is closed even if writing a row raises
    to_stdout = args.outfile is None
    fh = sys.stdout if to_stdout else open(args.outfile, "w")
    try:
        writer = csv.writer(fh, dialect="excel-tab")
        writer.writerow(c_rank_fields)
        for R in results:
            writer.writerow(R.row())
        # wrapup
        if len(results) == 0:
            say("# NO SIGNIFICANT ENRICHMENTS")
    finally:
        if not to_stdout:
            fh.close()
    return None
Esempio n. 9
0
def main():
    """Filter ontology terms and write the accepted ones as a polymap.

    Loads the ontology from ``args.obo``, optionally attaches genes from
    ``args.mapping`` (restricted to ``args.allowed_genes`` when given), then
    applies each requested cut by setting ``term.is_acceptable = False`` on
    terms that fail it. Every accepted term is written as one tab-separated
    line (the term, followed by its genes unless ``args.terms_only``) to
    ``args.outfile``, or to stdout when no outfile was given.
    """

    args = get_args()

    # load obo / report rel type
    obo = Ontology(args.obo)
    warn("Summary of relationship types:")
    for k in sorted(parentage_types):
        warn(k, parentage_types[k])

    # attach genes
    if args.mapping is not None:
        mapping = polymap(args.mapping, reverse=args.flip)
        if args.allowed_genes is not None:
            allowed = col2dict(args.allowed_genes)
            mapping = {k: v for k, v in mapping.items() if k in allowed}
        obo.attach_genes(mapping)
        warn("# of attached genes:", len(obo.attached_genes))

    # informative cut
    if args.informative is not None:
        threshold = float(args.informative)
        # a cutoff below 1 is a fraction of the attached genes
        if threshold < 1:
            warn(
                "Intepretting informative cutoff as fraction of annotated genes"
            )
            threshold *= len(obo.attached_genes)
        threshold = int(threshold)
        obo.set_informative(threshold)
        for term in obo.iter_terms():
            if not term.is_informative:
                term.is_acceptable = False

    # pruning cut
    if args.prune is not None:
        obo.prune(args.prune)
        for term in obo.iter_terms():
            if not term.is_pruned:
                term.is_acceptable = False

    # depth cut: keep only terms at exactly the requested depth
    if args.depth is not None:
        for term in obo.iter_terms():
            if term.depth != args.depth:
                term.is_acceptable = False

    # grep cut: keep only terms whose name matches the pattern
    if args.grep is not None:
        matches = re.compile(args.grep).search
        for term in obo.iter_terms():
            if not matches(term.name):
                term.is_acceptable = False

    # namespace cut
    if args.namespace is not None:
        for term in obo.iter_terms():
            if term.namespace_short not in args.namespace:
                term.is_acceptable = False

    # output the new polymap; only a handle opened here is closed (never
    # sys.stdout), and it is closed even if writing a line raises
    to_stdout = args.outfile is None
    fh = sys.stdout if to_stdout else open(args.outfile, "w")
    try:
        for term in obo.iter_terms():
            if not term.is_acceptable:
                continue
            outline = [str(term)]
            if not args.terms_only:
                if args.ignore_progeny:
                    genes = term.genes
                else:
                    genes = term.get_progeny_genes()
                outline += list(genes)
            # same output as `print >> fh, ...`, valid on Python 2 and 3
            fh.write("\t".join(outline) + "\n")
    finally:
        if not to_stdout:
            fh.close()
Esempio n. 10
0
# ---------------------------------------------------------------

strInputPath = args.input
strMode = args.mode
# Default output path is "<input>.<mode>". str.join takes a single iterable,
# so the two pieces are wrapped in a list (passing them as two arguments
# raised TypeError whenever no output path was given).
strOutputPath = args.output if args.output is not None else ".".join( [strInputPath, strMode] )
fReadLength = args.read_length
strSampleReadsPath = args.sample_reads

# ---------------------------------------------------------------
# manipulate data
# ---------------------------------------------------------------

if strMode == "rpkm":
    if strSampleReadsPath is None:
        sys.exit( "to compute rpkm you must include a file mapping sample IDs to #s of reads" )
    # sample ID -> number of reads, expressed in millions
    dictMillions = col2dict( strSampleReadsPath, key=0, value=1, func=lambda x: float( x ) / 1e6 )

# load the table, select on "s__" headers (presumably species-level rows;
# confirm against table.grep), and collapse rows sharing the same leading
# "|"-delimited field to their median
tableCladeRPK = table( strInputPath )
tableCladeRPK.grep( "headers", "s__" )
tableCladeRPK.float()
tableCladeRPK.groupby( lambda x: x.split( "|" )[0], median )

# "rpk" leaves the values untouched; the other modes rescale every entry
if strMode != "rpk":
    for bug, sample, value in tableCladeRPK.iter_entries():
        if strMode == "coverage":
            tableCladeRPK.set( bug, sample, value * fReadLength / 1e3 )
        elif strMode == "rpkm":
            # samples with no reads get 0 rather than a division by zero
            tableCladeRPK.set( bug, sample, ( value / dictMillions[sample] ) if dictMillions[sample] > 0 else 0 )

tableCladeRPK.unfloat()
tableCladeRPK.colsort()
Esempio n. 11
0
def print2(d):
    for key, d2 in d.items():
        print key
        for key2, val in d2.items():
            print "  ", key2, "-->", val
    print


# ---------------------------------------------------------------
# col2dict tests
# ---------------------------------------------------------------

# NOTE(review): `file`, `d` and `print1` are defined outside this excerpt;
# `d` is presumably the module providing col2dict / col2dict2 -- confirm.
print "col2dict tests"
# show the start of the input file (via the shell `head`) for comparison
os.system("head %s" % (file))
# default arguments
print1(d.col2dict(file))
# with headers=True
print1(d.col2dict(file, headers=True))
# keys taken from column 1
print1(d.col2dict(file, key=1, headers=True))
# keys from column 1, values from column 2
print1(d.col2dict(file, key=1, value=2, headers=True))
# keys from column 1, value computed from the row (column 2 as float)
print1(d.col2dict(file, key=1, func=lambda row: float(row[2]), headers=True))

# ---------------------------------------------------------------
# col2dict2 tests
# ---------------------------------------------------------------

print "col2dict2 tests"
# show the start of the input file (via the shell `head`) for comparison
os.system("head %s" % (file))
# default arguments
print2(d.col2dict2(file))
# with headers=True and mirror=True
print2(d.col2dict2(file, headers=True, mirror=True))
print2(
    d.col2dict2(file,
Esempio n. 12
0
#! /usr/bin/env python

import os, sys, re, glob, argparse
from zopy.table2 import table
from zopy.dictation import col2dict
from zopy.utils import path2name

# usage: <script> <mapping file> <input table> <output table>
# mapping file: column 0 = current (shortened) header, column 1 = new header
dictMap = col2dict( sys.argv[1], key=0, value=1 )
tableData = table( sys.argv[2] )
# rename the column headers in three passes, applied in this order
for funcRename in (
    path2name,                                # 1. path2name() of the header
    lambda strHead: strHead.split( "." )[0],  # 2. keep text before first "."
    lambda strHead: dictMap[strHead],         # 3. look up the new header
):
    tableData.apply_colheads( funcRename )
tableData.dump( sys.argv[3] )