Esempio n. 1
0
 def __init__(
     self,
     path,
     lastcol=None,
     lastrow=None,
     colmeta=None,
     rowmeta=None,
     eps_noise=False,
 ):
     """Load the table at ``path`` and prepare data and optional metadata.

     ``self.bak`` keeps a copy of the table taken before any trimming;
     ``self.tbl`` is the working table. When ``lastrow`` / ``lastcol`` are
     given, ``head(..., invert=True)`` is applied to the working table
     (the column variant with ``transposed=True``) to strip metadata.
     ``self.row`` / ``self.col`` are copies of the remaining headers and
     ``self.dat`` is the result of ``table2array()``.

     With ``eps_noise`` set, the working table is converted with
     ``float()`` and ``c_eps * random.random()`` is added to every entry
     before the array is built.

     ``colmeta`` / ``rowmeta`` are iterables of metadata sources. A source
     that exists on disk is read with ``col2dict(source, value=1)`` and
     named with ``path2name``; any other source is looked up by name in
     the backup table (``rowdict`` for column metadata, ``coldict`` for
     row metadata). Headers missing from a source get ``c_str_none``.
     When an argument is None, both the value list and the name list for
     that axis are set to None.
     """

     def _gather(sources, heads, from_table, message):
         # Build parallel lists: one value vector per source, plus its name.
         vectors, names = [], []
         for source in sources:
             is_file = os.path.exists(source)
             if is_file:
                 warn(message, source)
                 lookup = col2dict(source, value=1)
             else:
                 lookup = from_table(source)
             vectors.append([lookup.get(head, c_str_none) for head in heads])
             names.append(path2name(source) if is_file else source)
         return vectors, names

     self.tbl = table(path)
     self.bak = self.tbl.copy()

     # strip metadata from the working copy; the backup still holds it
     if lastrow is not None:
         self.tbl.head(lastrow, invert=True)
     if lastcol is not None:
         self.tbl.head(lastcol, invert=True, transposed=True)
     self.row = self.tbl.rowheads[:]
     self.col = self.tbl.colheads[:]

     if eps_noise:
         self.tbl.float()
         self.tbl.apply_entries(
             lambda entry: entry + c_eps * random.random())
     self.dat = self.tbl.table2array()

     # column metadata: from file when the source is a path, else from the table
     self.colmeta = None
     self.colmetaname = None
     if colmeta is not None:
         self.colmeta, self.colmetaname = _gather(
             colmeta,
             self.col,
             self.bak.rowdict,
             "Loading col metadata from file:",
         )

     # row metadata: same scheme, keyed on the row headers
     self.rowmeta = None
     self.rowmetaname = None
     if rowmeta is not None:
         self.rowmeta, self.rowmetaname = _gather(
             rowmeta,
             self.row,
             self.bak.coldict,
             "Loading row metadata from file:",
         )
Esempio n. 2
0
def main( ):
    """Ordinate the samples of a table and save a 2-D scatter plot.

    Settings come from get_args( ): table, last_metadata, dissimilarity,
    method, shapeby, colorby, title, level_biplot, quant_biplot, outfile.
    The first two embedding dimensions are plotted; the axis labels report
    varexp[0] and varexp[1] as percentages and the title reports the
    goodness of fit.
    """
    args = get_args( )
    tbl = table( args.table )
    data = tbl.table2array( args.last_metadata ).transpose( )
    dist = distance_matrix( data, args.dissimilarity )

    # "manual" selects ordinate_cmdscale; anything else goes to ordinate_sklearn
    if args.method != "manual":
        embedding, varexp, goodness = ordinate_sklearn( dist, method=args.method )
    else:
        embedding, varexp, goodness = ordinate_cmdscale( dist )

    fig = plt.figure()
    fig.set_size_inches( 10, 6 )
    ax = plt.subplot( 111 )
    xcoords, ycoords = embedding[:, 0], embedding[:, 1]

    # marker per point: circles unless a shape field was requested
    if args.shapeby is None:
        shapes = ["o" for _ in xcoords]
    else:
        field, path = args.shapeby
        shapes = shapeize( tbl.row( field ), path, ax )

    # color per point: black unless a color field was requested
    if args.colorby is None:
        colors = ["black" for _ in xcoords]
    else:
        field, path = args.colorby
        colors = colorize( tbl.row( field ), path, ax )

    # one scatter call per point so each gets its own color and marker
    for px, py, pcolor, pshape in zip( xcoords, ycoords, colors, shapes ):
        ax.scatter( px, py, color=pcolor, marker=pshape )
    ax.set_xlim( min( xcoords ), max( xcoords ) )
    ax.set_ylim( min( ycoords ), max( ycoords ) )
    mu.funcMargin( ax )

    title = args.title if args.title is not None else path2name( args.table )
    ax.set_title( "%s | Goodness of fit = %.3f" % ( title, goodness ) )
    ax.set_xlabel( "Dimension 1 (%.1f%%)" % (100 * varexp[0] ) )
    ax.set_ylabel( "Dimension 2 (%.1f%%)" % (100 * varexp[1] ) )
    mu.funcSetTickParams( ax )

    for field in args.level_biplot:
        level_biplot( ax, embedding, tbl.row( field ) )
    for field in args.quant_biplot:
        quant_biplot( ax, embedding, tbl.row( field ), field )

    # narrow the axis to 70% of its width so the legend fits on the right
    box = ax.get_position( )
    ax.set_position( [box.x0, box.y0, box.width * 0.7, box.height] )
    ax.legend(
        scatterpoints=1,
        fontsize=8,
        loc='center left',
        bbox_to_anchor=(1, 0.5),
    )

    # NOTE: plt.tight_layout( ) is left disabled here
    plt.savefig( args.outfile )
Esempio n. 3
0
def main():
    """Run adonis on a table against one of its rows used as grouping.

    Reads table, select, groups and last_metadata from get_args(). The
    grouping row is fetched before the optional metadata split, so it is
    taken from the table while the metadata rows are still attached.
    """
    args = get_args()
    tbl = table(args.table)

    # optional sample selection: args.select holds (field, value)
    if args.select is not None:
        field, wanted = args.select[0], args.select[1]
        tbl.select(field, wanted, transposed=True)

    groups = tbl.row(args.groups)

    # keep only the second part of the split for the analysis
    if args.last_metadata is not None:
        _meta, tbl = tbl.metasplit(args.last_metadata)

    tbl.float()
    adonis(tbl, groups)
Esempio n. 4
0
def main( ):
    args = get_args( )
    t = table( args.table, verbose=False )
    if args.stratify_by is None:
        dt = {}
        dt["ALL_SAMPLES"] = t
    else:
        dt = t.stratify( args.stratify_by )
    reports = {}
    for level, t in dt.items( ):
        if args.last_metadata is not None:
            m, t = t.metasplit( args.last_metadata )
        reports[level] = check( t )
    for level, [summary, failed] in reports.items( ):
        print "Considering table <{}> at level <{}>:".format( args.table, level )
        print "  Summary:", summary
        print "  FAILED: ", len( failed )        
        for s, msg in failed.items( ):
            print "\t".join( ["  FAILED:", s, msg] )
    return None
Esempio n. 5
0
#!/usr/bin/env python
# The interpreter path in the shebang must be absolute ("/usr/bin/env");
# a relative "usr/bin/env" only resolves when the script is run from "/".
#
# Usage: <script> TABLE
# Loads TABLE, converts it with float(), groups with groupby() using the
# text before the first "|" of x[0] as the key and sum as the reducer,
# then dumps the result.
# NOTE(review): x is whatever table.groupby passes to its key function;
# indexing x[0] before splitting is kept exactly as written -- confirm
# against zopy.table2.

import sys
from zopy.table2 import table
t = table(sys.argv[1])
t.float()
t.groupby(lambda x: x[0].split("|")[0], sum)
t.dump()
Esempio n. 6
0
)
# -l/--legacy: boolean flag that selects the iterative table.merge path below
parser.add_argument( 
    "-l", "--legacy",
    action="store_true",
    help="iteratively merge tables (better maintains feature order)",
)
args = parser.parse_args()

# ---------------------------------------------------------------
# load and process data
# ---------------------------------------------------------------

# path of the first table; it seeds both the single-table and legacy paths
p = args.tables[0]

if len( args.tables ) == 1:
    # only one table given: load it as-is
    t = table( p )
elif args.legacy:
    # legacy path: fold every remaining table into the first, one at a time
    t = table( p )
    for p2 in args.tables[1:]:
        t2 = table( p2 )
        t.merge( t2 )
else:    
    # default path: pool all tables into one two-level dict
    # (outer key r -> inner key c -> value), as yielded by table2nesteddict
    data = {}
    for p in args.tables:
        d = table( p ).table2nesteddict( )
        for r in d:
            inner = data.setdefault( r, {} )
            for c in d[r]:
                # a later table replacing an existing, different value is
                # reported via warn; the later value still wins
                if c in inner and inner[c] != d[r][c]:
                    warn( p, "overwrites", r, c, inner[c], "with", d[r][c] )
                inner[c] = d[r][c]
Esempio n. 7
0
f: focus on non-header field (first arg)
i: invert
m: protect/reattach metadata
x: read choices from file (one per line)

===============================================
Author: Eric Franzosa ([email protected])
"""

import os, sys
from zopy.table2 import table

# command-line arguments without the script name
# NOTE(review): args[0][0] below raises IndexError when no arguments are
# given, or when the first argument is an empty string.
args = sys.argv[1:]
# first arg is a command cluster (starts with "-"); read the table from stdin
if args[0][0] == "-":
    t = table()
# otherwise interpret the first arg as a file name and drop it from args
else:
    t = table(args[0])
    args = args[1:]

# determine operations: each op is [command cluster, [its arguments]]
ops = []
i = 0
while i < len(args):
    if args[i][0] == "-":
        # start a new op at this "-..." token
        op = [args[i], []]
        i += 1
        # collect following tokens until the next one that starts with "-"
        while i < len(args) and args[i][0] != "-":
            op[1].append(args[i])
            i += 1
Esempio n. 8
0
# Unpack the parsed command-line arguments into module-level settings.
strInputPath = args.input
strMode = args.mode
# Default output path is "<input>.<mode>". str.join accepts exactly one
# iterable argument, so the two parts are wrapped in a list; passing them
# as two positional arguments raises TypeError.
strOutputPath = args.output if args.output is not None else ".".join( [strInputPath, strMode] )
fReadLength = args.read_length
strSampleReadsPath = args.sample_reads

# ---------------------------------------------------------------
# manipulate data
# ---------------------------------------------------------------

# rpkm needs the per-sample read counts (column 0 -> column 1, in millions)
if strMode == "rpkm":
    if strSampleReadsPath is None:
        sys.exit( "to compute rpkm you must include a file mapping sample IDs to #s of reads" )
    dictMillions = col2dict(
        strSampleReadsPath,
        key=0,
        value=1,
        func=lambda strReads: float( strReads ) / 1e6,
    )

# load the table, keep "s__" headers, and collapse rows that share the
# text before the first "|" using the median
tableCladeRPK = table( strInputPath )
tableCladeRPK.grep( "headers", "s__" )
tableCladeRPK.float()
tableCladeRPK.groupby( lambda strHead: strHead.split( "|" )[0], median )

# "rpk" leaves the values untouched; the other modes rescale every entry
if strMode != "rpk":
    for bug, sample, value in tableCladeRPK.iter_entries():
        if strMode == "coverage":
            fNewValue = value * fReadLength / 1e3
        elif strMode == "rpkm":
            # samples with no reads map to 0 instead of dividing by zero
            fMillions = dictMillions[sample]
            fNewValue = value / fMillions if fMillions > 0 else 0
        else:
            continue
        tableCladeRPK.set( bug, sample, fNewValue )

# restore the entries, order both axes, and write the result
tableCladeRPK.unfloat()
tableCladeRPK.colsort()
tableCladeRPK.rowsort()
tableCladeRPK.dump( strOutputPath )
Esempio n. 9
0
def trunc_normal(m, sd, zmax):
    """Draw from normal(m, sd) by rejection, keeping only |z| < zmax.

    Args:
        m: mean of the distribution.
        sd: standard deviation of the distribution.
        zmax: exclusive bound on the absolute z-score of the returned draw.

    Returns:
        A draw ``sim`` with ``abs(m - sim) / sd < zmax``. When ``sd`` is 0
        the distribution is a point mass at ``m`` and ``m`` is returned.

    Raises:
        ValueError: if ``zmax`` is not greater than 0. No draw could ever
            be accepted, so the rejection loop would never terminate.
    """
    # "not zmax > 0" also rejects NaN, which would never accept a draw
    if not zmax > 0:
        raise ValueError("zmax must be > 0, got %r" % (zmax,))
    if sd == 0:
        # the z-score test below would divide by zero
        return m
    while True:
        sim = normal(m, sd)
        if abs(m - sim) / sd < zmax:
            return sim


#-------------------------------------------------------------------------------
# munge hmp data
#-------------------------------------------------------------------------------

# progress message tagged with the run's basename
zu.say(args.basename, "->", "parsing HMP data")

# Load the HMP table and narrow it step by step. The return values of the
# calls below are discarded, so they are relied on to modify T in place.
T = table(args.hmp)
# NOTE(review): presumably keeps samples whose STSite equals args.site and
# whose VISNO equals "1" -- confirm select() semantics in zopy.table2
T.select("STSite", args.site, transposed=True)
T.select("VISNO", "1", transposed=True)
T.head("SRS", invert=True)
# keep only the last "|"-delimited field of every row header
T.apply_rowheads(lambda x: x.split("|")[-1])
T.grep("headers", "s__")
T.grep("headers", "_unclassified", invert=True)
# snapshot of the filtered table, written before the float conversion
T.dump("subset.tmp")
T.float()
T.unrarify(1e-20, 1)

bugs = []
for bug, row in T.iter_rows():
    stats = []
    # log10 of every strictly positive entry in the row
    nonzero = [log(k) / log(10) for k in row if k > 0]
    # first statistic: fraction of the row's entries that are > 0
    stats.append(len(nonzero) / float(len(row)))
Esempio n. 10
0
                    default=0,
                    type=int,
                    help="position post splitting to isolate")
args = parser.parse_args()

# process map: each line is "<old id>\t<new id>"; a new id may appear only once
idmap = {}
isnewid = {}
with open(args.map) as map_file:
    for record in map_file:
        oldid, newid = record.strip().split("\t")
        if newid in isnewid:
            # sys.exit raises, so nothing below runs for a duplicate
            sys.exit("ERROR: %s listed 2+ times in map" % (newid))
        idmap[oldid] = newid
        isnewid[newid] = 1


# execute
def applier(colhead):
    """Translate one column header through the module-level idmap.

    The header is split on args.split and the piece at index args.pos is
    used as the lookup key. A key present in idmap returns its mapped
    value; otherwise a notice is written to stderr and the key itself is
    returned.
    """
    key = colhead.split(args.split)[args.pos]
    if key not in idmap:
        print >> sys.stderr, "can't convert", colhead, "->", key
        return key
    return idmap[key]


# load the table, rewrite every column header through applier, and dump it
# (dump() is called without a path -- presumably writes to stdout; confirm)
t = table(args.table)
t.apply_colheads(applier)
t.dump()
Esempio n. 11
0
from zopy.table2 import table

from time import time

start = time()


def interval():
    global start
    stop = time()
    print stop - start, "seconds"
    start = stop


# Round 1: time loading the two tables, then time table.merge
print "loading"
t1 = table(sys.argv[1])
t2 = table(sys.argv[2])
interval()
print "testing merge"
t1.merge(t2)
interval()

# Round 2: reload the same two files into fresh tables, then time merge_old
print "loading"
t3 = table(sys.argv[1])
t4 = table(sys.argv[2])
interval()
print "testing merge_old"
t3.merge_old(t4)
interval()

for i in range(len(t1.data)):
Esempio n. 12
0
    default=None,
    help="value to insert in place of missing values",
)
# -m/--metamerge: treat the second table as metadata for the first
parser.add_argument(
    "-m", "--metamerge",
    action="store_true",
    help="specify if second table is a metadata table",
)
args = parser.parse_args()

# ---------------------------------------------------------------
# load and process data
# ---------------------------------------------------------------

table1 = table(args.input1)
table2 = table(args.input2)

# the second table is folded into the first, either as metadata or as data
if not args.metamerge:
    table1.merge(table2)
else:
    table1.metamerge(table2)

# optionally replace the missing-value marker with the requested fill value
if args.fill_empty is not None:
    table1.apply_entries(
        lambda entry: entry if entry != c_strNA else args.fill_empty)

# ---------------------------------------------------------------
# dump table
# ---------------------------------------------------------------

# nothing is written when no output path is given (noted as "not ideal"
# by the original author)
if args.output is not None:
    table1.dump(args.output)
Esempio n. 13
0
def main():
    """Draw a table as a matrix of scaled tiles and save it to args.outfile.

    Settings are read from get_args(): table, scale, colormap, mode,
    emphasis, grid and outfile.

    Every entry is divided by the table-wide maximum and raised to
    ``1 / scale``; the result sets the tile height, the tile width, or
    both (``mode == "width"`` fixes the height at 1.0, ``mode == "height"``
    fixes the width at 1.0). Columns are laid out in sorted order; rows are
    ordered by the column that argmax() picks for them, in reverse.
    ``emphasis`` either fades tiles outside a row's argmax column
    ("fade") or marks the argmax tile with a white dot ("dot").
    A second axis shows reference tiles for gmax * 0.1**i.
    """
    args = get_args()
    t = table(args.table)
    t.float()
    # apply scaling
    gmax = max([max(row) for header, row in t.iter_rows()])
    exponent = 1 / float(args.scale)
    t.apply_entries(lambda x: (x / gmax)**exponent)
    # format labels
    # the argmax column of a row is needed for sorting and again for every
    # tile of that row, so it is computed once per row here
    peak = {y: argmax(t.rowdict(y)) for y in t.rowheads}
    xlab = sorted(t.colheads)
    ylab = sorted(t.rowheads, key=lambda y: peak[y], reverse=True)
    # make plot
    colors = mu.ncolors(len(xlab), args.colormap)
    fig = plt.figure()
    axes = mu.funcPlotMatrix([1], [len(t.rowheads), 2])
    ax = axes[0][0]
    legend = axes[1][0]
    ax.xaxis.tick_top()
    ax.yaxis.tick_right()
    ax.set_xticks(range(len(xlab)))
    ax.set_yticks(range(len(ylab)))
    ax.set_xticklabels(xlab, rotation=45, rotation_mode="anchor", ha="left")
    ax.set_yticklabels(ylab)
    ax.set_xlim(-0.5, len(xlab) - 0.5)
    ax.set_ylim(-0.5, len(ylab) - 0.5)
    # add tiles
    for i, x in enumerate(xlab):
        for j, y in enumerate(ylab):
            value = t.entry(y, x)
            # zero entries get no tile at all
            if value == 0:
                continue
            tileh = 1.0 if args.mode == "width" else value
            tilew = 1.0 if args.mode == "height" else value
            is_peak = peak[y] == x
            alpha = 1.0
            if args.emphasis == "fade" and not is_peak:
                alpha = c_fadeout
            ax.add_patch(
                patches.Rectangle(
                    (i - tilew / 2.0, j - tileh / 2.0),
                    tilew,
                    tileh,
                    edgecolor="white",
                    facecolor=colors[i],
                    alpha=alpha,
                ))
            if args.emphasis == "dot" and is_peak:
                ax.scatter([i], [j],
                           s=10,
                           color="white",
                           edgecolor="none",
                           zorder=2)
    # add grid
    if args.grid == "box":
        # lines between tiles
        mu.funcGrid2(
            ax,
            h=[k - 0.5 for k in range(1, len(ylab))],
            v=[k - 0.5 for k in range(1, len(xlab))],
            color="0.9",
            zorder=2,
            border=True,
        )
    elif args.grid == "cross":
        # lines through tile centers, drawn beneath the tiles
        mu.funcGrid2(
            ax,
            h=list(range(len(ylab))),
            v=list(range(len(xlab))),
            color="0.9",
            zorder=0,
            border=True,
        )
        mu.funcHideBorder(ax)
    # cleanup
    mu.funcHideTicks(ax)
    # draw the legend
    mu.funcSetTickParams(legend)
    legend.yaxis.tick_right()
    legend.set_yticks([0])
    legend.set_yticklabels(["Scale"])
    legend.set_xlim(-0.5, len(xlab) - 0.5)
    legend.set_xticks(range(len(xlab)))
    legend.set_ylim(-0.5, 0.5)
    # reference values step down from gmax by factors of ten, shown in
    # ascending order (a nearest-power-of-ten variant was computed here
    # before and immediately overwritten; that dead store is removed)
    values = [(gmax) * 0.1**i for i in range(len(xlab))]
    values.reverse()
    legend.set_xticklabels(["%.2g" % k for k in values])
    j = 0
    for i, value in enumerate(values):
        # same transform as the table entries, so tiles are comparable
        value = (value / gmax)**exponent
        tileh = 1.0 if args.mode == "width" else value
        tilew = 1.0 if args.mode == "height" else value
        legend.add_patch(
            patches.Rectangle(
                (i - tilew / 2.0, j - tileh / 2.0),
                tilew,
                tileh,
                edgecolor="none",
                facecolor="0.5",
            ))
    # finalize
    fig.set_size_inches(
        c_colw * len(xlab) + c_rowlabw,
        c_rowh * (1 + len(ylab)) + c_collabh,
    )
    plt.tight_layout()
    plt.savefig(args.outfile)
Esempio n. 14
0
#! /usr/bin/env python

import os, sys, re, glob, argparse
from zopy.table2 import table
from zopy.dictation import col2dict
from zopy.utils import path2name

# argv[1]: two-column map (column 0 -> column 1); argv[2]: input table;
# argv[3]: output path
dictMap = col2dict( sys.argv[1], key=0, value=1 )
tableData = table( sys.argv[2] )

# rename the column headers in three passes, in this order:
#   1. path2name on the header
#   2. keep the text before the first "."
#   3. look the result up in the map (a missing key raises KeyError)
for funcRename in (
    path2name,
    lambda strHead: strHead.split( "." )[0],
    lambda strHead: dictMap[strHead],
):
    tableData.apply_colheads( funcRename )

tableData.dump( sys.argv[3] )
Esempio n. 15
0
# NOTE(review): presumably matplotlib grayscale color strings -- confirm at use
c_strColor2 = "0.60"
c_strBackgroundColor = "0.95"

# constants from args
# funcArgPathCheck is defined outside this excerpt; from its use here it
# yields either None or an iterable of values -- TODO confirm
c_pathPCL = args.input
c_astrSamples = funcArgPathCheck(args.samples)
c_astrClades = funcArgPathCheck(args.clades)
c_strScaling = args.scaling
c_afBins = funcArgPathCheck(args.bins)
if c_afBins is not None:
    # bins are converted to floats and sorted ascending
    c_afBins = [float(k) for k in c_afBins]
    c_afBins.sort()
c_astrHighlight = funcArgPathCheck(args.highlight)

# load and manipulate table
tableData = table(c_pathPCL)
tableData.float()
# optional filters: applied only when the corresponding list was supplied
if c_astrClades is not None:
    tableData.grep("headers", c_astrClades)
if c_astrSamples is not None:
    tableData.select("headers", c_astrSamples, transposed=True)
# group key: the text before the first "|" of each row header
dictCladeTables = tableData.groupify(
    lambda strRowhead: strRowhead.split("|")[0])

# override these lists based on what was in the table
c_astrClades = dictCladeTables.keys()
c_astrSamples = tableData.colheads[:]

# derived constants
c_iClades = len(c_astrClades)
c_iSamples = len(c_astrSamples)
Esempio n. 16
0
# argparse
# ---------------------------------------------------------------

# command-line interface: input/output paths plus optional float bounds
# (all help strings are empty in the original)
parser = argparse.ArgumentParser()
parser.add_argument('--input', help='')
parser.add_argument('--output', help='')
parser.add_argument('--logmin', type=float, help='')
parser.add_argument('--logmax', type=float, help='')
args = parser.parse_args()

# ---------------------------------------------------------------
# main
# ---------------------------------------------------------------

# get and manipulate data
t = table(args.input)
# two passes: empty strings become None, then every non-None entry is
# converted with float()
t.apply_entries(lambda x: None if x == "" else x)
t.apply_entries(lambda x: float(x) if x is not None else None)

# derived features
# one Dark2 color per column, sampled at i / (number of columns)
aColors = [
    plt.cm.Dark2(i / float(len(t.colheads)))
    for i, colhead in enumerate(t.colheads)
]
# row values only; the row headers are dropped
aaData = [row for rowhead, row in t.iter_rows()]
logmin, logmax = args.logmin, args.logmax

# make plot
fig = plt.figure()
axes = plt.subplot(111)
# stepplot is defined outside this excerpt
stepplot(axes, aaData, colors=aColors)