Esempio n. 1
0
 def __init__(
     self,
     path,
     lastcol=None,
     lastrow=None,
     colmeta=None,
     rowmeta=None,
     eps_noise=False,
 ):
     """Load a table from <path> and split off row/column metadata.

     path      : input file, parsed by the project's table() class
     lastcol   : passed to table.head() transposed -- strips leading metadata
                 columns up to this header (assumption; TODO confirm semantics)
     lastrow   : passed to table.head() -- strips leading metadata rows
                 up to this header (assumption; TODO confirm semantics)
     colmeta   : list of column-metadata sources; each is either a 2-column
                 file path or the name of a row in the original table
     rowmeta   : list of row-metadata sources; each is either a 2-column
                 file path or the name of a column in the original table
     eps_noise : if True, add tiny c_eps-scaled random noise to every entry
     """
     self.tbl = table(path)
     # keep an untouched copy so metadata rows/cols can still be looked up
     # after they are stripped from self.tbl below
     self.bak = self.tbl.copy()
     # strip / save metadata
     if lastrow is not None:
         self.tbl.head(lastrow, invert=True)
     if lastcol is not None:
         self.tbl.head(lastcol, invert=True, transposed=True)
     self.row = self.tbl.rowheads[:]
     self.col = self.tbl.colheads[:]
     if eps_noise:
         self.tbl.float()
         # jitter entries slightly -- presumably to break ties downstream;
         # verify against callers
         self.tbl.apply_entries(lambda x: x + c_eps * random.random())
     self.dat = self.tbl.table2array()
     # colmetas from file / table
     if colmeta is None:
         self.colmeta = None
         self.colmetaname = None
     else:
         self.colmeta = []
         self.colmetaname = []
         for x in colmeta:
             if os.path.exists(x):
                 # source is a 2-column file mapping colhead -> value
                 warn("Loading col metadata from file:", x)
                 temp = col2dict(x, value=1)
                 self.colmeta.append(
                     [temp.get(k, c_str_none) for k in self.col])
                 self.colmetaname.append(path2name(x))
             else:
                 # source is a row of the original (pre-strip) table
                 temp = self.bak.rowdict(x)
                 self.colmeta.append(
                     [temp.get(k, c_str_none) for k in self.col])
                 self.colmetaname.append(x)
     # rowmetas from file / table
     if rowmeta is None:
         self.rowmeta = None
         self.rowmetaname = None
     else:
         self.rowmeta = []
         self.rowmetaname = []
         for x in rowmeta:
             if os.path.exists(x):
                 # source is a 2-column file mapping rowhead -> value
                 warn("Loading row metadata from file:", x)
                 temp = col2dict(x, value=1)
                 self.rowmeta.append(
                     [temp.get(k, c_str_none) for k in self.row])
                 self.rowmetaname.append(path2name(x))
             else:
                 # source is a column of the original (pre-strip) table
                 temp = self.bak.coldict(x)
                 self.rowmeta.append(
                     [temp.get(k, c_str_none) for k in self.row])
                 self.rowmetaname.append(x)
Esempio n. 2
0
 def dict(self, key_fields, value_field, append=False):
     """Build a {key: value} lookup from table fields.

     A single key field maps each of its entries to the matching entry of
     <value_field>; multiple key fields use tuples of entries as keys.
     With append=True every key maps to the list of all its values;
     otherwise conflicting values for one key are dropped and a warning
     is emitted once at the end.
     """
     mapping = {}
     collision = False
     # normalize / validate the requested key fields
     key_fields = self.check(key_fields)
     if len(key_fields) == 1:
         keys = self.data[key_fields[0]]
     else:
         keys = list(zip(*[self[f] for f in key_fields]))
     # populate dictionary
     for key, val in zip(keys, self[value_field]):
         if append:
             mapping.setdefault(key, []).append(val)
         elif key in mapping and mapping[key] != val:
             collision = True
         else:
             mapping[key] = val
     if collision:
         warn("One to many mapping in <{}>: {} --> {}".format(
             self.source,
             key_fields,
             value_field,
         ))
     return mapping
Esempio n. 3
0
def check( t ):
    """Screen samples of table <t> for quality problems.

    Returns (summary, failed): a one-line distance summary string and a
    {sample: reason} dict covering zero-sum samples plus samples whose
    median Bray-Curtis distance to all other samples exceeds the Tukey
    upper inner fence (q3 + 1.5 * IQR).
    """
    failed = {}
    t.float( )
    # place samples on rows
    t.transpose( )
    for s, row in t.iter_rows( ):
        if sum( row ) == 0:
            failed[s] = "Sample with zero sum"
    # drop the zero-sum samples before computing distances
    t.delete( "headers", failed )
    array = t.table2array( )
    # full square matrix of pairwise Bray-Curtis distances between samples
    dists = spd.squareform( spd.pdist( array, "braycurtis" ) )
    median_dists = []
    indices = set( range( len( t.rowheads ) ) )
    for i, s in enumerate( t.rowheads ):
        # median distance from sample i to every other sample
        others = list( indices - {i} )
        median_dists.append( median( dists[i][others] ) )
    q1, q2, q3 = mquantiles( median_dists )
    # Tukey upper inner fence for outlier detection
    limit = q3 + 1.5 * (q3 - q1)
    qmin = min( median_dists )
    qmax = max( median_dists )
    summary = "Comparison summary: N=%d min=%.3f q1=%.3f q2=%.3f q3=%.3f uif=%.3f max=%.3f" % (
        len( t.rowheads ), qmin, q1, q2, q3, limit, qmax ) 
    if limit > 1:
        # Bray-Curtis distance is bounded at 1.0, so such a fence can never fire
        warn( "Upper inner fence exceeds theoretical limit: %.3f > %.3f" %
              ( limit, 1.0 ) )    
    for m, s in zip( median_dists, t.rowheads ):
        if m > limit:
            failed[s] = "Extreme distance: {}".format( m )
    return summary, failed
Esempio n. 4
0
def polymap(path, key=0, skip=None, headers=False, reverse=False, sets=False):
    """
    input like:
    key1 val1 val2 val3
    key2 val4 val5
    output like:
    [key][value]

    key     : 0-based index of the key column
    skip    : column indices to exclude from the values (key is always excluded)
    headers : forwarded to read_csv (header handling)
    reverse : invert the mapping to [value][key]
    sets    : return {key: set(values)} instead of nested dicts
    """
    # copy into a set: the original appended to the caller's list in place,
    # mutating the argument across calls; a set also makes membership O(1)
    skip = set(skip) if skip is not None else set()
    skip.add(key)
    dd = {}
    for row in read_csv(path, headers):
        if key > len(row) - 1:
            warn("skipping unindexable short row:", row)
            continue
        inner = dd.setdefault(row[key], {})
        for i, item in enumerate(row):
            if i not in skip:
                inner[item] = c_default
    if reverse:
        # flip to value -> {key: c_default}
        flipped = {}
        for outer_key, inner in dd.items():
            for inner_key in inner:
                flipped.setdefault(inner_key, {})[outer_key] = c_default
        dd = flipped
    if sets:
        dd = {k: set(inner) for k, inner in dd.items()}
    return dd
Esempio n. 5
0
def get_args():
    """Parse and return the command-line arguments for this script."""
    parser = argparse.ArgumentParser()
    parser.add_argument("obo", help="GO-provided obo file")
    parser.add_argument(
        "--mapping",
        default=None,
        metavar="<path>",
        help="mapping from genes to GO terms",
    )
    parser.add_argument(
        "--allowed-genes",
        default=None,
        metavar="<path>",
        help="subset of allowed genes",
    )
    parser.add_argument(
        "--flip",
        action="store_true",
        help="mapping is from GO terms to genes",
    )
    parser.add_argument(
        "--depth",
        type=int,
        default=None,
        metavar="<int>",
        help="trimming option: only terms at this min distance from root",
    )
    parser.add_argument(
        "--grep",
        default=None,
        metavar="<regex>",
        help="trimming option: only terms matching this regex",
    )
    parser.add_argument(
        "--prune",
        default=None,
        metavar="<term>",
        help="trimming option: only terms descending from this term (GO id)",
    )
    parser.add_argument(
        "--namespace",
        default=None,
        choices=["BP", "MF", "CC"],
        nargs="+",
        metavar="<BP/MF/CC>",
        help="trimming option: only terms in this namespace",
    )
    parser.add_argument(
        "--informative",
        default=None,
        metavar="<number OR fraction of genes>",
        help="trimming option: only terms that are informative at a given level",
    )
    parser.add_argument(
        "--ignore-progeny",
        action="store_true",
        help="do not let more specific annotations rise through the dag",
    )
    parser.add_argument(
        "--terms-only",
        action="store_true",
        help="just output the terms list (no annotations)",
    )
    parser.add_argument("--outfile", default=None, help="output file")
    args = parser.parse_args()
    # surface the consequences of --ignore-progeny immediately
    if args.ignore_progeny:
        warn(
            "Only considering direct annotations. Annotations will not rise through the DAG."
        )
    return args
Esempio n. 6
0
def file_order(path):
    """Read <path> (one header per line) into a {header: 0-based position} map.

    Duplicate headers keep the position of their last occurrence, matching
    the original manual-counter behavior.
    """
    warn("reading header order from", path)
    with open(path) as fh:
        # enumerate replaces the original hand-rolled counter
        return {line.strip(): position for position, line in enumerate(fh)}
Esempio n. 7
0
 def transform(self, method):
     """Transform self.dat in place.

     method : "none" (identity), "sqrt", or "log"/"log<base>" -- log methods
              first Laplace-smooth by the smallest positive entry, then take
              log10 and rescale to the requested base ("log" alone = base 10).
     Methods other than these are silently ignored (original behavior).
     """
     if method == "none":
         pass
     elif method == "sqrt":
         self.dat = np.sqrt(self.dat)
     elif "log" in method:
         # smallest positive entry is the smoothing constant so log(0) is
         # avoided; NOTE: raises ValueError if the matrix has no positive
         # entries (unchanged from the original)
         eps = min([k for row in self.dat for k in row if k > 0])
         warn("Applying Laplace smoothing with constant:", eps)
         self.dat = np.log10(self.dat + eps)
         # bug fix: plain "log" used to crash on int(""); default to base 10
         suffix = method.replace("log", "")
         base = int(suffix) if suffix else 10
         warn("Log transforming with base", base)
         self.dat /= np.log10(base)
Esempio n. 8
0
 def rowlabel(self, df, des, ax_label, scaled=False, path=None):
     """Draw per-row labels and a rotated axis title on the rownames axis.

     df       : data frame; df.row holds the row names
     des      : layout object; wmargin() comparisons pick the label side
     ax_label : axis title text; the row count is appended to it
     scaled   : True if the axes were vertically scaled so all labels fit
     path     : optional 2-column file remapping row names to display names;
                rows missing from the file are shown blank
     """
     ax_label = "{} (n={:,})".format(ax_label, len(df.row))
     # override row names from the table?
     mapping = {}
     if path is not None:
         mapping = col2dict(path, value=1)
     for l in df.row:
         if l not in mapping:
             # with an override file, unmapped rows get blank labels;
             # without one, rows label themselves
             mapping[l] = "" if path is not None else l
     if len(df.row) > c_max_lab and not scaled:
         # too many rows to label individually: center only the axis title
         warn("Too many row labels.")
         cx = sum(self.rownames.get_xlim()) / 2.0
         cy = sum(self.rownames.get_ylim()) / 2.0
         self.rownames.text(cx,
                            cy,
                            ax_label,
                            size=c_font3,
                            ha="center",
                            va="center",
                            rotation=90,
                            rotation_mode="anchor")
     else:
         ha = "left"
         xi, xf = self.rownames.get_xlim()
         # flip alignment and anchor side when the heatmap margin is wider --
         # presumably the names axis sits on the other side then; TODO confirm
         if des.wmargin("heatmap") > des.wmargin("rownames"):
             ha = "right"
             xi, xf = xf, xi
         self.rownames.set_ylim(0, len(df.row))
         for i, l in enumerate(df.row):
             # one label per row, vertically centered on the row
             self.rownames.text(
                 xi,
                 i + 0.5,
                 mapping[l],
                 ha=ha,
                 va="center",
                 size=c_font1,
                 clip_on=False,
             )
         cx = xf
         cy = sum(self.rownames.get_ylim()) / 2.0
         self.rownames.text(cx,
                            cy,
                            ax_label,
                            size=c_font3,
                            ha="center",
                            va="bottom" if ha == "left" else "top",
                            rotation=90,
                            rotation_mode="anchor")
Esempio n. 9
0
 def collabel(self, df, des, ax_label, scaled=False, path=None):
     """Draw per-column labels (angled 45 degrees) and an axis title.

     df       : data frame; df.col holds the column names
     des      : layout object; hmargin() comparisons pick the label side
     ax_label : axis title text; the column count is appended to it
     scaled   : True if the axes were horizontally scaled so all labels fit
     path     : optional 2-column file remapping column names to display
                names; columns missing from the file are shown blank
     """
     ax_label = "{} (n={:,})".format(ax_label, len(df.col))
     # override col names from the table?
     mapping = {}
     if path is not None:
         mapping = col2dict(path, value=1)
     for l in df.col:
         if l not in mapping:
             # with an override file, unmapped cols get blank labels;
             # without one, cols label themselves
             mapping[l] = "" if path is not None else l
     if len(df.col) > c_max_lab and not scaled:
         # too many columns to label individually: center only the axis title
         warn("Too many column labels.")
         cx = sum(self.colnames.get_xlim()) / 2.0
         cy = sum(self.colnames.get_ylim()) / 2.0
         self.colnames.text(cx,
                            cy,
                            ax_label,
                            size=c_font3,
                            ha="center",
                            va="center")
     else:
         ha = "left"
         yi, yf = self.colnames.get_ylim()
         # flip alignment and anchor side when the heatmap margin is smaller --
         # presumably the names axis sits on the other side then; TODO confirm
         if des.hmargin("heatmap") < des.hmargin("colnames"):
             yi, yf = yf, yi
             ha = "right"
         self.colnames.set_xlim(0, len(df.col))
         for i, l in enumerate(df.col):
             # one angled label per column, centered on the column
             self.colnames.text(
                 i + 0.5,
                 yi,
                 mapping[l],
                 rotation=45,
                 rotation_mode="anchor",
                 va="center",
                 ha=ha,
                 size=c_font1,
                 clip_on=False,
             )
         cx = 0
         cy = yf
         self.colnames.text(
             cx,
             cy,
             ax_label,
             size=c_font3,
             ha="left",
             va="top" if ha == "left" else "bottom",
         )
Esempio n. 10
0
 def rowlabel(self, df, ax_label, scaled=False, path=None):
     """Draw per-row labels and a rotated axis title on the rownames axis.

     Simpler variant without a layout (des) argument: labels always sit at
     x=0 with clipping enabled and the title goes at the right x-limit.

     df       : data frame; df.row holds the row names
     ax_label : axis title text; the row count is appended to it
     scaled   : True if the axes were vertically scaled so all labels fit
     path     : optional 2-column file remapping row names to display names;
                rows missing from the file are shown blank
     """
     ax_label = "{} (n={:,})".format(ax_label, len(df.row))
     # override row names from the table?
     mapping = {}
     if path is not None:
         mapping = col2dict(path, value=1)
     for l in df.row:
         if l not in mapping:
             # with an override file, unmapped rows get blank labels;
             # without one, rows label themselves
             mapping[l] = "" if path is not None else l
     if len(df.row) > c_max_lab and not scaled:
         # too many rows to label individually: center only the axis title
         warn("Too many row labels.")
         cx = sum(self.rownames.get_xlim()) / 2.0
         cy = sum(self.rownames.get_ylim()) / 2.0
         self.rownames.text(cx,
                            cy,
                            ax_label,
                            size=c_font3,
                            ha="center",
                            va="center",
                            rotation=90,
                            rotation_mode="anchor")
     else:
         self.rownames.set_ylim(0, len(df.row))
         for i, l in enumerate(df.row):
             # one label per row, vertically centered on the row
             self.rownames.text(
                 0,
                 i + 0.5,
                 mapping[l],
                 va="center",
                 size=c_font1,
                 clip_on=True,
             )
         cx = self.rownames.get_xlim()[1]
         cy = sum(self.rownames.get_ylim()) / 2.0
         self.rownames.text(cx,
                            cy,
                            ax_label,
                            size=c_font3,
                            ha="center",
                            va="bottom",
                            rotation=90,
                            rotation_mode="anchor")
Esempio n. 11
0
def read_stream(stream, stratified=False):
    """Read numeric values from <stream> (via reader) into {key: [floats]}.

    stratified=False : all values pool under the single key "#"
    stratified=True  : the first field of each row is the key, the rest
                       are values
    Entries that cannot be parsed as floats are warned about and skipped;
    keys that end up with no usable values are dropped.
    """
    total, bad = 0, 0
    data = {}
    for row in reader(stream):
        if not stratified:
            key, values = "#", row
        else:
            key, values = row[0], row[1:]
        total += len(values)
        values2 = []
        for v in values:
            try:
                values2.append(float(v))
            # bug fix: was a bare except, which also swallowed
            # KeyboardInterrupt/SystemExit; only conversion errors
            # should be tolerated here
            except (TypeError, ValueError):
                warn("bad value:", v)
                bad += 1
        data.setdefault(key, []).extend(values2)
    if bad > 0:
        warn("bad values: %d (%.2f%%)" % (bad, 100 * bad / float(total)))
    # drop keys whose rows contained no parsable values
    data = {k: v for k, v in data.items() if len(v) > 0}
    return data
Esempio n. 12
0
 def index(self):
     """Rebuild shape fields and head -> position lookup maps.

     Reads self.data (2-D array), self.rowheads, and self.colheads, and
     sets self.shape, self.nrows, self.ncols, self.rowmap, and
     self.colmap. Duplicated heads are disambiguated by appending
     "-dup" until unique, with warnings at each attempt.
     """
     self.shape = self.data.shape
     self.nrows, self.ncols = self.shape
     assert len(self.rowheads) == self.nrows, "Dimension issue"
     assert len(self.colheads) == self.ncols, "Dimension issue"
     self.rowmap = {}
     self.colmap = {}
     for position, head in enumerate(self.rowheads):
         while head in self.rowmap:
             warn("Duplicate rowhead", head, "defined at",
                  self.rowmap[head])
             head += "-dup"
             warn("  trying", head)
         self.rowmap[head] = position
     for position, head in enumerate(self.colheads):
         while head in self.colmap:
             warn("Duplicate colhead", head, "defined at",
                  self.colmap[head])
             head += "-dup"
             warn("  trying", head)
         self.colmap[head] = position
Esempio n. 13
0
 def rowsort(self, metric, linkage="average"):
     """Reorder rows of self.dat (and row metadata) in place.

     metric  : "none" | "names" | "mean" | path to a file listing the
               desired row order | "metadata[:k]" (sort on the k-th,
               1-based, row-metadata field) | "spearman" | any metric
               accepted by scipy's linkage
     linkage : scipy linkage method for the clustering-based sorts
     Returns the scipy linkage matrix Z when clustering was performed
     (callers use it to draw a dendrogram), else None.
     """
     Z = None
     if metric == "none":
         order = range(len(self.row))
     elif metric == "names":
         order = sorted(range(len(self.row)),
                        key=lambda i: self.row[i],
                        reverse=True)
     elif metric == "mean":
         order = sorted(range(len(self.row)),
                        key=lambda i: np.mean(self.dat[i]))
     elif os.path.exists(metric):
         # explicit order provided as a file of row names
         d = file_order(metric)
         if len(d) != len(self.row):
             warn("dimension mismatch with order in", metric, len(d),
                  len(self.row))
         order = [0 for k in self.row]
         for i, k in enumerate(self.row):
             if k in d:
                 order[d[k]] = i
             else:
                 warn("no position for", k, "in", metric)
     elif "metadata" in metric:
         index = 0 if ":" not in metric else (int(metric.split(":")[1]) - 1)
         values = self.rowmeta[index]
         order = sorted(range(len(values)),
                        key=lambda i: values[i],
                        reverse=True)
     elif metric == "spearman":
         # rank-transform each column, then cluster rows by correlation
         temp = [rankdata(col) for col in self.dat.transpose()]
         temp = np.array(temp)
         try:
             Z = sch.linkage(temp.transpose(),
                             method=linkage,
                             metric="correlation")
             order = sch.leaves_list(Z)
         # bug fix: was a bare except, which also swallowed
         # KeyboardInterrupt/SystemExit
         except Exception:
             warn("Spearman clustering failed")
             Z = None
             order = range(len(self.row))
     else:
         Z = sch.linkage(self.dat, method=linkage, metric=metric)
         order = sch.leaves_list(Z)
     # apply the computed permutation to data, names, and metadata
     self.dat = self.dat[order, :]
     self.row = subseq(self.row, order)
     if self.rowmeta is not None:
         self.rowmeta = [subseq(x, order) for x in self.rowmeta]
     return Z
Esempio n. 14
0
def main():
    """Load an obo ontology, apply the requested trimming cuts, and print
    the surviving terms (optionally with their annotated genes) as a
    tab-separated polymap to --outfile or stdout."""

    args = get_args()

    # load obo / report rel type
    obo = Ontology(args.obo)
    warn("Summary of relationship types:")
    for k in sorted(parentage_types):
        warn(k, parentage_types[k])

    # attach genes
    if args.mapping is not None:
        mapping = polymap(args.mapping, reverse=args.flip)
        if args.allowed_genes is not None:
            allowed = col2dict(args.allowed_genes)
            mapping = {k: v for k, v in mapping.items() if k in allowed}
        obo.attach_genes(mapping)
        warn("# of attached genes:", len(obo.attached_genes))

    # informative cut
    if args.informative is not None:
        threshold = float(args.informative)
        if threshold < 1:
            # values < 1 are a fraction of all annotated genes
            warn(
                "Intepretting informative cutoff as fraction of annotated genes"
            )
            threshold *= len(obo.attached_genes)
        threshold = int(threshold)
        obo.set_informative(threshold)
        for term in obo.iter_terms():
            if not term.is_informative:
                term.is_acceptable = False

    # pruning cut
    if args.prune is not None:
        obo.prune(args.prune)
        for term in obo.iter_terms():
            if not term.is_pruned:
                term.is_acceptable = False

    # depth cut
    if args.depth is not None:
        for term in obo.iter_terms():
            if term.depth != args.depth:
                term.is_acceptable = False

    # grep cut
    if args.grep is not None:
        for term in obo.iter_terms():
            if not re.search(args.grep, term.name):
                term.is_acceptable = False

    # namespace cut
    if args.namespace is not None:
        for term in obo.iter_terms():
            if term.namespace_short not in args.namespace:
                term.is_acceptable = False

    # output the new polymap
    fh = open(args.outfile, "w") if args.outfile is not None else sys.stdout
    for term in obo.iter_terms():
        if term.is_acceptable:
            outline = [str(term)]
            if not args.terms_only:
                outline += list(term.get_progeny_genes()
                                if not args.ignore_progeny else term.genes)
            # bug fix: was Python-2 `print >> fh` syntax
            print("\t".join(outline), file=fh)
    # bug fix: only close the handle we opened; the original also closed
    # sys.stdout when no --outfile was given
    if fh is not sys.stdout:
        fh.close()
Esempio n. 15
0
# merge one or more input tables into a single table <t>
# NOTE(review): `p` is referenced before any visible assignment in the first
# two branches -- presumably it is bound above this excerpt to the first
# entry of args.tables; confirm before relying on it
if len( args.tables ) == 1:
    t = table( p )
elif args.legacy:
    # legacy mode: start from the first table and merge the rest pairwise
    t = table( p )
    for p2 in args.tables[1:]:
        t2 = table( p2 )
        t.merge( t2 )
else:    
    # default mode: pool all tables into one nested dict; later tables
    # overwrite earlier ones, with a warning when values actually differ
    data = {}
    for p in args.tables:
        d = table( p ).table2nesteddict( )
        for r in d:
            inner = data.setdefault( r, {} )
            for c in d[r]:
                if c in inner and inner[c] != d[r][c]:
                    warn( p, "overwrites", r, c, inner[c], "with", d[r][c] )
                inner[c] = d[r][c]
    t = nesteddict2table( data, empty=c_strNA )

if args.metatable is not None:
    # merge in a separate metadata table
    t.metamerge( table( args.metatable ) )
if args.fill_empty is not None:
    # replace NA placeholders with the requested fill value
    t.apply_entries( lambda x: x if x != c_strNA else args.fill_empty )
    
# ---------------------------------------------------------------
# dump table
# ---------------------------------------------------------------

t.dump( )
Esempio n. 16
0
    # (fragment) continues an argparse setup whose enclosing function and
    # `parser` creation sit above this excerpt
    parser.add_argument('--obo', default=None, help='')
    parser.add_argument('--map', default=None, help='')
    parser.add_argument('--depth', default=None, help='')
    parser.add_argument('--grep', default=None, nargs="+", help='')
    parser.add_argument( '--namespace', default=None, \
                         choices=["BP", "MF", "CC"], nargs="+", help='' )
    parser.add_argument('--informative', default=None, help='')
    parser.add_argument('--ignore_progeny', action="store_true", help='')
    parser.add_argument('--terms_only', action="store_true", help='')
    parser.add_argument('--output', default=None, help='')
    args = parser.parse_args()

    # warnings
    if args.ignore_progeny:
        warn(
            "Only considering direct annotations. Annotations will not rise through the DAG."
        )

    # load data (attach genes?)
    obo = Ontology(args.obo)
    if args.map is not None:
        obo.attach_genes(args.map)

    # set informative?
    if args.informative is not None:
        # informative values < 1 are treated as a fraction of annotated genes
        cutoff = float(args.informative)
        if cutoff < 1:
            # NOTE(review): Python-2 print syntax; SyntaxError under Python 3
            print >> sys.stderr, "Intepretting informative cutoff as fraction of annotated genes"
            cutoff *= len(obo.attached_genes)
        cutoff = int(cutoff)
        obo.set_informative(cutoff)
Esempio n. 17
0
    def __init__(self, p_obo):
        """Build the GO DAG from the obo file at <p_obo>.

        Populates:
          terms          : {goid: term} for non-obsolete terms with valid ids
          idmap          : {goid or alt_id: canonical goid}; obsolete ids map
                           to their replacement when one is declared
          roots / leaves : terms with no parents / no children
          attached_genes : left empty here (filled later, by attach_genes --
                           TODO confirm)
        Also assigns term.depth = minimum distance from a root.
        """

        # mapping from goid to term object
        self.terms = {}
        self.idmap = {}
        self.roots = []
        self.leaves = []
        self.attached_genes = set()

        # populate terms and lookup
        for term in OBOParser(p_obo).extract_terms():
            if not re.search(c_goid_pattern, term.goid):
                # skip ids that don't match the expected GO id pattern
                continue
            elif term.is_obsolete:
                if term.replaced_by is not None:
                    self.idmap[term.goid] = term.replaced_by
            else:
                self.terms[term.goid] = term
                self.idmap[term.goid] = term.goid
                for alt_id in term.alt_ids:
                    self.idmap[alt_id] = term.goid

        # populate parent/child relationships
        for cterm in self.terms.values():
            for parent_id in cterm.parent_ids:
                # NOTE(review): raises KeyError if a parent id is obsolete or
                # otherwise absent from self.terms -- confirm inputs guarantee
                # parent presence
                pterm = self.terms[parent_id]
                if ALLOW_CROSSTALK or (pterm.namespace == cterm.namespace):
                    cterm.parents.add(pterm)
                    pterm.children.add(cterm)
                else:
                    # cross-namespace links are reported and dropped
                    warn(
                        "parent->child spans namespaces (ignoring):",
                        "\n",
                        "\t",
                        pterm.__repr__(),
                        "\n",
                        "\t",
                        cterm.__repr__(),
                        "\n",
                    )
                    parentage_types["Cross-ontology [ignored]"] += 1

        # identify roots, leaves
        for termid, term in self.terms.items():
            if len(term.parents) == 0:
                term.is_root = True
                self.roots.append(term)
            if len(term.children) == 0:
                term.is_leaf = True
                self.leaves.append(term)

        # add depth information
        def recurse_depth(term):
            # relax each child's depth to the smallest value seen so far
            step_down = term.depth + 1
            for cterm in term.children:
                if ( cterm.depth is None ) or \
                   ( cterm.depth > step_down ):
                    cterm.depth = step_down
                    recurse_depth(cterm)

        for root in self.roots:
            root.depth = 0
            recurse_depth(root)
Esempio n. 18
0
def test_insert(d, key, value, verbose=True):
    """ inserts a key, value pair but warns if we're overwriting something """
    if verbose and key in d and d[key] != value:
        warn("Overwriting <{}>:<{}> with <{}>".format(key, d[key], value))
    d[key] = value
Esempio n. 19
0
# load second file: index full lines by the join key column
# NOTE(review): args, try_open, c_sep, c_na, warn, die are defined above
# this excerpt
lengths2 = []
d = {}
headers2 = None
with try_open(args.file2) as fh:
    for items in csv.reader(fh, dialect="excel-tab"):
        # NOTE(review): the header row's length is recorded in lengths2
        # before the row is skipped -- confirm that is intended
        lengths2.append(len(items))
        if headers2 is None and args.head2:
            headers2 = c_sep.join(items)
            continue
        key = items[args.key2]
        # map key -> {full line: 1}; a dict keeps duplicate lines unique
        d.setdefault(key, {})["\t".join(items)] = 1
print("finished loading file2", file=sys.stderr)

# make dummy line to add when join fails
if len(set(lengths2)) != 1:
    warn("file2 lines have unequal lengths")
    if args.het:
        # heterogeneous rows allowed: pad failed joins with a single NA
        dummyline2 = c_na
    else:
        die()
else:
    # uniform width: one NA per column of file2
    dummyline2 = "\t".join(c_na for k in range(lengths2[0]))
if not args.head2:
    headers2 = dummyline2

# load first file, print join
counts = Counter()
lengths1 = []
hits = {}
headers1 = None
with (try_open(args.file1) if args.file1 != "-" else sys.stdin) as fh:
Esempio n. 20
0
def main():
    """Render a clustered, annotated heatmap from a table file.

    Drives the full pipeline: load the table and metadata, size the layout
    grid, cluster rows/columns (drawing dendrograms), transform values,
    draw the heatmap and metadata strips, break lines, grids, dots, the
    legend, and the row/column labels, then save to args.output.
    """

    args = get_args()
    fig = plt.figure()
    dims = Dimensions()
    df = DataFrame(
        args.table,
        lastcol=args.lastcol,
        lastrow=args.lastrow,
        colmeta=args.colmeta,
        rowmeta=args.rowmeta,
        eps_noise=args.eps_noise,
    )

    # force labeling all features
    if args.force_labels:
        global c_max_lab
        c_max_lab = 1e6

    # dim overrides
    vscale = 1
    hscale = 1
    if args.colmeta is not None and args.metascale:
        dims.colmeta_r = len(args.colmeta)
    if args.rowmeta is not None and args.metascale:
        dims.rowmeta_c = len(args.rowmeta)
    if args.vscale:
        old = dims.heat_r
        new = int(len(df.row) / 2.0) + len(df.row) % 2
        new = max(new, 8)
        dims.heat_r = new
        vscale = new / float(old)
    if args.hscale:
        old = dims.heat_c
        new = int(len(df.col) / 2.0) + len(df.col) % 2
        # bug fix: clamp BEFORE assigning, mirroring the vscale branch above
        # (the original assigned dims.heat_c prior to applying the floor of 12)
        new = max(new, 12)
        dims.heat_c = new
        hscale = new / float(old)
    if args.cbar_extent is not None:
        dims.cbar_extent = args.cbar_extent
    if not args.debug:
        if args.title is None:
            dims.title_r = 0
        # no tree axes if last sort is on 1) file or 2) metadata or 3) nothing
        if os.path.exists( args.colsort[-1] ) \
           or re.search( "none|names|mean|metadata", args.colsort[-1] ):
            dims.coltree_r = 0
        if os.path.exists( args.rowsort[-1] ) \
           or re.search( "none|names|mean|metadata", args.rowsort[-1] ):
            dims.rowtree_c = 0
        if args.colmeta is None:
            dims.colmeta_r = 0
        if args.rowmeta is None:
            dims.rowmeta_c = 0
        if len(df.col) > c_max_lab and not args.hscale:
            dims.colnames_r = 1
        if len(df.row) > c_max_lab and not args.vscale:
            dims.rownames_c = 1
    dims.update()

    # manual overrides, e.g. "heat_r:20"
    for o in args.overrides:
        p, v = o.split(":")
        v = int(v)
        setattr(dims, p, v)
    dims.update()

    # define figure
    fig.set_size_inches(
        args.hstretch * dims.csize * args.grid_inches / dims.scale,
        args.vstretch * dims.rsize * args.grid_inches / dims.scale)

    # setup axes
    axes = HeatmapAxes(dims)

    # cluster cols (only the last sort can yield a dendrogram)
    Z = None
    for metric in args.colsort:
        Z = df.colsort(metric, linkage=args.linkage)
    if Z is not None:
        sch.dendrogram( Z, ax=axes.coltree, \
                        above_threshold_color="0.75",
                        color_threshold=0, )

    # cluster rows (only the last sort can yield a dendrogram)
    Z = None
    for metric in args.rowsort:
        Z = df.rowsort(metric, linkage=args.linkage)
    if Z is not None:
        sch.dendrogram( Z, ax=axes.rowtree, orientation="left", \
                        above_threshold_color="0.75",
                        color_threshold=0, )

    # apply transform
    df.transform(args.transform)

    # check limits; flag values falling outside user-provided limits
    poormin = False
    poormax = False
    vmin, vmax = (None, None) if args.limits is None else args.limits
    dmin, dmax = np.min(df.dat), np.max(df.dat)
    if vmin is None:
        vmin = dmin
    elif dmin < vmin:
        poormin = True
        n, p = acheck(df.dat, lambda x: x < vmin)
        warn("{} values ({:.2f}%) < vmin ({}), extreme: {}".format(
            n, 100 * p, vmin, dmin))
    if vmax is None:
        vmax = dmax
    elif dmax > vmax:
        poormax = True
        n, p = acheck(df.dat, lambda x: x > vmax)
        warn("{} values ({:.2f}%) > vmax ({}), extreme: {}".format(
            n, 100 * p, vmax, dmax))

    # add heatmap
    axes.heatmap.set_xlim(0, len(df.col))
    axes.heatmap.set_ylim(0, len(df.row))
    # imshow is similar to pcolorfast, but better centered
    if args.engine == "imshow":
        nr = len(df.row)
        nc = len(df.col)
        kwargs = {
            "interpolation": "none",
            "origin": "lower",
            "aspect": "auto",
            "extent": [0, nc, 0, nr]
        }
        pc = axes.heatmap.imshow(df.dat,
                                 cmap=args.cmap,
                                 vmin=vmin,
                                 vmax=vmax,
                                 **kwargs)
    # probably no reason to use this
    elif args.engine == "pcolorfast":
        pc = axes.heatmap.pcolorfast(df.dat,
                                     cmap=args.cmap,
                                     vmin=vmin,
                                     vmax=vmax)
    # use this if you want the individual heatmap cells to be editable shapes
    elif args.engine == "pcolormesh":
        pc = axes.heatmap.pcolormesh(df.dat,
                                     cmap=args.cmap,
                                     vmin=vmin,
                                     vmax=vmax)

    # add cmap bar
    fig.colorbar(pc, cax=axes.cbar)
    axes.cbar.set_ylabel( args.units if args.transform == "none" else \
                          "{}( {} )".format( args.transform, args.units ), size=c_font3 )
    set_cbar_ticks(axes.cbar, pc.get_clim(), poormin=poormin, poormax=poormax)

    # add column metadata
    if df.colmeta is not None:
        colmeta_cmaps = axes.colmetaplot(df, args.colmeta_colors,
                                         args.max_levels)

    # add row metadata
    if df.rowmeta is not None:
        rowmeta_cmaps = axes.rowmetaplot(df, args.rowmeta_colors,
                                         args.max_levels)

    # column transition lines if ending on a metasort
    if "metadata" in args.colsort[-1]:
        args.colbreaks = args.colsort[-1]
    if args.colbreaks is not None:
        lastsort = args.colbreaks
        index = 0 if ":" not in lastsort else \
                ( int( lastsort.split( ":" )[1] ) - 1 )
        pos = []
        for i, value in enumerate(df.colmeta[index]):
            if i > 0 and df.colmeta[index][i - 1] != value:
                pos.append(i)
        for i in pos:
            mu.vline(axes.colmeta, i, color="black")
            mu.vline(axes.heatmap, i, color=args.break_color)

    # add row transition lines if ending on a metasort
    if "metadata" in args.rowsort[-1]:
        args.rowbreaks = args.rowsort[-1]
    if args.rowbreaks is not None:
        lastsort = args.rowbreaks
        index = 0 if ":" not in lastsort else \
                ( int( lastsort.split( ":" )[1] ) - 1 )
        pos = []
        for i, value in enumerate(df.rowmeta[index]):
            if i > 0 and df.rowmeta[index][i - 1] != value:
                pos.append(i)
        for i in pos:
            mu.hline(axes.rowmeta, i, color="black")
            mu.hline(axes.heatmap, i, color=args.break_color)

    # add generic grids
    if "x" in args.grid:
        for i in range(1, len(df.col)):
            mu.vline(axes.heatmap, i, color=args.break_color)
    if "y" in args.grid:
        for i in range(1, len(df.row)):
            mu.hline(axes.heatmap, i, color=args.break_color)

    # title
    if args.title is not None:
        axes.set_title(args.title)

    # add dots
    dots_added = []
    if args.dots is not None:
        for p in args.dots:
            dots_added.append(add_dots(axes, df, p))

    # legend
    L = mu.Legendizer(axes.legend, vscale=0.7 / vscale)
    # col sort legend
    L.subhead("Col sort")
    for m in args.colsort:
        if "metadata" in m:
            i = 0
            if ":" in m:
                i = int(m.split(":")[1]) - 1
            m = "metadata:" + df.colmetaname[i]
        L.element("_", color="0.75", label=m)
    # row sort legend
    L.subhead("Row sort")
    for m in args.rowsort:
        if "metadata" in m:
            i = 0
            if ":" in m:
                i = int(m.split(":")[1]) - 1
            m = "metadata:" + df.rowmetaname[i]
        L.element("_", color="0.75", label=m)
    # col metadata legend ("Other"/"None" levels sort to the end)
    levelorder = {c_str_other: 1, c_str_none: 2}
    if df.colmeta is not None:
        for n, c in zip(df.colmetaname[::-1], colmeta_cmaps[::-1]):
            L.subhead(n)
            for l in sorted(c, key=lambda x: [levelorder.get(x, 0), x]):
                color = c[l]
                L.element("s", color=color, label=l)
    # row metadata legend
    if df.rowmeta is not None:
        for n, c in zip(df.rowmetaname[::-1], rowmeta_cmaps[::-1]):
            L.subhead(n)
            for l in sorted(c, key=lambda x: [levelorder.get(x, 0), x]):
                color = c[l]
                L.element("s", color=color, label=l)
    if len(dots_added) > 0:
        L.subhead("Dots")
        for p, kwargs in dots_added:
            marker = kwargs.get("marker", "o")
            kwargs = {
                k: v
                for k, v in kwargs.items() if k not in "s marker".split()
            }
            L.element(marker, label=path2name(p), **kwargs)
    # finalize
    L.draw()

    # cleanup
    # bug fix: both comparisons used `is not "-"` (identity test against a
    # string literal); equality is the intended comparison
    if args.override_colnames != "-":
        axes.collabel(df,
                      args.collabel,
                      scaled=args.hscale,
                      path=args.override_colnames)
    if args.override_rownames != "-":
        axes.rowlabel(df,
                      args.rowlabel,
                      scaled=args.vscale,
                      path=args.override_rownames)
    if not args.debug:
        axes.clean()
    plt.subplots_adjust(wspace=0.3, hspace=0.3)
    plt.savefig(args.output, bbox_inches="tight")

    # logging (bug fix: was Python-2 `print >> fh` syntax)
    if args.dump_colsort_order:
        with open(args.output + ".colsort", "w") as fh:
            for item in df.col:
                print(item, file=fh)
    if args.dump_rowsort_order:
        with open(args.output + ".rowsort", "w") as fh:
            for item in df.row:
                print(item, file=fh)