def getfeat1(label, n):
    """Build a comma-separated feature string for node ``n``.

    The string always starts with ``label`` and ``n.kind``.  A third field
    is appended depending on the kind:

    * ``'call'`` -- the method name parsed from the first space-separated
      entry of ``n.data``.
    * ``'new'`` -- ``klass.name`` parsed from the first entry of ``n.data``.
    * a kind in ``TYPEOPS`` -- ``get_name()`` of the type parsed from
      ``n.data`` by ``DFType.parse``.
    * anything else -- ``n.data`` itself, left out when it is ``None``.
    """
    kind = n.kind
    if kind in ('call', 'new'):
        # Only the first entry of the data string is parsed.
        first = n.data.partition(' ')[0]
        (klass, name, _) = parsemethodname(first)
        detail = name if kind == 'call' else klass.name
        return f'{label},{kind},{detail}'
    if kind in TYPEOPS:
        (_, typ) = DFType.parse(n.data)
        return f'{label},{kind},{typ.get_name()}'
    if n.data is None:
        return f'{label},{kind}'
    return f'{label},{kind},{n.data}'
def f(group, level=1):
    """Write the DOT description of ``group`` (and its children) to ``out``.

    A group with children becomes a ``subgraph`` cluster containing an
    "enter" vertex (``group.vin``) and an "exit" vertex (``group.vout``);
    its children are written recursively one level deeper.  A group without
    children is written as a single box named after ``group.vin``.

    Edges are not written here: they are appended to ``outedges`` as
    ``(source, target)`` vertex pairs.  ``out``, ``outedges``, ``q`` and
    ``stripid`` are taken from the enclosing scope.
    """
    indent = ' '*level
    (klass, name, _) = parsemethodname(group.method.name)
    vin = group.vin
    vout = group.vout

    if not group.children:
        # Leaf group: a single box stands for both vin and vout, so every
        # collected edge starts at vin.
        out.write(indent+f'V{vin.vid} [shape=box, label={q(name)}];\n')
        outedges.extend((vin, vtx) for vtx in vin.linkto if vtx is not vout)
        # NOTE(review): the comparison against vout (rather than vin) is
        # carried over unchanged from the first loop -- confirm intent.
        outedges.extend((vin, vtx) for vtx in vout.linkto if vtx is not vout)
        return

    out.write(indent+f'subgraph {q("cluster_"+str(group.gid))} {{\n')
    out.write(indent+f' label={q(stripid(klass.name)+"."+name)};\n')
    out.write(indent+f' V{vin.vid} [label={q("enter")}];\n')
    out.write(indent+f' V{vout.vid} [label={q("exit")}];\n')
    outedges.extend((vin, vtx) for vtx in vin.linkto)
    outedges.extend((vout, vtx) for vtx in vout.linkto)
    for child in group.children:
        f(child, level+1)
    out.write(indent+'}\n')
    return
def shownode(n):
    """Return a short ``<...>`` description of node ``n``.

    * falsy ``n.kind`` -- ``'<empty>'``
    * ``'call'`` -- kind plus the method name (first entry of ``n.data``)
      followed by ``()``
    * ``'new'`` -- kind plus ``klass.name`` parsed from the first entry of
      ``n.data``
    * a kind in ``REFS`` -- kind plus ``parserefname(n.ref)``
    * a kind in ``TYPEOPS`` -- kind plus the ``name`` of the type parsed
      from ``n.data``
    * otherwise -- kind, followed by ``n.data`` unless it is ``None``
    """
    kind = n.kind
    if not kind:
        return '<empty>'
    if kind == 'call':
        first = n.data.partition(' ')[0]
        (_, name, _) = parsemethodname(first)
        return f'<{kind} {name}()>'
    if kind == 'new':
        first = n.data.partition(' ')[0]
        (klass, _, _) = parsemethodname(first)
        return f'<{kind} {klass.name}>'
    if kind in REFS:
        return f'<{kind} {parserefname(n.ref)}>'
    if kind in TYPEOPS:
        (_, klass) = DFType.parse(n.data)
        return f'<{kind} {klass.name}>'
    if n.data is None:
        return f'<{kind}>'
    return f'<{kind} {n.data}>'
def main(argv):
    """Render method graphs as Graphviz source or as an HTML page.

    Options (parsed with ``getopt``):

    * ``-H``         write HTML (one ``run_dot`` result per ``<div>``)
    * ``-o output``  write to ``output`` instead of standard output; HTML
      mode is then chosen by whether the path ends in ``.html``
    * ``-h nid,...`` node ids passed to ``write_gv`` as ``highlight``
    * ``-n name``    only keep the method with this exact name

    With no graph arguments, ``'-'`` is passed to ``get_graphs``.

    The output file is opened only after all graphs are loaded and is
    always closed; when writing to ``sys.stdout`` the stream is flushed but
    left open, so the caller can keep printing afterwards.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(
            f'usage: {argv[0]} [-H] [-o output] [-n name] [-h nid] [graph ...]'
        )
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'Ho:h:n:')
    except getopt.GetoptError:
        return usage()
    html = False
    outpath = None
    highlight = None
    names = None
    for (k, v) in opts:
        if k == '-H':
            html = True
        elif k == '-o':
            outpath = v
            html = v.endswith('.html')
        elif k == '-h':
            highlight = {int(nid) for nid in v.split(',')}
        elif k == '-n':
            names = [v]
    if not args:
        args.append('-')

    methods = []
    for path in args:
        for method in get_graphs(path):
            if names and method.name not in names:
                continue
            methods.append(method)

    def emit(output):
        # Write every selected method to the already-open stream.
        if html:
            output.write('<!DOCTYPE html><html><body>\n')
            for data in run_dot(methods):
                output.write('<div>\n')
                output.write(data)
                output.write('</div><hr>\n')
            output.write('</body>')
        else:
            for method in methods:
                if method.root is None:
                    continue
                (klass, name, func) = parsemethodname(method.name)
                write_gv(output, method.root,
                         highlight=highlight, name=f'{klass}.{name}')

    if outpath is None:
        # Never close the process-wide stdout; just push the data out.
        emit(sys.stdout)
        sys.stdout.flush()
    else:
        with open(outpath, 'w') as output:
            emit(output)
    return 0
def show(count, r, level=0):
    """Print the nested list ``r`` as an indented tree of method names.

    ``r[0]`` is a component whose first node's name is parsed with
    ``parsemethodname``; the remaining items of ``r`` are sub-trees of the
    same shape.  Each printed line is indented by ``level`` spaces and
    starts with ``level``.  When ``count`` holds 2 or more for the
    component, the line ends with ``...`` and its sub-trees are not
    printed.  Nothing is printed if the name cannot be parsed
    (``ValueError``).
    """
    cpt = r[0]
    try:
        (_, name, _) = parsemethodname(cpt.nodes[0].name)
    except ValueError:
        return
    line = ' '*level + f'{level} {name}'
    if 2 <= count[cpt]:
        print(line + ' ...')
        return
    print(line)
    for subtree in r[1:]:
        show(count, subtree, level+1)
    return
def getfeatext(label, n):
    """Build an extended ``label,kind,detail`` feature string for ``n``.

    The detail field is chosen by the first matching rule:

    * ``n.is_funcall()`` -- method name parsed from the first
      space-separated entry of ``n.data``
    * kind in ``REFS`` or ``ASSIGNS`` -- ``parserefname(n.ref)``
    * ``'value'`` whose ``ntype`` is ``'Ljava/lang/String;'`` -- the
      literal ``STRING``
    * kind in ``TYPEOPS`` -- ``get_name()`` of the type parsed from
      ``n.data``
    * kind in ``EXTOPS`` -- ``n.data``

    Returns ``None`` when no rule matches.
    """
    if n.is_funcall():
        first = n.data.partition(' ')[0]
        (_, detail, _) = parsemethodname(first)
        return f'{label},{n.kind},{detail}'

    kind = n.kind
    if kind in REFS or kind in ASSIGNS:
        detail = parserefname(n.ref)
    elif kind == 'value' and n.ntype == 'Ljava/lang/String;':
        detail = 'STRING'
    elif kind in TYPEOPS:
        (_, typ) = DFType.parse(n.data)
        detail = typ.get_name()
    elif kind in EXTOPS:
        detail = n.data
    else:
        return None
    return f'{label},{kind},{detail}'
def getfeat1(n):
    """Resolve ``n`` to a significant node and name its feature.

    Starting from ``n``, nodes whose kind is in ``IGNORED`` and that have
    exactly one input are skipped by following that input.  The node
    reached is returned together with a feature:

    * kind in ``VALUES`` -- ``'#const'``
    * kind in ``REFS`` with a ref not starting with ``'@'`` --
      ``parserefname(n.ref)``
    * ``'call'`` -- ``'()'`` plus the method name parsed from the first
      whitespace-separated entry of ``n.data``
    * otherwise -- ``None``

    Returns a ``(node, feature)`` tuple.
    """
    # Walk through pass-through nodes that have a single input.
    while len(n.inputs) == 1 and n.kind in IGNORED:
        (n, ) = n.inputs.values()

    kind = n.kind
    if kind in VALUES:
        return (n, '#const')
    if kind in REFS and not n.ref.startswith('@'):
        return (n, parserefname(n.ref))
    if kind == 'call':
        first = n.data.split()[0]
        (_, name, _) = parsemethodname(first)
        return (n, '()' + name)
    return (n, None)
def main(argv):
    """Count the words used in method names, grouped by part-of-speech tag.

    Options: ``-d`` (counted, otherwise unused here) and ``-n limit``
    (number of words listed per tag, default 10; a value of 0 or less
    lists all of them).

    For each method returned by ``get_graphs`` whose parsed name does not
    start with ``'<'``, the name is split with ``splitwords``, reversed and
    tagged with ``postag``; every ``(pos, word)`` pair is tallied.  The
    totals per tag, the number of distinct words per tag, and then the
    most frequent words of each tag are printed.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-d] [-n limit] [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dn:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    limit = 10
    for (opt, value) in opts:
        if opt == '-d':
            debug += 1
        elif opt == '-n':
            limit = int(value)

    # words[pos][word] -> number of occurrences
    words = {}
    for path in args:
        for method in get_graphs(path):
            (_, name, _) = parsemethodname(method.name)
            if name.startswith('<'):
                continue
            for (pos, word) in postag(reversed(splitwords(name))):
                tally = words.setdefault(pos, {})
                tally[word] = tally.get(word, 0) + 1

    print('counts', {pos: sum(tally.values()) for (pos, tally) in words.items()})
    print('words', {pos: len(tally) for (pos, tally) in words.items()})
    # Tags with the most distinct words come first.
    by_variety = sorted(
        words.items(), key=lambda item: len(item[1]), reverse=True)
    for (pos, tally) in by_variety:
        print(pos)
        ranked = sorted(
            tally.items(), key=lambda item: item[1], reverse=True)
        if 0 < limit:
            ranked = ranked[:limit]
        for (word, n) in ranked:
            print(f' {n} {word}')
    return 0
def run_dot(methods, type='svg'):
    """Run Graphviz ``dot`` on ``methods`` and return one string per graph.

    Every method with a non-``None`` ``root`` is written with ``write_gv``
    into a single in-memory buffer, which is fed to ``dot -T<type>`` on
    standard input.  The output is split at lines starting with ``'<?'``:
    those lines are dropped, and the lines between them are concatenated
    (without separators) into one string each.

    Returns the list of those strings.
    """
    args = ['dot', '-T' + type]
    logging.info(f'run_dot: {args!r}')

    source = io.StringIO()
    for method in methods:
        if method.root is not None:
            (_, name, _) = parsemethodname(method.name)
            write_gv(source, method.root, name=name)

    proc = Popen(args, stdin=PIPE, stdout=PIPE, encoding='utf-8')
    (stdout, _) = proc.communicate(source.getvalue())

    documents = []
    pending = []
    for line in stdout.splitlines():
        if not line.startswith('<?'):
            pending.append(line)
        elif pending:
            # A '<?' line starts the next document: flush the previous one.
            documents.append(''.join(pending))
            pending = []
    if pending:
        documents.append(''.join(pending))
    return documents
def main(argv):
    """Print caller trees for the methods loaded into an ``IDFBuilder``.

    Options: ``-d`` (DEBUG logging) and ``-M maxoverrides`` (passed to
    ``IDFBuilder``, default 1).  At least one graph path is required.

    Methods are grouped into components with ``SCC.fromnodes``, linked
    through their callers as recorded in ``builder.funcalls``.  For every
    component with an empty ``linkto``, the tree reached through
    ``linkfrom`` is printed with one indented line per component; a
    component reached a second time is shown with a trailing ``...`` and
    not expanded again.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-d] [-M maxoverrides] [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dM:')
    except getopt.GetoptError:
        return usage()
    level = logging.INFO
    maxoverrides = 1
    for (opt, value) in opts:
        if opt == '-d':
            level = logging.DEBUG
        elif opt == '-M':
            maxoverrides = int(value)
    if not args:
        return usage()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(message)s', level=level)

    builder = IDFBuilder(maxoverrides=maxoverrides)
    for path in args:
        logging.info(f'Loading: {path!r}...')
        builder.load(path)

    if 0:
        # Disabled: list all the methods and the number of times each one
        # is called.
        for method in builder.methods:
            mname = method.name
            if mname not in builder.funcalls:
                continue
            try:
                (_, name, _) = parsemethodname(mname)
                n = len(builder.funcalls[mname])
                print(n, name)
            except ValueError:
                pass
        return

    def getcallers(callee):
        # Methods containing a call node that targets ``callee``.
        if callee.name not in builder.funcalls:
            return []
        return {node.method for node in builder.funcalls[callee.name]}

    (cpts, _) = SCC.fromnodes(builder.methods, getcallers)

    def visit(count, cpt):
        # Build [cpt, subtree, ...]; a component is expanded only on its
        # first visit.
        seen = count.get(cpt, 0) + 1
        count[cpt] = seen
        tree = [cpt]
        if seen < 2:
            tree.extend(visit(count, c) for c in cpt.linkfrom)
        return tree

    def show(count, r, level=0):
        cpt = r[0]
        try:
            (_, name, _) = parsemethodname(cpt.nodes[0].name)
        except ValueError:
            return
        line = ' '*level + f'{level} {name}'
        if 2 <= count[cpt]:
            print(line + ' ...')
            return
        print(line)
        for subtree in r[1:]:
            show(count, subtree, level+1)
        return

    for root in cpts:
        if root.linkto:
            continue
        count = {}
        tree = visit(count, root)
        if len(tree) < 2:
            continue
        show(count, tree)
        print()
    return 0
def main(argv):
    """Write a clustered call graph of the loaded methods in DOT format.

    Options (parsed with ``getopt``):

    * ``-d``               DEBUG logging
    * ``-o output``        write to ``output`` instead of standard output
    * ``-f method,...``    names added to ``targets``
    * ``-M maxoverrides``  how many entries of a call node's data are
      followed (default 1)
    * ``-L maxlevel``      maximum length of the call chain that is
      followed; 0 disables the limit (default 5)

    At least one graph path is required.  Methods whose ``style`` is
    ``'initializer'`` are skipped.  Every method without callers is traced
    into a ``Group``; groups are then written as nested ``subgraph``
    clusters followed by the collected edges.

    The output file is opened only once the graph has been built and is
    closed when writing finishes, even on error.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-d] [-o output] [-f method] [-M maxoverrides] [-L maxlevel] [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'do:f:M:L:')
    except getopt.GetoptError:
        return usage()
    level = logging.INFO
    outpath = None
    maxoverrides = 1
    maxlevel = 5
    # NOTE(review): ``targets`` is filled from -f but the filter that used
    # it is commented out below, so -f currently has no effect.
    targets = {'main'}
    for (k, v) in opts:
        if k == '-d':
            level = logging.DEBUG
        elif k == '-o':
            outpath = v
        elif k == '-f':
            targets.update(v.split(','))
        elif k == '-M':
            maxoverrides = int(v)
        elif k == '-L':
            maxlevel = int(v)
    if not args:
        return usage()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(message)s', level=level)

    methods = []
    name2method = {}
    for path in args:
        logging.info(f'Loading: {path}...')
        for method in get_graphs(path):
            if method.style == 'initializer':
                continue
            methods.append(method)
            name2method[method.name] = method

    def trace(method, prevs, cc=None):
        # Build the Group for ``method``.  ``prevs`` are the vertices the
        # group's exit vertex connects to; ``cc`` is the Cons chain of the
        # methods currently being traced.  Returns the entry vertex.
        logging.info(f'trace {method}')
        group = Group(method)
        vout = Vertex(group)
        group.vout = vout
        for vtx in prevs:
            vout.connect(vtx)
        # Maps a node to the set of vertices that follow it; the key 'in'
        # collects the vertices that follow nodes without inputs.
        edges = {'in': set()}
        edgefunc = (lambda n0: (
            n1 for (x, n1) in n0.inputs.items() if x.startswith('_')))
        for n0 in topsort(method, edgefunc):
            if n0 in edges:
                p = edges[n0]
            else:
                p = set([vout])
            if n0.is_funcall() and (maxlevel == 0 or Cons.len(cc) < maxlevel):
                funcs = n0.data.split()
                a = []
                for name in funcs[:maxoverrides]:
                    if name not in name2method:
                        continue
                    callee = name2method[name]
                    # Do not descend into a method already on the chain.
                    if cc is not None and callee in cc:
                        continue
                    vtx = trace(callee, p, Cons(method, cc))
                    group.add(vtx.group)
                    a.append(vtx)
                if a:
                    p = set(a)
            if n0.inputs:
                for (label, n1) in n0.inputs.items():
                    if label.startswith('_'):
                        continue
                    if n1 in edges:
                        edges[n1].update(p)
                    else:
                        edges[n1] = p.copy()
            else:
                edges['in'].update(p)
        vin = Vertex(group)
        group.vin = vin
        for vtx in edges['in']:
            vin.connect(vtx)
        return vin

    groups = []
    for method in methods:
        # Filter "top-level" methods only which aren't called by anyone else.
        if method.callers:
            continue
        (klass, name, func) = parsemethodname(method.name)
        #if (name not in targets) and (method.name not in targets): continue
        vtx = trace(method, [])
        groups.append(vtx.group)
        #break

    def write_graph(out):
        # Emit the whole digraph to the already-open stream ``out``.
        outedges = []
        # ``path`` is the last graph path loaded above.
        out.write(f'digraph {q(path)} {{\n')

        def f(group, level=1):
            h = ' '*level
            (klass, name, func) = parsemethodname(group.method.name)
            vin = group.vin
            vout = group.vout
            if group.children:
                out.write(h+f'subgraph {q("cluster_"+str(group.gid))} {{\n')
                out.write(h+f' label={q(stripid(klass.name)+"."+name)};\n')
                out.write(h+f' V{vin.vid} [label={q("enter")}];\n')
                out.write(h+f' V{vout.vid} [label={q("exit")}];\n')
                for vtx in vin.linkto:
                    outedges.append((vin, vtx))
                for vtx in vout.linkto:
                    outedges.append((vout, vtx))
                for g in group.children:
                    f(g, level+1)
                out.write(h+'}\n')
            else:
                # Leaf group: one box stands for both vin and vout, so all
                # of its edges start at vin.
                out.write(h+f'V{vin.vid} [shape=box, label={q(name)}];\n')
                for vtx in vin.linkto:
                    if vtx is not vout:
                        outedges.append((vin, vtx))
                # NOTE(review): comparison against vout kept as in the
                # original -- confirm it was not meant to be vin.
                for vtx in vout.linkto:
                    if vtx is not vout:
                        outedges.append((vin, vtx))
            return

        for group in groups:
            f(group)
        for (v0, v1) in outedges:
            out.write(f' V{v0.vid} -> V{v1.vid};\n')
        out.write('}\n')

    if outpath is None:
        write_graph(sys.stdout)
    else:
        with open(outpath, 'w') as out:
            write_graph(out)
    return 0
def main(argv):
    """Collect reference-to-reference assignment links and print them.

    Options (parsed with ``getopt``):

    * ``-d``               increase the module-level ``debug`` counter
    * ``-o output``        path opened for writing (see the note below)
    * ``-M maxoverrides``  passed to ``IDFBuilder`` (default 1)
    * ``-E``               set the module-level ``check`` to ``check_equiv``
      (the default)
    * ``-A``               set ``check`` to ``check_any``; this option is
      now listed in the getopt spec, where previously it was rejected as
      unknown

    At least one graph path is required.  For every node with inputs,
    ``trace`` yields ``(ref1, ref0, chain)`` triples; for each distinct
    ``(ref1, ref0)`` pair the chain with the smallest ``clen`` is kept.
    References with more than one source or destination are linked into
    ``Node`` objects, grouped with ``Component.fromnodes``, and each
    component with an empty ``linkto`` is printed as an indented tree.

    Returns ``None`` after printing the trees and 100 after printing the
    usage message.
    """
    global debug
    global check
    import getopt

    def usage():
        print(
            f'usage: {argv[0]} [-d] [-o output] [-M maxoverrides] [-E] [-A] [graph ...]'
        )
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'do:M:EA')
    except getopt.GetoptError:
        return usage()
    outpath = None
    maxoverrides = 1
    check = check_equiv
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-o':
            outpath = v
        elif k == '-M':
            maxoverrides = int(v)
        elif k == '-E':
            check = check_equiv
        elif k == '-A':
            check = check_any
    if not args:
        return usage()

    # NOTE(review): ``out`` is opened but nothing below writes to it; all
    # reports go through print().  Left as-is because the intended
    # destination of each report is not clear from this function.
    out = sys.stdout
    if outpath is not None:
        out = open(outpath, 'w')

    builder = IDFBuilder(maxoverrides=maxoverrides)
    for path in args:
        print(f'Loading: {path}...', file=sys.stderr)
        builder.load(path)

    builder.run()
    nfuncalls = sum(len(a) for a in builder.funcalls.values())
    print(
        f'Read: {len(builder.srcmap)} sources, {len(builder.methods)} methods, {nfuncalls} funcalls, {len(builder.vtxs)} IPVertexes',
        file=sys.stderr)

    # Enumerate all the assignments.
    links = {}
    for method in builder.methods:
        (klass, name, func) = parsemethodname(method.name)
        if debug:
            print(f'method: {method.name}', file=sys.stderr)
        for node in method:
            if not node.inputs:
                continue
            for (ref1, ref0, chain) in trace(builder.vtxs[node]):
                if ref1 == ref0:
                    continue
                k = (ref1, ref0)
                # Keep only the chain with the smallest clen per pair.
                if k in links and clen(links[k]) <= clen(chain):
                    continue
                links[k] = chain
                if debug:
                    print(f'{ref1!r} <- {ref0!r}')
    print(f'links: {len(links)}', file=sys.stderr)

    # srcs[ref1] = {ref0, ...} and dsts[ref0] = {ref1, ...}
    srcs = {}
    dsts = {}
    for ((ref1, ref0), chain) in links.items():
        if ref1 == ref0:
            continue
        #print(ref1, '=', ref0, [ v.node.kind for v in chain ])
        srcs.setdefault(ref1, set()).add(ref0)
        dsts.setdefault(ref0, set()).add(ref1)
    print()

    nodes = {}

    def getnode(ref):
        # Return the Node for ``ref``, creating it on first use.
        if ref in nodes:
            n = nodes[ref]
        else:
            n = nodes[ref] = Node(ref)
        return n

    # ref = {src, ...} :: ref is supertype of srcs: src -> ref.
    for (ref, a) in srcs.items():
        if len(a) == 1:
            continue
        n = getnode(ref)
        for src in a:
            getnode(src).link(n)
    # {dst, ...} = ref :: ref is supertype of dsts: dst -> ref.
    for (ref, a) in dsts.items():
        if len(a) == 1:
            continue
        n = getnode(ref)
        for dst in a:
            getnode(dst).link(n)
    (sc, cpts) = Component.fromnodes(nodes.values())

    def disp(c, level=0):
        print(' ' * level, c)
        for s in c.linkfrom:
            assert c in s.linkto
            disp(s, level + 1)
        return

    for c in cpts:
        if c.linkto:
            continue
        disp(c)
    return

    # NOTE(review): everything below is unreachable because of the return
    # above; it is kept unchanged as the alternative reports.
    for (ref, a) in srcs.items():
        a = set(map(stripid, a))
        if len(a) == 1:
            continue
        print(ref, '=', a)
    print()
    for (ref, a) in dsts.items():
        a = set(map(stripid, a))
        if len(a) == 1:
            continue
        print(a, '=', ref)
    print()

    pairs = []
    for (name, a) in srcs.items():
        if len(a) < 2:
            continue
        if name not in dsts:
            continue
        b = dsts[name]
        if len(b) < 2:
            continue
        pairs.append((name, a, b))
    pairs.sort(key=lambda x: len(x[1]) * len(x[2]), reverse=True)
    for (name, a, b) in pairs:
        print(name, a, b)
    return 0
def showcall(n0, n1):
    """Return a ``[name]`` marker when ``n1`` is in a different method.

    If the methods of ``n0`` and ``n1`` differ, the name of ``n1``'s method
    is parsed with ``parsemethodname`` and returned in square brackets;
    otherwise the empty string is returned.
    """
    if n0.method != n1.method:
        (_, callee, _) = parsemethodname(n1.method.name)
        return f'[{callee}]'
    return ''
def _gv_node_attrs(node):
    """Return ``(rank, styles)`` describing how ``node`` is drawn.

    ``rank`` is -1 for ``input``/``passin`` nodes, +1 for
    ``output``/``passout`` nodes and 0 otherwise.  ``styles`` is the dict
    of attributes for the node; it always holds a ``label`` entry.
    """
    kind = node.kind
    rank = 0
    styles = {'label': kind}
    if kind in ('join', 'begin', 'end', 'repeat', 'case'):
        styles['shape'] = 'diamond'
        if node.ref is not None:
            styles['label'] = f'{kind} ({parserefname(node.ref)})'
    elif kind in ('value', 'valueset'):
        styles['shape'] = 'box'
        styles['fontname'] = 'courier'
        styles['label'] = repr(node.data)
    elif kind in ('input', 'output', 'receive'):
        if node.ref is not None:
            styles['label'] = f'{kind} ({parserefname(node.ref)})'
        if kind == 'input':
            rank = -1
        elif kind == 'output':
            rank = +1
    elif kind in ('passin', 'passout'):
        styles['style'] = 'dotted'
        if kind == 'passin':
            rank = -1
        elif kind == 'passout':
            rank = +1
    elif kind == 'new':
        (_, klass) = DFType.parse(node.ntype)
        styles['shape'] = 'box'
        styles['style'] = 'rounded'
        styles['fontname'] = 'courier'
        styles['label'] = f'new {klass.name}'
    elif kind == 'call':
        # Parse only the first space-separated entry of the data, as the
        # other call-node consumers in this code base do.
        first = node.data.partition(' ')[0]
        (_, callee, _) = parsemethodname(first)
        styles['shape'] = 'box'
        styles['style'] = 'rounded'
        styles['fontname'] = 'courier'
        styles['label'] = f'{callee}()'
    elif kind is not None and kind.startswith('op_'):
        styles['fontname'] = 'courier'
        styles['label'] = (node.data or kind)
    elif kind is not None:
        if node.ref is not None:
            styles['label'] = f'{kind} ({parserefname(node.ref)})'
    else:
        if node.ref is not None:
            styles['label'] = f'({parserefname(node.ref)})'
    return (rank, styles)


def write_gv(out, scope, highlight=None, level=0, name=None):
    """Write ``scope`` and its child scopes to ``out`` in DOT format.

    Args:
        out: object with a ``write`` method receiving the text.
        scope: scope whose ``nodes`` and ``children`` are written.
        highlight: optional container of node ids; matching nodes get
            ``style=filled``.
        level: nesting depth.  Level 0 writes a ``digraph`` and, after all
            nested scopes, the edges of every node from ``scope.walk()``;
            deeper levels write a ``subgraph`` cluster.
        name: graph name; defaults to the last dot-separated part of
            ``scope.name``.

    Nodes ranked -1 or +1 are wrapped in ``rank=source`` / ``rank=sink``
    subgraphs.  Inputs whose label starts with ``'_'`` produce no edge,
    except ``'_end'`` which is drawn dashed and without constraint.
    """
    h = ' ' * level
    if name is None:
        name = scope.name.split('.')[-1]
    if level == 0:
        out.write(f'digraph {q(name)} {{\n')
    else:
        out.write(h + f'subgraph {q("cluster_"+name)} {{\n')
        out.write(h + f' label={q(name)};\n')

    # Nodes bucketed by rank; written in the order -1, 0, +1.
    ranked = {-1: [], 0: [], 1: []}
    for node in scope.nodes:
        (rank, styles) = _gv_node_attrs(node)
        if highlight is not None and node.nid in highlight:
            styles['style'] = 'filled'
        ranked[rank].append((node, styles))

    for (rank, members) in ranked.items():
        if not members:
            continue
        if rank < 0:
            out.write(h + 'subgraph { rank=source;\n')
        elif 0 < rank:
            out.write(h + 'subgraph { rank=sink;\n')
        for (node, styles) in members:
            out.write(h + f' N{r(node.nid)} [{qp(styles)}];\n')
        if rank != 0:
            out.write(h + '}\n')

    for child in scope.children:
        write_gv(out, child, highlight, level=level + 1)

    if level == 0:
        # Edges are written once, from the outermost scope.
        for node in scope.walk():
            for (label, src) in node.inputs.items():
                if not label:
                    styles = {}
                elif label == 'cond':
                    styles = {'style': 'dotted', 'label': label}
                elif label == '_end':
                    styles = {'style': 'dashed', 'constraint': 'false'}
                elif label.startswith('_'):
                    continue
                else:
                    styles = {'label': label}
                out.write(
                    h + f' N{r(src.nid)} -> N{r(node.nid)} [{qp(styles)}];\n')
    out.write(h + '}\n')
    return
def main(argv):
    """Write a trimmed graph of the most significant reference flows.

    Options (parsed with ``getopt``):

    * ``-d``               DEBUG logging
    * ``-o output``        write to ``output`` instead of standard output
    * ``-M maxoverrides``  passed to ``IDFBuilder`` (default 1)
    * ``-r ratio``         an edge is significant when its count is at
      least ``ratio`` times the maximum count (default 0.9)
    * ``-f method,...``    additional method names to analyse (``main`` is
      always included)

    At least one graph path is required.  Flows are enumerated with
    ``enumflow`` from the output-less vertices of each selected method that
    has no callers; the references are grouped with ``SCC.fromnodes``.
    Each edge is scored as ``incoming[source] + outgoing[target]``.
    Significant edges, plus up to ``maxfan`` best neighbours followed
    backwards and forwards from them, are written in DOT format.

    The output file is opened only once the graph has been computed and is
    closed when writing finishes, even on error.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-d] [-o output] [-M maxoverrides] [-r ratio] [-f method] [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'do:M:r:f:')
    except getopt.GetoptError:
        return usage()
    level = logging.INFO
    outpath = None
    maxoverrides = 1
    ratio = 0.9
    maxfan = 5
    methods = {'main'}
    for (k, v) in opts:
        if k == '-d':
            level = logging.DEBUG
        elif k == '-o':
            outpath = v
        elif k == '-M':
            maxoverrides = int(v)
        elif k == '-r':
            ratio = float(v)
        elif k == '-f':
            methods.update(v.split(','))
    if not args:
        return usage()

    logging.basicConfig(
        format='%(asctime)s %(levelname)s %(message)s', level=level)

    builder = IDFBuilder(maxoverrides=maxoverrides)
    for path in args:
        logging.info(f'Loading: {path}...')
        builder.load(path)

    builder.run()
    nfuncalls = sum(len(a) for a in builder.funcalls.values())
    logging.info(f'Read: {len(builder.srcmap)} sources, {len(builder.methods)} methods, {nfuncalls} funcalls, {len(builder.vtxs)} IPVertexes')

    # Enumerate all the flows.
    irefs = set()
    ctx2iref = {}
    nlinks = 0
    for method in builder.methods:
        # Filter "top-level" methods only which aren't called by anyone else.
        if method.callers:
            continue
        (klass, name, func) = parsemethodname(method.name)
        if name == ':clinit:':
            continue
        # A requested name is consumed by the first method that matches it.
        if name in methods:
            methods.remove(name)
        elif method.name in methods:
            methods.remove(method.name)
        else:
            logging.info(f'ignored: {method.name}')
            continue
        logging.info(f'method: {method.name}')
        for node in method:
            vtx = builder.vtxs[node]
            # Filter the last nodes (no output) only.
            if vtx.outputs:
                continue
            for (refsrc, refdst) in enumflow(ctx2iref, vtx):
                # refsrc -> refdst
                assert refsrc is not None
                if refdst is None:
                    continue
                if refdst == refsrc:
                    continue
                refsrc.connect(refdst)
                irefs.add(refsrc)
                irefs.add(refdst)
                nlinks += 1
    logging.info(f'irefs: {len(irefs)}')
    logging.info(f'links: {nlinks}')

    # Discover strong components.
    (allcpts, ref2cpt) = SCC.fromnodes(irefs, lambda iref: iref.linkto)
    logging.info(f'allcpts: {len(allcpts)}')

    # Discover the most significant edges.
    # incount/outcount: links not yet processed for each component.
    # incoming/outgoing: accumulated counts, seeded with 1 at components
    # that have no linkfrom / no linkto respectively.
    incount = {}
    incoming = {}
    outcount = {}
    outgoing = {}
    for cpt in allcpts:
        incount[cpt] = len(cpt.linkfrom)
        incoming[cpt] = 0 if cpt.linkfrom else 1
        outcount[cpt] = len(cpt.linkto)
        outgoing[cpt] = 0 if cpt.linkto else 1

    def count_forw(cpt0):
        # Push the incoming count to successors; continue from a successor
        # once all of its predecessors have been processed.
        for cpt1 in cpt0.linkto:
            assert cpt0 is not cpt1
            incount[cpt1] -= 1
            incoming[cpt1] += incoming[cpt0]
            if incount[cpt1] == 0:
                count_forw(cpt1)
        return

    def count_back(cpt1):
        # Mirror image of count_forw, following linkfrom.
        for cpt0 in cpt1.linkfrom:
            assert cpt0 is not cpt1
            outcount[cpt0] -= 1
            outgoing[cpt0] += outgoing[cpt1]
            if outcount[cpt0] == 0:
                count_back(cpt0)
        return

    for cpt in allcpts:
        if not cpt.linkfrom:
            count_forw(cpt)
        if not cpt.linkto:
            count_back(cpt)

    maxcount = 0
    for cpt0 in allcpts:
        for cpt1 in cpt0.linkto:
            count = incoming[cpt0] + outgoing[cpt1]
            if maxcount < count:
                maxcount = count
        if len(cpt0) < 2:
            continue
        if level == logging.DEBUG:
            logging.debug(f'cpt: {cpt0}')
            for iref in cpt0:
                logging.debug(f' {iref!r}')
    logging.info(f'maxcount: {maxcount}')

    # Traverse the edges.
    maxlinks = set()
    # A traversal from a given component always adds the same edges, so
    # each component is expanded at most once per direction.  This gives
    # the same result without re-walking shared sub-graphs.
    forw_done = set()
    back_done = set()

    def trav_forw(cpt0):
        if cpt0 in forw_done:
            return
        forw_done.add(cpt0)
        linkto = sorted(
            cpt0.linkto, key=lambda cpt1: outgoing[cpt1], reverse=True)
        for cpt1 in linkto[:maxfan]:
            maxlinks.add((cpt0, cpt1))
            trav_forw(cpt1)
        return

    def trav_back(cpt1):
        if cpt1 in back_done:
            return
        back_done.add(cpt1)
        linkfrom = sorted(
            cpt1.linkfrom, key=lambda cpt0: incoming[cpt0], reverse=True)
        for cpt0 in linkfrom[:maxfan]:
            maxlinks.add((cpt0, cpt1))
            trav_back(cpt0)
        return

    for cpt0 in allcpts:
        for cpt1 in cpt0.linkto:
            count = incoming[cpt0] + outgoing[cpt1]
            if ratio*maxcount <= count:
                maxlinks.add((cpt0, cpt1))
                trav_back(cpt0)
                trav_forw(cpt1)
    logging.info(f'maxlinks: {len(maxlinks)}')
    maxcpts = set(cpt0 for (cpt0, _) in maxlinks)
    maxcpts.update(cpt1 for (_, cpt1) in maxlinks)
    logging.info(f'maxcpts: {len(maxcpts)}')

    def write_graph(out):
        # Generate a trimmed graph.  ``path`` is the last graph path
        # loaded above.
        out.write(f'digraph {q(path)} {{\n')
        for cpt in maxcpts:
            out.write(f' N{cpt.cid} [label={q(str(cpt))}, fontname="courier"];\n')
        for (cpt0, cpt1) in maxlinks:
            out.write(f' N{cpt0.cid} -> N{cpt1.cid};\n')
        out.write('}\n')

    if outpath is None:
        write_graph(sys.stdout)
    else:
        with open(outpath, 'w') as out:
            write_graph(out)
    return 0