def trace(r, vtx0, srcs=None, chain=None):
    """Follow ``vtx0.outputs`` recursively and record the chain reaching each node.

    Args:
        r: mapping from a node to the list of chains that reached it;
            updated in place.
        vtx0: vertex to visit; its ``node`` and ``outputs`` attributes are read.
        srcs: Cons list of the nodes already on the current path, or None.
        chain: Cons list of ``(feat, node)`` pairs gathered so far, or None.

    The names ``maxlen``, ``maxchains``, ``Cons`` and ``getfeats`` are taken
    from the enclosing scope.
    """
    # Stop once the chain has reached the depth limit.
    if chain is not None and maxlen <= len(chain):
        return
    node = vtx0.node
    # Never revisit a node that is already on the current path.
    if srcs is not None and node in srcs:
        return
    srcs = Cons(node, srcs)
    # Cap the number of chains stored for one node.
    chains = r.setdefault(node, [])
    if maxchains <= len(chains):
        return
    chains.append(chain)
    # Head of the current chain (None when nothing was collected yet).
    n0 = None if chain is None else chain.car
    for (label, vtx1) in vtx0.outputs:
        if label.startswith('_'):
            continue
        n1 = vtx1.node
        if n1.kind == 'call' and not label.startswith('#'):
            continue
        feats = getfeats(n0, label, n1)
        if not feats:
            # No feature for this edge: pass the chain through unchanged.
            trace(r, vtx1, srcs, chain)
            continue
        for feat in feats:
            trace(r, vtx1, srcs, Cons((feat, n1), chain))
    return
def enum_forw(r, v0, srcs=None, feats=None, maxlen=5, maxfeats=32):
    """Enumerate feature chains forward from ``v0`` along its outputs.

    Args:
        r: mapping from a vertex to the list of feature chains that reached
            it; updated in place.
        v0: vertex to visit; its ``node`` and ``outputs`` attributes are read.
        srcs: Cons list of vertices on the current path (None at the root).
        feats: Cons list of ``(feat, node)`` pairs collected so far, or None.
        maxlen: stop descending once ``clen(feats)`` reaches this value.
        maxfeats: maximum number of chains stored per vertex.

    Both limits are forwarded to every recursive call, so a non-default
    ``maxfeats`` applies to the whole traversal and not only to the root.
    """
    # Prevent loops.
    if srcs is not None and v0 in srcs:
        return
    srcs = Cons(v0, srcs)
    # Limit width.
    stored = r.setdefault(v0, [])
    if maxfeats <= len(stored):
        return
    stored.append(feats)
    # Limit depth.
    if maxlen <= clen(feats):
        return
    n0 = v0.node
    for (l1, v1) in v0.outputs:
        if l1.startswith('_'):
            continue
        n1 = v1.node
        if n1.is_funcall() and not l1.startswith('#'):
            continue
        if is_ignored(n1):
            # Ignored nodes are passed through without adding a feature.
            enum_forw(r, v1, srcs, feats, maxlen, maxfeats)
            continue
        for feat1 in getfeats(l1, n1, n1.inputs.items(), n0):
            f1 = Cons((feat1, n1), feats)
            enum_forw(r, v1, srcs, f1, maxlen, maxfeats)
    return
def traceout(mdist, nexts, v0, dist=(0, 0), chain=None):
    """Extend the recorded paths forward from ``v0``, leaving functions via outputs.

    Args:
        mdist: mapping from a vertex to ``(dist, chain)``; an entry is only
            replaced by a strictly greater ``dist``. Updated in place.
        nexts: Cons list whose ``car`` is the graph that an 'output' vertex is
            allowed to continue into; its ``cdr`` is used beyond that point.
        v0: vertex to visit.
        dist: pair ``(assign_var vertices, vertices)`` counted before ``v0``.
        chain: Cons list of the vertices on the current path, or None.
    """
    # Prevent loops.
    if chain is not None and v0 in chain:
        return
    # Keep only the greatest distance seen for each vertex.
    if v0 in mdist and dist <= mdist[v0][0]:
        return
    chain = Cons(v0, chain)
    mdist[v0] = (dist, chain)
    (nassigns, nsteps) = dist
    if v0.node.kind == 'assign_var':
        nassigns += 1
    dist = (nassigns, nsteps + 1)
    leaving = (v0.node.kind == 'output')
    caller = None
    if leaving:
        # Pop one level: only successors in the caller's graph are followed.
        assert nexts is not None
        caller = nexts.car
        nexts = nexts.cdr
    for (label, v1) in v0.outputs:
        if label.startswith('_'):
            continue
        if v1.node.kind == 'input':
            continue
        if leaving and v1.node.graph is not caller:
            continue
        traceout(mdist, nexts, v1, dist, chain)
    return
def enum_back(vtx, feats0=None, chain=None, maxlen=5):
    """Yield feature chains found by walking backward along ``vtx.inputs``.

    Args:
        vtx: vertex to start from; its ``node`` and ``inputs`` are read.
        feats0: Cons list of ``(feat, node)`` pairs collected so far, or None.
        chain: Cons list of the vertices on the current path, or None.
        maxlen: the walk stops when ``len(feats0)`` exceeds this value.

    Yields:
        Each newly extended feature chain, as soon as a feature is added.
    """
    # Prevent loops.
    if chain is not None and vtx in chain:
        return
    # Limit depth.
    if feats0 is not None and maxlen < len(feats0):
        return
    visited = Cons(vtx, chain)
    for (label, src) in vtx.inputs:
        if label.startswith('_'):
            continue
        # For function calls only the '#arg...' links are followed.
        if vtx.node.is_funcall() and not label.startswith('#arg'):
            continue
        # For 'receive' nodes only the 'return' link is followed.
        if vtx.node.kind == 'receive' and label != 'return':
            continue
        feat = getfeat(src.node, label, vtx.node)
        if feat is None:
            feats = feats0
        else:
            feats = Cons((feat, src.node), feats0)
            yield feats
        yield from enum_back(src, feats, visited, maxlen)
    return
def tracein(mdist, v0, dist=(0, 0), chain=None):
    """Record, for every vertex reachable from ``v0``, the greatest distance seen.

    Args:
        mdist: mapping from a vertex to ``(dist, chain)``; an entry is only
            replaced by a strictly greater ``dist``. Updated in place.
        v0: vertex to visit.
        dist: pair ``(assign_var vertices, vertices)`` counted before ``v0``.
        chain: Cons list of the vertices on the current path, or None.
    """
    # Prevent loops.
    if chain is not None and v0 in chain:
        return
    # Nothing to do unless this path beats the recorded one.
    if v0 in mdist and dist <= mdist[v0][0]:
        return
    path = Cons(v0, chain)
    mdist[v0] = (dist, path)
    (nassigns, nsteps) = dist
    if v0.node.kind == 'assign_var':
        nassigns += 1
    nextdist = (nassigns, nsteps + 1)
    for (label, v1) in v0.outputs:
        if label.startswith('_'):
            continue
        # 'output' vertices are not entered by this traversal.
        if v1.node.kind == 'output':
            continue
        tracein(mdist, v1, nextdist, path)
    return
def trace(v1, ref0=None, done=None):
    """Yield ``(ref0, ref1, done)`` for each reference reached backward from ``v1``.

    Args:
        v1: vertex to visit; its ``node`` and ``inputs`` are read.
        ref0: reference the walk started from, or None if none was seen yet.
        done: Cons list of the vertices on the current path, or None.

    Yields:
        ``(ref0, ref1, done)`` when a node is reached whose ref starts with
        neither '%' nor '#' while ``ref0`` is already set; the walk does not
        continue past such a node.
    """
    # Prevent loops.
    if done is not None and v1 in done:
        return
    done = Cons(v1, done)
    n1 = v1.node
    ref1 = n1.ref
    if ref1 is not None:
        # Refs starting with '%' end the walk.
        if ref1.startswith('%'):
            return
        if not ref1.startswith('#'):
            if ref0 is None:
                # First ref on this path: it becomes the origin.
                ref0 = ref1
            else:
                yield (ref0, ref1, done)
                return
    links = check(n1)
    if links is None:
        return
    for (link, v2, _) in v1.inputs:
        if link.startswith('_'):
            continue
        # A non-empty ``links`` restricts which input links are followed.
        if links and link not in links:
            continue
        yield from trace(v2, ref0, done)
    return
def trace(ctx2iref, v1, iref0=None, cc=None):
    """Yield ``(iref1, iref0)`` pairs found walking backward from ``v1``.

    Args:
        ctx2iref: mapping from ``(cc, vertex)`` to the IRef resolved there;
            it doubles as the visited set and is updated in place.
        v1: vertex to visit; its ``node`` and ``inputs`` are read.
        iref0: the IRef most recently seen on this path, or None.
        cc: Cons list of funcalls forming the current call context, or None.

    Uses ``debug``, ``IGNORED``, ``IRef``, ``Cons`` and ``clen`` from the
    enclosing scope.
    """
    k = (cc, v1)
    if k in ctx2iref:
        # Already visited in this context: report the link and stop here.
        iref1 = ctx2iref[k]
        if iref0 is not None:
            if debug:
                print(f'{clen(cc)} {iref1!r} -> {iref0!r}')
            yield (iref1, iref0)
        return
    n1 = v1.node
    ref1 = n1.ref
    if n1.kind in IGNORED or ref1 is None or ref1.startswith('#'):
        # No new reference at this node: carry the previous one along.
        iref1 = iref0
    else:
        # Refs starting with '$' are keyed by the call context; others are not.
        ctx = cc if ref1[0] == '$' else None
        iref1 = IRef.get(ctx, ref1)
        if iref0 is not None:
            if debug:
                print(f'{clen(cc)} {iref1!r} -> {iref0!r}')
            yield (iref1, iref0)
    ctx2iref[k] = iref1
    for (link, v2, funcall) in v1.inputs:
        if link.startswith('_'):
            continue
        n2 = v2.node
        if n2.kind == 'output' and funcall is not None:
            # Entering a callee: skip funcalls already in the context.
            if cc is not None and funcall in cc:
                continue
            nextcc = Cons(funcall, cc)
        elif n1.kind == 'input' and funcall is not None:
            # Going back to a caller: only through the matching funcall.
            if cc is None or cc.car is not funcall:
                continue
            nextcc = cc.cdr
        else:
            nextcc = cc
        yield from trace(ctx2iref, v2, iref1, nextcc)
    return
def main(argv):
    """Command-line entry point: load graphs and print the longest paths found.

    Options (parsed with getopt):
        -d              increase the debug level (counted; not used below)
        -o output       file that receives the builder output and source text
        -c encoding     encoding handed to SourceDB
        -B basedir      base directory handed to SourceDB
        -m maxpaths     maximum number of paths printed (default 100)
        -M maxoverrides value handed to IDFBuilder (default 1)

    Args:
        argv: full argument vector, program name first.

    Returns:
        100 when the options are invalid or no graph is given, otherwise 0.
    """
    import getopt

    def usage():
        print(
            'usage: %s [-d] [-o output] [-c encoding] [-B basedir] [-m maxpaths] '
            '[-M maxoverrides] [graph ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'do:c:B:m:M:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    maxpaths = 100
    maxoverrides = 1
    encoding = None
    basedir = None
    output = None
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-o':
            output = v
        elif k == '-c':
            encoding = v
        elif k == '-B':
            basedir = v
        elif k == '-m':
            maxpaths = int(v)
        elif k == '-M':
            maxoverrides = int(v)
    if not args:
        return usage()

    # Build the SourceDB only after all options are parsed, so that
    # '-c encoding' takes effect regardless of its position relative to '-B'.
    srcdb = None
    if basedir is not None:
        srcdb = SourceDB(basedir, encoding)

    def tracein(mdist, v0, dist=(0, 0), chain=None):
        # Forward walk that stays out of 'output' vertices; keeps the
        # greatest (assign_var count, vertex count) per vertex in mdist.
        if chain is not None and v0 in chain:
            return
        if v0 in mdist and dist <= mdist[v0][0]:
            return
        chain = Cons(v0, chain)
        mdist[v0] = (dist, chain)
        (a, n) = dist
        if v0.node.kind == 'assign_var':
            a += 1
        n += 1
        dist = (a, n)
        for (label, v1) in v0.outputs:
            if label.startswith('_'):
                continue
            if v1.node.kind == 'output':
                continue
            tracein(mdist, v1, dist, chain)
        return

    def traceout(mdist, nexts, v0, dist=(0, 0), chain=None):
        # Forward walk that stays out of 'input' vertices; at an 'output'
        # vertex it only continues into the graph at the head of nexts.
        if chain is not None and v0 in chain:
            return
        if v0 in mdist and dist <= mdist[v0][0]:
            return
        chain = Cons(v0, chain)
        mdist[v0] = (dist, chain)
        (a, n) = dist
        if v0.node.kind == 'assign_var':
            a += 1
        n += 1
        dist = (a, n)
        if v0.node.kind == 'output':
            assert nexts is not None
            caller = nexts.car
            nexts = nexts.cdr
            for (label, v1) in v0.outputs:
                if label.startswith('_'):
                    continue
                if v1.node.kind == 'input':
                    continue
                if v1.node.graph is not caller:
                    continue
                traceout(mdist, nexts, v1, dist, chain)
        else:
            for (label, v1) in v0.outputs:
                if label.startswith('_'):
                    continue
                if v1.node.kind == 'input':
                    continue
                traceout(mdist, nexts, v1, dist, chain)
        return

    if output is None:
        fp = sys.stdout
    else:
        fp = open(output, 'w')
    try:
        builder = IDFBuilder(maxoverrides=maxoverrides)
        for path in args:
            print('Loading: %r...' % path, file=sys.stderr)
            builder.load(path, fp)
        builder.run()
        print('Read: %d sources, %d graphs, %d funcalls, %d IPVertexes' %
              (len(builder.srcmap), len(builder.graphs),
               sum(len(a) for a in builder.funcalls.values()),
               len(builder.vtxs)),
              file=sys.stderr)

        # Phase 1: walk inward from every 'input' vertex that has no inputs.
        mdist = {}
        for vtx in builder:
            if not vtx.inputs and vtx.node.kind == 'input':
                tracein(mdist, vtx)
        # Phase 2: from each vertex reached, walk back out through the graphs
        # of the 'input' nodes on its chain. Iterate over a snapshot because
        # traceout adds entries to mdist.
        for (vtx0, (dist0, chain0)) in list(mdist.items()):
            inputs = [vtx.node for vtx in chain0 if vtx.node.kind == 'input']
            nexts = Cons.fromseq(n.graph for n in reversed(inputs))
            if nexts.cdr is None:
                continue
            traceout(mdist, nexts.cdr, vtx0, dist=dist0, chain=chain0)

        # Report the entries with the greatest distance first.
        vtxs = sorted(mdist.items(), key=lambda x: x[1][0], reverse=True)
        for (vtx1, (dist1, chain1)) in vtxs[:maxpaths]:
            nodes = [vtx.node for vtx in chain1 if vtx.node.kind == 'assign_var']
            nodes = list(reversed(nodes))
            # NOTE(review): the path header and '#' lines go to stdout while
            # the annotated source goes to fp -- confirm this is intended.
            print('+PATH', dist1, ' '.join(getnoun(n.ref) for n in nodes))
            for (i, n) in enumerate(nodes):
                print('#', n.ref)
                if srcdb is None:
                    continue
                src = builder.getsrc(n, False)
                if src is None:
                    continue
                (name, start, end) = src
                annot = SourceAnnot(srcdb)
                annot.add(name, start, end, i)
                annot.show_text(fp)
            print()
    finally:
        # Close the output file even when loading or tracing fails.
        if fp is not sys.stdout:
            fp.close()
    return 0
def enum_back(self, count, v1, lprev=None, v0=None, fprev='', chain=None, dist=0, calls=None):
    """Walk backward from ``v1`` and record features in ``self.feats``.

    Args:
        count: remaining budget; the walk stops when it drops below zero.
        v1: vertex being visited.
        lprev: label of the link that led here (prefix of each feature).
        v0: vertex the walk came from, or None at the starting vertex.
        fprev: feature recorded at the previous feature-bearing node.
        chain: Cons list of the nodes visited since the last feature.
        dist: number of feature-bearing nodes passed so far.
        calls: Cons list of funcalls forming the current call context.

    Reads ``self.done``, ``self.debug``, ``self.interproc``,
    ``self.typefeat`` and ``self.node_cost``; writes ``self.done`` and
    ``self.feats``.
    """
    # Prevent explosion.
    if count < 0:
        return
    count -= 1
    edge = (v0, v1)
    if edge in self.done:
        return
    if v0 is not None:
        self.done.add(edge)
    n1 = v1.node
    if self.debug:
        print(f'back: {n1.nid}({n1.kind}), kids={len(v1.inputs)}, fprev={fprev}, lprev={lprev}, count={count}, done={len(self.done)}')
    chain = Cons(n1, chain)

    # List the input nodes to visit.
    inputs = []
    for (link, v2, funcall) in v1.inputs:
        # Do not follow informational links.
        if link.startswith('_') and link != '_end':
            continue
        # Do not use a value in arrays.
        if n1.kind == 'ref_array' and not link:
            continue
        # Treat indirect assignment the same way as normal assignment.
        if link and link[0] in '@%':
            link = ''
        # Interprocedural links adjust the call context.
        if n1.kind == 'output':
            if self.interproc:
                inputs.append((link, v2, Cons(funcall, calls)))
        elif n1.kind == 'input' and calls is not None:
            if self.interproc and calls.car is funcall:
                inputs.append((link, v2, calls.cdr))
        else:
            inputs.append((link, v2, calls))

    # The starting vertex contributes no feature of its own.
    if v0 is None:
        for (link, v2, ctx) in inputs:
            self.enum_back(count, v2, link, v1, fprev, chain, dist, ctx)
        return

    # Ignore transparent nodes.
    if n1.kind in IGNORED or n1.kind == 'assign_var':
        for (_, v2, ctx) in inputs:
            self.enum_back(count, v2, lprev, v1, fprev, chain, dist, ctx)
        return

    # Add the features.
    ws = [lprev+':'+f for f in self.getnamefeats(n1)]
    fs = ws + [lprev+':'+f for f in self.getbasefeats(n1)]
    if not fs:
        return
    if self.typefeat:
        fs += [lprev+':'+f for f in gettypefeats(n1)]
    for f in fs:
        feat = (-(dist+1), fprev, f)
        self.feats[feat] = chain
        if self.debug:
            print(' feat:', feat)

    # If this is a ref_var node, the fact that it refers to a certain variable
    # is recorded, but the node itself is transparent in a chain.
    if n1.kind == 'ref_var':
        for (_, v2, ctx) in inputs:
            self.enum_back(count, v2, lprev, v1, fprev, chain, dist, ctx)
        return

    # Visit the next nodes, starting a fresh chain from each feature.
    count -= self.node_cost
    dist += 1
    for nextfeat in (ws or fs[:1]):
        for (link, v2, ctx) in inputs:
            self.enum_back(count, v2, link, v1, nextfeat, None, dist, ctx)
    return