def main(argv):
    """List node references and their types, or word statistics over them.

    Every node of every method returned by ``get_graphs`` is scanned; a node
    is kept when it has a ``ref`` starting with ``$`` or ``@`` and a
    non-None ``ntype``.  Without ``-W`` the sorted ``ref ntype`` pairs are
    printed.  With ``-W`` (``-w`` is accepted as an alias, because the usage
    text used to advertise it) each reference name is split into words,
    tagged with ``postag`` and counted per tag.

    Options:
        -d        accepted for compatibility; currently has no effect
        -n limit  maximum number of words printed per tag (<= 0: no limit)
        -W        print word statistics instead of the reference list

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-d] [-n limit] [-W] [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dwWn:')
    except getopt.GetoptError:
        return usage()
    limit = 10
    wordstat = False
    for (k, v) in opts:
        if k in ('-W', '-w'):
            wordstat = True
        elif k == '-n':
            try:
                limit = int(v)
            except ValueError:
                # A non-numeric limit is a command-line error, not a crash.
                return usage()

    refs = {}
    for path in args:
        for method in get_graphs(path):
            for node in method:
                ref = node.ref
                if ref is None or node.ntype is None:
                    continue
                # startswith() also skips an empty ref instead of raising.
                if not ref.startswith(('$', '@')):
                    continue
                refs[ref] = node.ntype

    if not wordstat:
        for (ref, ntype) in sorted(refs.items()):
            print(ref, ntype)
        return 0

    words = {}
    for ref in refs:
        name = stripid(ref)
        if name is None:
            continue
        for (pos, w) in postag(reversed(splitwords(name))):
            d = words.setdefault(pos, {})
            d[w] = d.get(w, 0) + 1
    print('counts', {pos: sum(d.values()) for (pos, d) in words.items()})
    print('words', {pos: len(d) for (pos, d) in words.items()})
    # Tags with the most distinct words come first.
    for (pos, d) in sorted(words.items(), key=lambda x: len(x[1]),
                           reverse=True):
        print(pos)
        a = sorted(d.items(), key=lambda x: x[1], reverse=True)
        if 0 < limit:
            a = a[:limit]
        for (w, n) in a:
            print(f' {n} {w}')
    return 0
def main(argv):
    """Write the selected method graphs as Graphviz source or as HTML.

    Options:
        -H         write an HTML page built from ``run_dot`` output
        -o output  write to this file instead of stdout; HTML mode is then
                   decided by whether the name ends with ``.html``
        -h nid     comma-separated integer node ids passed to ``write_gv``
                   as ``highlight``
        -n name    only keep the method with this name

    The output file is opened only after the arguments are validated and the
    graphs are loaded, and it is always closed; ``sys.stdout`` is never
    closed.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(
            f'usage: {argv[0]} [-H] [-o output] [-n name] [-h nid] [graph ...]'
        )
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'Ho:h:n:')
    except getopt.GetoptError:
        return usage()
    html = False
    outpath = None
    highlight = None
    names = None
    for (k, v) in opts:
        if k == '-H':
            html = True
        elif k == '-o':
            outpath = v
            html = v.endswith('.html')
        elif k == '-h':
            try:
                highlight = {int(nid) for nid in v.split(',')}
            except ValueError:
                # Node ids must be integers; treat anything else as misuse.
                return usage()
        elif k == '-n':
            names = [v]
    if not args:
        return usage()

    methods = []
    for path in args:
        for method in get_graphs(path):
            if names and method.name not in names:
                continue
            methods.append(method)

    def emit(output):
        if html:
            output.write('<!DOCTYPE html><html><body>\n')
            for data in run_dot(methods):
                output.write('<div>\n')
                output.write(data)
                output.write('</div><hr>\n')
            output.write('</body>')
        else:
            for method in methods:
                write_gv(output, method.root,
                         highlight=highlight, name=method.name)

    if outpath is None:
        # Leave stdout open: the caller may still want to print to it.
        emit(sys.stdout)
    else:
        with open(outpath, 'w') as output:
            emit(output)
    return 0
def load(self, path, fp=None, filter=None):
    """Register the methods found in the graph file at *path*.

    Methods whose ``style`` is ``'initializer'`` are skipped, as are methods
    rejected by the optional *filter* predicate.  The first time a class
    source path is seen it receives the next id in ``self.srcmap`` and, when
    *fp* is given, a ``+SOURCE (fid, path)`` line is written to it.  Every
    kept method is appended to ``self.methods`` and stored in
    ``self.gid2method`` under its name.
    """
    for method in get_graphs(path):
        rejected = (
            method.style == 'initializer'
            or (filter is not None and not filter(method))
        )
        if rejected:
            continue
        srcpath = method.klass.path
        if srcpath not in self.srcmap:
            # Ids are handed out in order of first appearance.
            fid = len(self.srcmap)
            self.srcmap[srcpath] = fid
            if fp is not None:
                fp.write(f'+SOURCE {(fid, srcpath)}\n')
        self.methods.append(method)
        self.gid2method[method.name] = method
    return
def main(argv):
    """Store methods from graph files into a graph DB and an index DB.

    Positional arguments are ``graph.db index.db [graph ...]``.  Unless
    ``-c`` is given, either database path already existing is an error.

    Both database paths are validated before either database is opened, so
    a usage error or an "already exists" error no longer leaves a freshly
    opened, never-closed ``GraphDB`` behind.

    Returns 0 on success, 100 after printing the usage message and 111 when
    a database path already exists.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-c)ontinue] graph.db index.db [graph ...]')
        return 100

    def exists(path):
        print(f'already exists: {path}')
        return 111

    try:
        (opts, args) = getopt.getopt(argv[1:], 'c')
    except getopt.GetoptError:
        return usage()
    isnew = True
    for (k, v) in opts:
        if k == '-c':
            isnew = False

    if not args:
        return usage()
    graph_path = args.pop(0)
    if isnew and os.path.exists(graph_path):
        return exists(graph_path)
    if not args:
        return usage()
    index_path = args.pop(0)
    if isnew and os.path.exists(index_path):
        return exists(index_path)
    if isnew and index_path == graph_path:
        # Previously opening graph.db first made the identical index path
        # "already exist"; keep rejecting that combination.
        return exists(index_path)

    graphdb = GraphDB(graph_path)
    indexdb = IndexDB(index_path, insert=True)
    cid = None
    for path in args:
        for method in get_graphs(path):
            assert isinstance(method, DFMethod)
            src = method.klass.src
            if src is not None:
                cid = graphdb.add_src(src)
            # A method without a source reuses the most recent source id.
            assert cid is not None
            graphdb.add(cid, method)
            indexdb.index_method(method)
    graphdb.close()
    indexdb.close()
    return 0
def main(argv):
    """Print per-tag word statistics over the method names in graph files.

    Each method name is decomposed with ``splitmethodname``; the name part
    is split into words, handed to ``postag`` in reversed order, and every
    resulting (tag, word) pair is counted.  The totals per tag, the number
    of distinct words per tag and the most frequent words of each tag (at
    most ``-n limit`` of them, default 10; <= 0 means all) are printed.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-d] [-n limit] [graph ...]')
        return 100

    try:
        opts, paths = getopt.getopt(argv[1:], 'dn:')
    except getopt.GetoptError:
        return usage()

    debug = 0
    limit = 10
    for opt, value in opts:
        if opt == '-d':
            debug += 1
        elif opt == '-n':
            limit = int(value)

    # tag -> {word: occurrences}
    words = {}
    for path in paths:
        for method in get_graphs(path):
            name, _argtypes, _retype = splitmethodname(method.name)
            if name is None:
                continue
            for tag, word in postag(reversed(splitwords(name))):
                bucket = words.setdefault(tag, {})
                bucket[word] = bucket.get(word, 0) + 1

    print('counts', {pos: sum(d.values()) for (pos, d) in words.items()})
    print('words', {pos: len(d) for (pos, d) in words.items()})

    by_vocabulary = sorted(
        words.items(), key=lambda item: len(item[1]), reverse=True)
    for tag, bucket in by_vocabulary:
        print(tag)
        ranked = sorted(
            bucket.items(), key=lambda item: item[1], reverse=True)
        if 0 < limit:
            ranked = ranked[:limit]
        for word, count in ranked:
            print(f' {count} {word}')
    return 0
def main(argv):
    """Show, for each graph, the nodes belonging to every found reference.

    Options:
        -v          accepted; currently has no effect
        -B basedir  open a ``SourceDB`` there; without it nothing is shown
        -H          produce HTML via ``show_html_headers`` / ``show_html``

    Every graph file named on the command line is processed.  Previously
    only the first one was read and the rest were silently ignored, although
    the usage message advertises ``[graph ...]``.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print('usage: %s [-B basedir] [-H] [graph ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'vB:H')
    except getopt.GetoptError:
        return usage()
    srcdb = None
    html = False
    for (k, v) in opts:
        if k == '-B':
            srcdb = SourceDB(v)
        elif k == '-H':
            html = True
    if not args:
        return usage()

    if html:
        show_html_headers()
    for path in args:
        for graph in get_graphs(path):
            src = None
            if srcdb is not None:
                try:
                    src = srcdb.get(graph.src)
                except KeyError:
                    # Best effort: a graph whose source is missing from the
                    # source DB is simply not displayed.
                    pass
            for ref in finditer(graph):
                if src is None:
                    continue
                nodes = [
                    node for node in graph.nodes.values() if node.ref == ref
                ]
                if html:
                    show_html(src, nodes)
                else:
                    print(src, graph, ref)
                    src.show_nodes(nodes)
                    print()
    return 0
def main(argv):
    """Group methods whose names end with the same words.

    Each non-initializer method name (the part between ``;.`` and ``(`` when
    the name contains ``;.``) is split with ``splitcamel``; every proper
    suffix of the resulting word list becomes a grouping key.  Keys are
    visited longest first, and each group of two or more methods that have
    not been reported yet is written as ``= count`` followed by one
    ``+ name`` line per method.  With ``-B`` the method's source lines are
    listed too.

    Options:
        -o output    write to this file instead of stdout
        -B basedir   base directory for ``SourceDB``
        -c encoding  encoding passed to ``SourceDB``

    The ``SourceDB`` is created after all options are parsed, so ``-c``
    takes effect regardless of its position relative to ``-B``.  The output
    file is closed even when processing fails.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-o output] [-B basedir] [-c encoding] '
              'out.graph ...')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'o:B:c:')
    except getopt.GetoptError:
        return usage()
    output = None
    basedir = None
    encoding = None
    for (k, v) in opts:
        if k == '-o':
            output = v
        elif k == '-B':
            basedir = v
        elif k == '-c':
            encoding = v
    if not args:
        return usage()

    srcdb = None
    if basedir is not None:
        srcdb = SourceDB(basedir, encoding)

    if output is None:
        fp = sys.stdout
    else:
        fp = open(output, 'w')
    try:
        # suffix word tuple -> methods whose name ends with those words
        names = {}
        for path in args:
            print(f'Loading: {path!r}...', file=sys.stderr)
            for method in get_graphs(path):
                if method.style == 'initializer':
                    continue
                if ';.' in method.name:
                    (_, _, name) = method.name.partition(';.')
                    (name, _, _) = name.partition('(')
                else:
                    name = method.name
                words = splitcamel(name)
                # Proper suffixes only: the full word list is never a key.
                for n in range(1, len(words)):
                    names.setdefault(tuple(words[-n:]), []).append(method)

        done = set()
        for k in sorted(names, key=len, reverse=True):
            a = [method for method in names[k] if method not in done]
            if len(a) < 2:
                continue
            fp.write(f'= {len(a)}\n')
            for method in a:
                fp.write(f'+ {method.name}\n')
                if srcdb is None:
                    continue
                if method.src is None or method.ast is None:
                    continue
                src = srcdb.get(method.src)
                (_, start, end) = method.ast
                fp.write(f'# {method.src}\n')
                ranges = [(start, end, 0)]
                for (lineno, line) in src.show(ranges):
                    if lineno is None:
                        fp.write(line.rstrip() + '\n')
                    else:
                        fp.write(f'{lineno:4d}: {line.rstrip()}\n')
            fp.write('\n')
            # Reported methods are not repeated under shorter suffixes.
            done.update(a)
    finally:
        if fp is not sys.stdout:
            fp.close()
    return 0
def main(argv):
    """Cluster methods whose source text is similar.

    For every non-initializer method that has both ``src`` and ``ast``, the
    ``\\w+`` tokens of its source span are counted.  Every pair of methods
    is scored with ``tokensim`` (``-T``, default) or ``namesim`` (``-N``);
    pairs scoring at least the threshold are linked, and linked methods are
    reported as clusters, largest first.

    Options:
        -v            also list the source lines of each clustered method
        -o output     write to this file instead of stdout
        -T / -N       similarity function selection
        -B basedir    base directory for ``SourceDB`` (required)
        -c encoding   encoding passed to ``SourceDB`` (default ``utf-8``)
        -t threshold  minimum similarity for linking (default 0.7)

    Fixes relative to the previous version:
      * cluster bookkeeping: every member is mapped to its cluster, merged
        clusters are remapped, and a pair already in one cluster is a no-op,
        so no method or cluster is reported twice;
      * a missing ``-B`` or a malformed ``-t`` yields the usage message
        instead of an ``AssertionError`` / ``ValueError``;
      * ``-c`` is honoured wherever it appears relative to ``-B``;
      * the output file is closed even when processing fails.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(
            f'usage: {argv[0]} [-v] [-o output] {{-T|-N}} -B basedir [-c encoding] [-t threshold] '
            'out.graph ...')
        return 100

    def cluster_pairs(pairs):
        """Return the groups of methods connected by *pairs*, largest first."""
        cluster_of = {}
        for (_, g1, g2) in pairs:
            c1 = cluster_of.get(g1)
            c2 = cluster_of.get(g2)
            if c1 is not None and c2 is not None:
                if c1 is c2:
                    continue
                # Absorb c2 into c1 and repoint all of c2's members.
                c1.extend(c2)
                for g in c2:
                    cluster_of[g] = c1
            elif c1 is not None:
                c1.append(g2)
                cluster_of[g2] = c1
            elif c2 is not None:
                c2.append(g1)
                cluster_of[g1] = c2
            else:
                cluster_of[g1] = cluster_of[g2] = [g1, g2]
        # Lists are unhashable; deduplicate by identity, keeping first-seen
        # order so that equally sized clusters are reported deterministically.
        unique = {id(c): c for c in cluster_of.values()}
        return sorted(unique.values(), key=len, reverse=True)

    try:
        (opts, args) = getopt.getopt(argv[1:], 'vo:TNB:c:t:')
    except getopt.GetoptError:
        return usage()
    output = None
    basedir = None
    encoding = 'utf-8'
    threshold = 0.7
    verbose = False
    use_names = False
    for (k, v) in opts:
        if k == '-v':
            verbose = True
        elif k == '-o':
            output = v
        elif k == '-T':
            use_names = False
        elif k == '-N':
            use_names = True
        elif k == '-B':
            basedir = v
        elif k == '-c':
            encoding = v
        elif k == '-t':
            try:
                threshold = float(v)
            except ValueError:
                return usage()
    if not args:
        return usage()
    if basedir is None:
        # Source text is needed to tokenize methods, so -B is mandatory.
        return usage()

    srcdb = SourceDB(basedir, encoding)
    calcsim = namesim if use_names else tokensim
    pat = re.compile(r'\w+')

    if output is None:
        fp = sys.stdout
    else:
        fp = open(output, 'w')
    try:
        # (method, {token: occurrences}) for every usable method
        tokens = []
        for path in args:
            print(f'Loading: {path!r}...', file=sys.stderr)
            for method in get_graphs(path):
                if method.style == 'initializer':
                    continue
                if method.src is None or method.ast is None:
                    continue
                (_, start, end) = method.ast
                src = srcdb.get(method.src)
                text = src.data[start:end]
                c = {}
                for t in pat.findall(text):
                    c[t] = c.get(t, 0) + 1
                assert c, repr(text)
                tokens.append((method, c))

        sys.stderr.write('Clustering')
        pairs = []
        for (i, (g1, c1)) in enumerate(tokens):
            for (g2, c2) in tokens[i + 1:]:
                sim = calcsim(g1, c1, g2, c2)
                if threshold <= sim:
                    pairs.append((sim, g1, g2))
            sys.stderr.write('.')
            sys.stderr.flush()
        # Most similar pairs are linked first.
        pairs.sort(key=lambda x: x[0], reverse=True)
        sys.stderr.write('\n')

        for c in cluster_pairs(pairs):
            fp.write(f'= {len(c)}\n')
            for method in c:
                fp.write(f'+ {method.name}\n')
                if not verbose:
                    continue
                if method.src is None or method.ast is None:
                    continue
                src = srcdb.get(method.src)
                (_, start, end) = method.ast
                fp.write(f'# {method.src}\n')
                ranges = [(start, end, 0)]
                for (lineno, line) in src.show(ranges):
                    if lineno is None:
                        fp.write(line.rstrip() + '\n')
                    else:
                        fp.write(f'{lineno:4d}: {line.rstrip()}\n')
            fp.write('\n')
    finally:
        if fp is not sys.stdout:
            fp.close()
    return 0
def main(argv):
    """Print call chains starting from graphs that nothing else calls.

    All graphs are loaded and keyed by name.  For each ``call`` node the
    first space-separated name in ``node.data`` that is a loaded graph is
    linked as callee; when none is, every listed name is linked.  For each
    ``new`` node ``node.data`` itself is linked.  Chains of two or more
    names are then printed, space separated, by following callee links
    from every graph that has no caller.

    Returns 0 on success and 100 after printing the usage message.
    """
    import fileinput
    import getopt

    def usage():
        print('usage: %s [-d] [graph ...]' % argv[0])
        return 100

    try:
        opts, args = getopt.getopt(argv[1:], 'd')
    except getopt.GetoptError:
        return usage()
    debug = sum(1 for (opt, _) in opts if opt == '-d')
    if not args:
        return usage()

    # Load graphs.
    graphs = {}
    for path in args:
        for graph in get_graphs(path):
            graphs[graph.name] = graph
    print('# graphs: %r' % len(graphs), file=sys.stderr)

    # Enumerate caller/callee relationships.
    linkto = {}    # caller -> callees, in first-seen order
    linkfrom = {}  # callee -> callers, in first-seen order

    def link(caller, callee):
        callees = linkto.setdefault(caller, [])
        if callee not in callees:
            callees.append(callee)
        callers = linkfrom.setdefault(callee, [])
        if caller not in callers:
            callers.append(caller)

    for src in graphs.values():
        for node in src:
            if node.kind == 'new':
                link(src.name, node.data)
                continue
            if node.kind != 'call':
                continue
            candidates = node.data.split(' ')
            # To keep the number of possible contexts from growing
            # exponentially, only the first known function is used.
            known = next(
                (name for name in candidates if name in graphs), None)
            if known is not None:
                link(src.name, known)
            else:
                # This function is not defined within the source code.
                for name in candidates:
                    link(src.name, name)

    # Enumerate contexts.
    def enum_context(src, chain=None):
        if chain is None or src not in chain:
            extended = CLink(src, chain)
            if len(extended) >= 2:
                print(' '.join(extended))
            for dst in linkto.get(src, ()):
                enum_context(dst, extended)

    # Find start nodes.
    for graph in graphs.values():
        if graph.name in linkfrom:
            continue
        print('# start: %r' % graph.name, file=sys.stderr)
        enum_context(graph.name)
    return 0