コード例 #1
0
def main(argv):
    """Collect typed ``$``/``@`` references from graph files and report them.

    Every node of every method returned by ``get_graphs(path)`` is scanned;
    nodes whose ``ref`` starts with ``$`` or ``@`` and whose ``ntype`` is set
    are recorded in a ``ref -> ntype`` mapping (later nodes overwrite earlier
    ones with the same ref).

    Options:
        -d        increase the debug level (accepted; not read in this function)
        -W        print per-tag word statistics instead of the ref/type list
        -n limit  maximum number of words shown per tag (default 10;
                  a value <= 0 disables the limit)

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        # The option letter is upper-case: getopt below only accepts -W.
        print(f'usage: {argv[0]} [-d] [-n limit] [-W] [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dWn:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    limit = 10
    wordstat = False
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-W':
            wordstat = True
        elif k == '-n':
            try:
                limit = int(v)
            except ValueError:
                # A non-numeric limit is a command-line error, not a crash.
                return usage()

    refs = {}
    for path in args:
        for method in get_graphs(path):
            for node in method:
                ref = node.ref
                if ref is None: continue
                if node.ntype is None: continue
                if ref[0] not in '$@': continue
                refs[ref] = node.ntype

    if not wordstat:
        for (ref, ntype) in sorted(refs.items()):
            print(ref, ntype)
        return 0

    # words: tag -> {word -> occurrence count}
    words = {}
    for ref in refs:
        name = stripid(ref)
        if name is None: continue
        for (pos, w) in postag(reversed(splitwords(name))):
            d = words.setdefault(pos, {})
            d[w] = d.get(w, 0) + 1
    print('counts', {pos: sum(d.values()) for (pos, d) in words.items()})
    print('words', {pos: len(d) for (pos, d) in words.items()})
    # Tags with the largest vocabulary first; within a tag, most frequent
    # words first.
    for (pos, d) in sorted(words.items(),
                           key=lambda x: len(x[1]),
                           reverse=True):
        print(pos)
        a = sorted(d.items(), key=lambda x: x[1], reverse=True)
        if 0 < limit:
            a = a[:limit]
        for (w, n) in a:
            print(f'  {n} {w}')
    return 0
コード例 #2
0
ファイル: graph2gv.py プロジェクト: fagan2888/fgyama
def main(argv):
    """Write the selected methods as Graphviz source or as an HTML page.

    Options:
        -H         produce HTML (each ``run_dot`` result wrapped in a <div>)
        -o output  write to this file instead of stdout; HTML mode is then
                   decided by whether the name ends with ``.html``
        -h nid     comma-separated integer node ids to highlight
        -n name    only emit the method with this name

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(
            f'usage: {argv[0]} [-H] [-o output] [-n name] [-h nid] [graph ...]'
        )
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'Ho:h:n:')
    except getopt.GetoptError:
        return usage()
    html = False
    outpath = None
    highlight = None
    names = None
    for (k, v) in opts:
        if k == '-H':
            html = True
        elif k == '-o':
            # Only remember the path here; the file is opened after all
            # arguments are validated so a usage error never creates or
            # leaks an output file.
            outpath = v
            html = v.endswith('.html')
        elif k == '-h':
            highlight = {int(nid) for nid in v.split(',')}
        elif k == '-n':
            names = [v]
    if not args: return usage()

    methods = []
    for path in args:
        for method in get_graphs(path):
            if names and method.name not in names: continue
            methods.append(method)

    output = sys.stdout if outpath is None else open(outpath, 'w')
    try:
        if html:
            output.write('<!DOCTYPE html><html><body>\n')
            for data in run_dot(methods):
                output.write('<div>\n')
                output.write(data)
                output.write('</div><hr>\n')
            output.write('</body>')
        else:
            for method in methods:
                write_gv(output,
                         method.root,
                         highlight=highlight,
                         name=method.name)
    finally:
        # Never close sys.stdout: the caller may still want to print.
        if output is not sys.stdout:
            output.close()
    return 0
コード例 #3
0
ファイル: interproc.py プロジェクト: fagan2888/fgyama
 def load(self, path, fp=None, filter=None):
     """Register the methods found in the graph file at *path*.

     Methods whose ``style`` is ``'initializer'`` are skipped, as are
     methods rejected by *filter* (a predicate called with the method)
     when one is given.  The first time a class source path is seen it
     gets the next id in ``self.srcmap`` and, if *fp* is given, a
     ``+SOURCE`` line is written to it.  Each accepted method is appended
     to ``self.methods`` and indexed by name in ``self.gid2method``.
     """
     for method in get_graphs(path):
         rejected = (method.style == 'initializer'
                     or (filter is not None and not filter(method)))
         if rejected:
             continue
         srcpath = method.klass.path
         if srcpath not in self.srcmap:
             # Ids are handed out in order of first appearance.
             fid = len(self.srcmap)
             self.srcmap[srcpath] = fid
             if fp is not None:
                 src = (fid, srcpath)
                 fp.write(f'+SOURCE {src}\n')
         self.methods.append(method)
         self.gid2method[method.name] = method
コード例 #4
0
ファイル: graph2db.py プロジェクト: fagan2888/fgyama
def main(argv):
    """Import graph files into a graph database and a method index.

    The first two positional arguments are the graph DB path and the index
    DB path; the remaining ones are graph files.  Unless ``-c`` is given,
    both database paths must not exist yet.

    Returns 0 on success, 100 after printing the usage message, and 111
    when a database file already exists.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-c)ontinue] graph.db index.db [graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'c')
    except getopt.GetoptError:
        return usage()

    isnew = True
    for (k, v) in opts:
        if k == '-c': isnew = False

    def exists(path):
        print(f'already exists: {path}')
        return 111

    # Validate both database arguments before opening either one, so that a
    # command-line error never leaves a freshly opened graph DB behind.
    if not args: return usage()
    graphpath = args.pop(0)
    if isnew and os.path.exists(graphpath): return exists(graphpath)

    if not args: return usage()
    indexpath = args.pop(0)
    if isnew and os.path.exists(indexpath): return exists(indexpath)
    # NOTE(review): when both paths were identical the old code opened the
    # graph DB first and then, presumably because that created the file,
    # reported the index path as existing; keep rejecting that case.
    if isnew and indexpath == graphpath: return exists(indexpath)

    graphdb = GraphDB(graphpath)
    indexdb = IndexDB(indexpath, insert=True)

    # cid carries over between methods: a method whose class has no source
    # is stored under the most recently registered source id.
    cid = None
    for path in args:
        for method in get_graphs(path):
            assert isinstance(method, DFMethod)
            src = method.klass.src
            if src is not None:
                cid = graphdb.add_src(src)
            assert cid is not None
            graphdb.add(cid, method)
            indexdb.index_method(method)
    graphdb.close()
    indexdb.close()
    return 0
コード例 #5
0
ファイル: listmethods.py プロジェクト: fagan2888/fgyama
def main(argv):
    """Print word statistics over the method names found in graph files.

    Each method name is split with ``splitmethodname``; the resulting name
    is split into words, reversed, and tagged with ``postag``.  Word
    occurrences are counted per tag and then reported.

    Options:
        -d        increase the debug level (accepted; not read in this function)
        -n limit  maximum number of words shown per tag (default 10;
                  a value <= 0 disables the limit)

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} ' '[-d] [-n limit] ' '[graph ...]')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dn:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    limit = 10
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-n':
            try:
                limit = int(v)
            except ValueError:
                # A non-numeric limit is a command-line error, not a crash.
                return usage()

    # words: tag -> {word -> occurrence count}
    words = {}
    for path in args:
        for method in get_graphs(path):
            # Use distinct names for the unpacked parts so the command-line
            # ``args`` list being iterated is not rebound mid-loop.
            (name, _margs, _retype) = splitmethodname(method.name)
            if name is None: continue
            for (pos, w) in postag(reversed(splitwords(name))):
                d = words.setdefault(pos, {})
                d[w] = d.get(w, 0) + 1

    print('counts', {pos: sum(d.values()) for (pos, d) in words.items()})
    print('words', {pos: len(d) for (pos, d) in words.items()})
    # Tags with the largest vocabulary first; within a tag, most frequent
    # words first.
    for (pos, d) in sorted(words.items(),
                           key=lambda x: len(x[1]),
                           reverse=True):
        print(pos)
        a = sorted(d.items(), key=lambda x: x[1], reverse=True)
        if 0 < limit:
            a = a[:limit]
        for (w, n) in a:
            print(f'  {n} {w}')
    return 0
コード例 #6
0
ファイル: finditer.py プロジェクト: fagan2888/fgyama
def main(argv):
    """Show the nodes of every ref reported by ``finditer`` for a graph file.

    Only the first positional argument is read.  Output is produced only
    for graphs whose source could be fetched from the ``-B`` source
    database; with ``-H`` it goes through ``show_html``, otherwise it is
    printed as text.

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print('usage: %s [-B basedir] [-H] [graph ...]' % argv[0])
        return 100

    try:
        opts, args = getopt.getopt(argv[1:], 'vB:H')
    except getopt.GetoptError:
        return usage()

    verbose = False
    srcdb = None
    html = False
    for flag, value in opts:
        if flag == '-v':
            verbose = True
        elif flag == '-B':
            srcdb = SourceDB(value)
        elif flag == '-H':
            html = True
    if not args:
        return usage()

    if html:
        show_html_headers()
    for graph in get_graphs(args.pop(0)):
        src = None
        if srcdb is not None:
            try:
                src = srcdb.get(graph.src)
            except KeyError:
                # Lookup failed: carry on as if no source were available.
                pass
        for ref in finditer(graph):
            if src is None:
                continue
            nodes = [
                node for node in graph.nodes.values() if node.ref == ref
            ]
            if html:
                show_html(src, nodes)
            else:
                print(src, graph, ref)
                src.show_nodes(nodes)
            print()
    return 0
コード例 #7
0
def main(argv):
    """Group methods that share trailing words in their names.

    For each non-initializer method the bare name is taken (the text
    between ``';.'`` and ``'('`` when the name contains ``';.'``), split
    with ``splitcamel``, and the method is filed under every proper word
    suffix of that name.  Groups are written longest suffix first, and a
    method that was already written in an earlier group is left out of
    later ones.  Groups with fewer than two remaining methods are skipped.

    Options:
        -o output    write to this file instead of stdout
        -B basedir   source database directory; enables source listings
        -c encoding  encoding passed to the source database

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        print(f'usage: {argv[0]} [-o output] [-B basedir] [-c encoding] '
              'out.graph ...')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'o:B:c:')
    except getopt.GetoptError:
        return usage()
    output = None
    basedir = None
    encoding = None
    for (k, v) in opts:
        if k == '-o': output = v
        elif k == '-B': basedir = v
        elif k == '-c': encoding = v
    if not args: return usage()

    # Build the source database only after all options are read, so that
    # -c takes effect regardless of its position relative to -B.
    srcdb = None if basedir is None else SourceDB(basedir, encoding)

    # names: tuple of trailing words -> methods whose name ends with them
    names = {}
    for path in args:
        print(f'Loading: {path!r}...', file=sys.stderr)
        for method in get_graphs(path):
            if method.style == 'initializer': continue
            if ';.' in method.name:
                (_, _, name) = method.name.partition(';.')
                (name, _, _) = name.partition('(')
            else:
                name = method.name
            words = splitcamel(name)
            # Suffixes of length 1 .. len(words)-1; the full name is not used.
            for n in range(1, len(words)):
                names.setdefault(tuple(words[-n:]), []).append(method)

    # Open the output only once there is something to write, and make sure
    # it is closed even if writing fails.
    fp = sys.stdout if output is None else open(output, 'w')
    try:
        done = set()
        for k in sorted(names, key=len, reverse=True):
            a = [method for method in names[k] if method not in done]
            if 2 <= len(a):
                fp.write(f'= {len(a)}\n')
                for method in a:
                    fp.write(f'+ {method.name}\n')
                    if srcdb is None: continue
                    if method.src is None or method.ast is None: continue
                    src = srcdb.get(method.src)
                    (_, start, end) = method.ast
                    fp.write(f'# {method.src}\n')
                    ranges = [(start, end, 0)]
                    for (lineno, line) in src.show(ranges):
                        if lineno is None:
                            fp.write(line.rstrip() + '\n')
                        else:
                            fp.write(f'{lineno:4d}: {line.rstrip()}\n')
                fp.write('\n')
            done.update(a)
    finally:
        if fp is not sys.stdout:
            fp.close()
    return 0
コード例 #8
0
ファイル: simtoken.py プロジェクト: fagan2888/fgyama
def main(argv):
    """Cluster methods by pairwise similarity and write the clusters out.

    The source text of every non-initializer method that has both ``src``
    and ``ast`` is tokenised with ``\\w+``; all method pairs are scored with
    the selected similarity function, and pairs scoring at least the
    threshold are linked.  Linked methods form clusters, which are written
    largest first.

    Options:
        -v            also write each method's source listing
        -o output     write to this file instead of stdout
        -T / -N       score with ``tokensim`` (default) or ``namesim``
        -B basedir    source database directory (required)
        -c encoding   encoding passed to the source database (default utf-8)
        -t threshold  minimum similarity for linking two methods (default 0.7)

    Returns 0 on success and 100 after printing the usage message.
    """
    import getopt

    def usage():
        # -B is shown as mandatory because the tool cannot run without it.
        print(
            f'usage: {argv[0]} [-v] [-o output] {{-T|-N}} -B basedir [-c encoding] [-t threshold] '
            'out.graph ...')
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'vo:TNB:c:t:')
    except getopt.GetoptError:
        return usage()
    output = None
    basedir = None
    encoding = 'utf-8'
    threshold = 0.7
    verbose = False
    use_namesim = False
    for (k, v) in opts:
        if k == '-v': verbose = True
        elif k == '-o': output = v
        elif k == '-T': use_namesim = False
        elif k == '-N': use_namesim = True
        elif k == '-B': basedir = v
        elif k == '-c': encoding = v
        elif k == '-t':
            try:
                threshold = float(v)
            except ValueError:
                return usage()
    if not args: return usage()
    # A missing -B is a command-line error; report it as such instead of
    # relying on an assert (which is stripped under -O).
    if basedir is None: return usage()

    # Build the source database only after all options are read, so that
    # -c takes effect regardless of its position relative to -B.
    srcdb = SourceDB(basedir, encoding)
    calcsim = namesim if use_namesim else tokensim

    PAT = re.compile(r'\w+')

    def gettokens(s):
        return (m.group(0) for m in PAT.finditer(s))

    tokens = []  # [(method, {token -> count within the method})]
    freq = {}    # token -> number of methods containing it
    for path in args:
        print(f'Loading: {path!r}...', file=sys.stderr)
        for method in get_graphs(path):
            if method.style == 'initializer': continue
            if method.src is None: continue
            if method.ast is None: continue
            (_, start, end) = method.ast
            src = srcdb.get(method.src)
            text = src.data[start:end]
            c = {}
            for t in gettokens(text):
                c[t] = c.get(t, 0) + 1
            assert c, repr(text)
            for t in c:
                freq[t] = freq.get(t, 0) + 1
            tokens.append((method, c))
    total = sum(freq.values())
    # NOTE(review): idf is not read anywhere in this function; kept in case
    # it is meant to feed the similarity functions - confirm or remove.
    idf = {t: log(total / n) for (t, n) in freq.items()}

    sys.stderr.write('Clustering')
    a = []
    for (i, (g1, c1)) in enumerate(tokens):
        for (g2, c2) in tokens[i + 1:]:
            sim = calcsim(g1, c1, g2, c2)
            if threshold <= sim:
                a.append((sim, g1, g2))
        sys.stderr.write('.')
        sys.stderr.flush()
    a.sort(key=lambda x: x[0], reverse=True)
    sys.stderr.write('\n')

    class Cluster:
        """A group of linked methods, kept in insertion order."""

        def __init__(self):
            self.objs = []
            return

        def __len__(self):
            return len(self.objs)

        def __iter__(self):
            return iter(self.objs)

        def add(self, obj):
            self.objs.append(obj)

        def merge(self, c):
            self.objs.extend(c.objs)

    # cls maps EVERY clustered method to the cluster that currently owns it.
    cls = {}
    for (_, g1, g2) in a:
        k1 = cls.get(g1)
        k2 = cls.get(g2)
        if k1 is not None and k2 is not None:
            # Both are clustered. Already together: nothing to do (merging
            # a cluster into itself would duplicate its members).
            if k1 is k2: continue
            k1.merge(k2)
            # Re-point every member of the absorbed cluster so that the old
            # cluster object is no longer reachable and is not printed.
            for g in k2:
                cls[g] = k1
        elif k1 is not None:
            # g1 is in, g2 is not.
            k1.add(g2)
            cls[g2] = k1
        elif k2 is not None:
            # g2 is in, g1 is not.
            k2.add(g1)
            cls[g1] = k2
        else:
            # Neither is in. Create a new cluster.
            c = Cluster()
            c.add(g1)
            c.add(g2)
            cls[g1] = cls[g2] = c

    # dict.fromkeys de-duplicates while keeping first-seen order, so equal
    # sized clusters come out in a deterministic order.
    clusters = sorted(dict.fromkeys(cls.values()), key=len, reverse=True)

    # Open the output only once there is something to write, and make sure
    # it is closed even if writing fails.
    fp = sys.stdout if output is None else open(output, 'w')
    try:
        for c in clusters:
            fp.write(f'= {len(c)}\n')
            for method in c:
                fp.write(f'+ {method.name}\n')
                if not verbose: continue
                if method.src is None or method.ast is None: continue
                src = srcdb.get(method.src)
                (_, start, end) = method.ast
                fp.write(f'# {method.src}\n')
                ranges = [(start, end, 0)]
                for (lineno, line) in src.show(ranges):
                    if lineno is None:
                        fp.write(line.rstrip() + '\n')
                    else:
                        fp.write(f'{lineno:4d}: {line.rstrip()}\n')
            fp.write('\n')
    finally:
        if fp is not sys.stdout:
            fp.close()
    return 0
コード例 #9
0
ファイル: graph2context.py プロジェクト: fagan2888/fgyama
def main(argv):
    """Print every call chain reachable from the uncalled graphs.

    All graphs from the given files are indexed by name, caller/callee
    links are derived from their ``call`` and ``new`` nodes, and then,
    starting from each graph that nothing links to, every chain of two or
    more names is printed as a space-separated line.

    Returns 0 on success and 100 after printing the usage message.
    """
    import fileinput
    import getopt

    def usage():
        print('usage: %s [-d] [graph ...]' % argv[0])
        return 100

    try:
        opts, args = getopt.getopt(argv[1:], 'd')
    except getopt.GetoptError:
        return usage()
    debug = sum(1 for (flag, _) in opts if flag == '-d')
    if not args:
        return usage()

    # Index every graph by name; a later graph replaces an earlier one
    # that has the same name.
    graphs = {}
    for path in args:
        for graph in get_graphs(path):
            graphs[graph.name] = graph

    print('# graphs: %r' % len(graphs), file=sys.stderr)

    linkto = {}    # caller -> callees, in first-seen order
    linkfrom = {}  # callee -> callers, in first-seen order

    def link(x, y):  # (caller, callee)
        callees = linkto.setdefault(x, [])
        if y not in callees:
            callees.append(y)
        callers = linkfrom.setdefault(y, [])
        if x not in callers:
            callers.append(x)

    for src in graphs.values():
        for node in src:
            if node.kind == 'call':
                candidates = node.data.split(' ')
                # To keep the number of contexts from growing
                # exponentially, only the first known candidate is linked.
                known = next((c for c in candidates if c in graphs), None)
                if known is not None:
                    link(src.name, known)
                else:
                    # None of the candidates is defined in the loaded
                    # graphs: link all of them.
                    for name in candidates:
                        link(src.name, name)
            elif node.kind == 'new':
                link(src.name, node.data)

    def enum_context(src, chain=None):
        # Stop when src is already part of the chain being extended.
        if chain is not None and src in chain:
            return
        chain = CLink(src, chain)
        if 2 <= len(chain):
            print(' '.join(chain))
        for dst in linkto.get(src, ()):
            enum_context(dst, chain)

    # Start from every graph that no other graph links to.
    for graph in graphs.values():
        if graph.name in linkfrom:
            continue
        print('# start: %r' % graph.name, file=sys.stderr)
        enum_context(graph.name)
    return 0