コード例 #1
0
ファイル: mkhtml.py プロジェクト: afcarl/comvcom
def main(argv):
    """Command-line entry point: hand every loaded comment entry to ``show``.

    Options (parsed with getopt):
      -B basedir    wrapped in ``SourceDB`` and used to look up each entry's source
      -M srcmap.db  wrapped in ``SourceMap`` and used to look up each entry's URL
      -c context    integer forwarded to ``show`` as ``ncontext`` (default 4)

    The remaining arguments are input files read through ``fileinput``.

    Returns 100 after printing the usage line (bad option or no input file),
    0 otherwise.
    """
    import fileinput
    import getopt

    def usage():
        message = ('usage: %s [-B basedir] [-M srcmap.db] '
                   '[-c context] out.comm' % argv[0])
        print(message)
        return 100

    try:
        options, paths = getopt.getopt(argv[1:], 'B:M:c:')
    except getopt.GetoptError:
        return usage()

    srcdb = None
    srcmap = None
    ncontext = 4
    for flag, value in options:
        if flag == '-B':
            srcdb = SourceDB(value)
        elif flag == '-M':
            srcmap = SourceMap(value)
        elif flag == '-c':
            ncontext = int(value)
    if not paths:
        return usage()

    show_html_headers()

    # NOTE(review): srcdb and srcmap are still None when -B / -M were not
    # given, so the lookups below fail on the first entry in that case.
    entries = CommentEntry.load(fileinput.input(paths))
    for index, entry in enumerate(entries):
        src = srcdb.get(entry.path)
        url = srcmap.geturl(entry.path)
        # Each entry gets a zero-padded sequential id: c000, c001, ...
        show('c%03d' % index, src, entry.spans, entry.key, url,
             ncontext=ncontext)

    return 0
コード例 #2
0
def getwords(fp):
    """Count ordered word pairs in entries whose predicted category is 'p'.

    Entries are read with ``CommentEntry.load(fp)``.  Only entries with
    ``predCategory == 'p'`` that carry both ``words`` and ``posTags`` are
    used; both fields are split on commas and zipped together.

    A pair ``(w1, w2)`` is counted whenever the tag of ``w1`` is in ``POS1``
    and ``w2`` occurs later in the same entry with a tag in ``POS2``.

    Returns a dict mapping ``(w1, w2)`` to its number of occurrences.
    """
    counts = {}
    for entry in CommentEntry.load(fp):
        if entry['predCategory'] != 'p':
            continue
        if not ('words' in entry and 'posTags' in entry):
            continue
        tagged = list(zip(entry['words'].split(','),
                          entry['posTags'].split(',')))
        for pos, (first, first_tag) in enumerate(tagged):
            if first_tag not in POS1:
                continue
            # Pair the word only with the words that follow it.
            for second, second_tag in tagged[pos + 1:]:
                if second_tag in POS2:
                    pair = (first, second)
                    counts[pair] = counts.get(pair, 0) + 1
    return counts
コード例 #3
0
ファイル: getstats.py プロジェクト: afcarl/comvcom
def main(argv):
    """Print a per-file tally of ``predCategory`` values.

    For every path in ``argv[1:]`` a progress line is written to stderr, the
    entries are read with ``CommentEntry.load`` and one summary line is
    printed: ``+ name total cat:count(ratio) ...`` with categories ordered by
    descending count.

    Always returns 0.
    """
    for path in argv[1:]:
        sys.stderr.write(path + '...\n')
        sys.stderr.flush()

        # Label: base name without extension, cut at its last '-'
        # (empty when the stem contains no '-').
        stem = os.path.splitext(os.path.basename(path))[0]
        name = stem.rpartition('-')[0]

        counts = {}
        with open(path) as fp:
            for entry in CommentEntry.load(fp):
                cat = entry['predCategory']
                counts[cat] = counts.get(cat, 0) + 1

        total = sum(counts.values())
        ranked = sorted(counts.items(), key=lambda item: item[1],
                        reverse=True)
        summary = ' '.join('%s:%d(%.2f)' % (c, n, n / total)
                           for (c, n) in ranked)
        print('+', name, total, summary)
    return 0
コード例 #4
0
def main(argv):
    """Command-line entry point: print one ``CommentEntry`` per extracted span.

    Options (parsed with getopt):
      -d      increase the debug counter (parsed but not otherwise used here)
      -t tab  integer passed to ``Source(tab=...)`` (default 8)

    Every remaining argument is a source path.  Each file is loaded,
    tokenized and parsed through ``Source``; files that raise UnicodeError,
    SyntaxError or tokenize.TokenError are reported on stderr as ``! path``
    and skipped.

    Returns 100 after printing the usage line (unknown option or a
    non-integer ``-t`` value), 0 otherwise.
    """
    import getopt

    def usage():
        print('usage: %s [-d] [-t tab] [file ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dt:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    tab = 8
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-t':
            # A malformed tab width is a usage error, not a traceback.
            try:
                tab = int(v)
            except ValueError:
                return usage()
    for path in args:
        src = Source(tab=tab)
        try:
            with open(path) as fp:
                src.load(fp)
                src.tokenize()
                src.parse()
        except (UnicodeError, SyntaxError, tokenize.TokenError):
            sys.stderr.write('! %s\n' % path)
            continue
        prev = None
        for (start, end, feats) in getfeats(src):
            if prev is not None:
                # Record where the previous span sat so later stages can
                # compute deltas against it.
                (start0, end0) = prev
                feats['prevLine'] = src.getrow(end0)
                feats['prevCols'] = src.getcol(start0)
            prev = (start, end)
            span = (start + 1, end)
            ent = CommentEntry(path, [span], feats)
            print(ent)
    # Explicit success code, matching the integer returned by usage().
    return 0
コード例 #5
0
def main(argv):
    """Command-line entry point: print the source lines of matching entries.

    Options (parsed with getopt):
      -c context  integer forwarded to ``src.show`` as ``ncontext`` (default 4)
      -f k=v      keep only entries whose field ``k`` equals ``v``; may be
                  repeated, and every filter must match

    The first positional argument is handed to ``SourceDB``; the rest are
    comment files read through ``fileinput``.

    Returns 100 after printing the usage line (bad option or no positional
    argument), 0 otherwise.
    """
    import fileinput
    import getopt

    def usage():
        print('usage: %s [-c context] [-f k=v] basedir out.comm' % argv[0])
        return 100

    try:
        options, remaining = getopt.getopt(argv[1:], 'c:f:')
    except getopt.GetoptError:
        return usage()

    ncontext = 4
    filters = []
    for flag, value in options:
        if flag == '-c':
            ncontext = int(value)
        elif flag == '-f':
            field, _, wanted = value.partition('=')
            filters.append((field, wanted))
    if not remaining:
        return usage()

    srcdb = SourceDB(remaining[0])

    entries = CommentEntry.load(fileinput.input(remaining[1:]))
    for entry in entries:
        # Skip the entry as soon as one filter does not match.
        if any(entry[field] != wanted for (field, wanted) in filters):
            continue
        src = srcdb.get(entry.path)
        print('@ %s %r' % (src.name, entry.spans))
        ranges = [(start, end, True) for (start, end) in entry.spans]
        for (_, text) in src.show(ranges, ncontext=ncontext):
            print('  ' + text, end='')
        print()
    return 0
コード例 #6
0
ファイル: detcat.py プロジェクト: afcarl/comvcom
def main(argv):
    """Classify comment entries with a stored tree and print the results.

    Options (parsed with getopt):
      -d          after processing, print a confusion matrix with
                  precision / recall / F figures
      -P          run the type strings of each entry through ``pythonify``
      -B srcdb    wrapped in ``SourceDB``; when given, the source lines of
                  every classified entry are printed as well
      -k keyprop  field holding the reference category (default 'keyCategory')
      -r resprop  field receiving the predicted category
                  (default 'predCategory')

    The first positional argument is the file containing the exported tree;
    the remaining ones are comment files read through ``fileinput``.

    Returns 100 after printing the usage line (bad option or missing tree
    file), 0 otherwise.
    """
    import getopt
    import fileinput

    def usage():
        print('usage: %s [-d] [-P] [-B srcdb] [-k keyprop] [-r resprop] '
              'tree [file ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dPB:k:r:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    pythonmode = False
    srcdb = None
    keyprop = 'keyCategory'
    resprop = 'predCategory'
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-P':
            pythonmode = True
        elif k == '-B':
            srcdb = SourceDB(v)
        elif k == '-k':
            keyprop = v
        elif k == '-r':
            resprop = v
    # The tree file is mandatory; without it args.pop(0) would raise
    # IndexError instead of telling the user what is missing.
    if not args:
        return usage()

    builder = TreeBuilder()
    add_cat_feats(builder)

    path = args.pop(0)
    with open(path) as fp:
        # NOTE(review): eval() runs whatever code the file contains; only
        # feed this tool tree files from a trusted source.
        data = eval(fp.read())
    tree = builder.import_tree(data)

    # (reference category, predicted category) -> number of entries
    mat = {}
    fp = fileinput.input(args)
    for e in CommentEntry.load(fp):
        if 'parentTypes' not in e:
            continue
        if pythonmode:
            e['parentTypes'] = pythonify(e['parentTypes'])
            if 'leftTypes' in e:
                e['leftTypes'] = pythonify(e['leftTypes'])
            if 'rightTypes' in e:
                e['rightTypes'] = pythonify(e['rightTypes'])
        # ignore non-local comments.
        if 'Block,MethodDeclaration' not in e['parentTypes']:
            continue
        line = int(e['line'])
        cols = int(e['cols'])
        if 'prevLine' in e:
            e['deltaLine'] = line - int(e['prevLine'])
        if 'prevCols' in e:
            e['deltaCols'] = cols - int(e['prevCols'])
        if 'leftLine' in e:
            e['deltaLeft'] = line - int(e['leftLine'])
        if 'rightLine' in e:
            e['deltaRight'] = line - int(e['rightLine'])

        cat0 = e[keyprop]
        assert cat0, e
        cat1 = tree.test(e)
        e[resprop] = cat1
        # Entries whose reference category is 'u' are left out of the matrix.
        if cat0 is not None and cat0 != 'u':
            k = (cat0, cat1)
            mat[k] = mat.get(k, 0) + 1
        print(e)
        if srcdb is not None:
            src = srcdb.get(e.path)
            ranges = [(start, end, 1) for (start, end) in e.spans]
            for (_, text) in src.show(ranges):
                print(text, end='')
            print()

    if debug:
        # The report always uses this fixed category order.
        # Z is defined elsewhere; presumably it guards the divisions
        # against a zero denominator -- confirm.
        keys = ('p', 'a', 'c', 'v', 'o', 'd', 'i')
        print('A\\C  %s| recall' % ('|'.join('%5s' % k for k in keys)))
        col_t = {}
        row_t = {}
        for cat0 in keys:
            a = {}
            for cat1 in keys:
                v = mat.get((cat0, cat1), 0)
                a[cat1] = v
                col_t[cat1] = col_t.get(cat1, 0) + v
            row_c = mat.get((cat0, cat0), 0)
            row_t1 = sum(a.values())
            row_t[cat0] = row_t1
            print('%4s:%s| %.3f(%2d/%2d)' %
                  (cat0, '|'.join('%5d' % a[cat1] for cat1 in keys),
                   row_c / Z(row_t1), row_c, row_t1))
        print('prec.%s' %
              ('|'.join('%2d/%2d' % (mat.get((cat, cat), 0), col_t[cat])
                        for cat in keys)))
        print('     %s' %
              ('|'.join('%2.3f' % (mat.get((cat, cat), 0) / Z(col_t[cat]))
                        for cat in keys)))
        print()
        for cat in keys:
            v = mat.get((cat, cat), 0)
            p = v / Z(col_t[cat])
            r = v / Z(row_t[cat])
            f = 2 * (p * r) / Z(p + r)
            print('%s: prec=%.3f(%d/%d), recl=%.3f(%d/%d), F=%.3f' %
                  (cat, p, v, col_t[cat], r, v, row_t[cat], f))
        print('%d/%d' %
              (sum(v for ((cat0, cat1), v) in mat.items() if cat0 == cat1),
               sum(mat.values())))
    return 0
コード例 #7
0
def main(argv):
    """Merge runs of consecutive comment entries using a stored tree.

    ``argv[1]`` is the file containing the exported tree; the remaining
    arguments are comment files read through ``fileinput``.

    Each entry is tagged with ``keyBIO`` from ``tree.test`` ('B' when the
    test raises ValueError).  Entries accumulate in a buffer that is merged
    and printed whenever a 'B' entry arrives or the path, ``type`` or
    ``parentTypes`` differ from the previous entry.

    Returns 100 after printing a usage line when the tree file is missing,
    0 otherwise.
    """
    import fileinput

    def usage():
        print('usage: %s tree [file ...]' % argv[0])
        return 100

    args = argv[1:]
    # Without this check args.pop(0) below would raise IndexError.
    if not args:
        return usage()

    builder = TreeBuilder()
    builder.addfeat(DF('type'))
    builder.addfeat(QF('deltaLine'))
    builder.addfeat(QF('deltaCols'))
    builder.addfeat(QF('deltaLeft'))
    builder.addfeat(QF('deltaRight'))
    builder.addfeat(DF('parentStart'))
    builder.addfeat(DF('parentEnd'))

    path = args.pop(0)
    with open(path) as fp:
        # NOTE(review): eval() runs whatever code the file contains; only
        # feed this tool tree files from a trusted source.
        data = eval(fp.read())
    tree = builder.import_tree(data)

    def merge(ents):
        # Fold every later entry into the first one; the right-hand context
        # fields are taken from the latest entry that has them.
        e0 = ents[0]
        for e1 in ents[1:]:
            e0.merge(e1)
            if 'rightLine' in e1:
                e0['rightLine'] = e1['rightLine']
            if 'rightTypes' in e1:
                e0['rightTypes'] = e1['rightTypes']
            if 'deltaRight' in e1:
                e0['deltaRight'] = e1['deltaRight']
        return e0

    fp = fileinput.input(args)
    b = []
    prev = None
    for e in CommentEntry.load(fp):
        line = int(e['line'])
        cols = int(e['cols'])
        if 'prevLine' in e:
            e['deltaLine'] = line - int(e['prevLine'])
        if 'prevCols' in e:
            e['deltaCols'] = cols - int(e['prevCols'])
        if 'leftLine' in e:
            e['deltaLeft'] = line - int(e['leftLine'])
        if 'rightLine' in e:
            e['deltaRight'] = line - int(e['rightLine'])
        if prev is not None:
            # A change of file, type or parent context always ends the run.
            if (prev.path != e.path or prev['type'] != e['type']
                    or prev['parentTypes'] != e['parentTypes']):
                if b:
                    print(merge(b))
                    b = []
        try:
            bio = tree.test(e)
        except ValueError:
            bio = 'B'
        e['keyBIO'] = bio
        if bio == 'B':
            # 'B' starts a new run, so flush whatever is buffered first.
            if b:
                print(merge(b))
                b = []
        b.append(e)
        prev = e
    if b:
        print(merge(b))
    return 0
コード例 #8
0
def main(argv):
    """Command-line entry point: build a tree from entries, or evaluate one.

    Options (parsed with getopt):
      -d          increase the debug level passed to ``TreeBuilder``
      -m minkeys  integer passed to ``TreeBuilder`` (default 10)
      -f feats    file containing an exported tree; selects testing mode
      -k keyprop  entry field copied into ``entry.key`` (default 'key')

    Remaining arguments are comment files read through ``fileinput``.

    Without -f the tree built from the entries is printed via
    ``export_tree``.  With -f the stored tree is applied to every entry and
    per-key precision / recall / F figures plus an overall hit count are
    printed.

    Returns 100 after printing the usage line on a bad option, 0 otherwise.
    """
    import getopt
    import fileinput

    def usage():
        print(
            'usage: %s [-d] [-m minkeys] [-f feats] [-k keyprop] [file ...]' %
            argv[0])
        return 100

    try:
        options, paths = getopt.getopt(argv[1:], 'dm:f:k:')
    except getopt.GetoptError:
        return usage()

    debug = 0
    minkeys = 10
    feats = None
    keyprop = 'key'
    for flag, value in options:
        if flag == '-d':
            debug += 1
        elif flag == '-m':
            minkeys = int(value)
        elif flag == '-f':
            feats = value
        elif flag == '-k':
            keyprop = value

    builder = TreeBuilder(minkeys=minkeys, debug=debug)
    add_cat_feats(builder)

    ents = []
    for entry in CommentEntry.load(fileinput.input(paths)):
        entry.key = entry[keyprop]
        assert entry.key is not None
        line = int(entry['line'])
        cols = int(entry['cols'])
        # (reference field, derived field, value the reference is
        # subtracted from)
        deltas = (
            ('prevLine', 'deltaLine', line),
            ('prevCols', 'deltaCols', cols),
            ('leftLine', 'deltaLeft', line),
            ('rightLine', 'deltaRight', line),
        )
        for (ref, derived, base) in deltas:
            if ref in entry:
                entry[derived] = base - int(entry[ref])
        ents.append(entry)

    if feats is None:
        # training
        root = builder.build(ents)
        if debug:
            print()
            root.dump()
        print(export_tree(root))
        return 0

    # testing
    with open(feats) as fp:
        # NOTE(review): eval() runs whatever code the file contains.
        data = eval(fp.read())
    tree = builder.import_tree(data)

    hits = {}       # key -> entries predicted correctly
    expected = {}   # key -> entries carrying that key
    predicted = {}  # key -> entries the tree assigned to that key
    for entry in ents:
        expected[entry.key] = expected.get(entry.key, 0) + 1
        guess = tree.test(entry)
        predicted[guess] = predicted.get(guess, 0) + 1
        if entry.key == guess:
            hits[guess] = hits.get(guess, 0) + 1

    for (key, n) in hits.items():
        p = n / predicted[key]
        r = n / expected[key]
        f = 2 * (p * r) / (p + r)
        print('%s: prec=%.3f(%d/%d), recl=%.3f(%d/%d), F=%.3f' %
              (key, p, n, predicted[key], r, n, expected[key], f))
    print('%d/%d' % (sum(hits.values()), sum(expected.values())))
    return 0