def main(argv):
    """Show every comment entry of the given files via ``show()``.

    Command line: ``prog [-B basedir] [-M srcmap.db] [-c context] out.comm``

    * ``-B`` builds the ``SourceDB`` used to look up each entry's source.
    * ``-M`` builds the ``SourceMap`` used to look up each entry's URL.
    * ``-c`` is forwarded to ``show()`` as ``ncontext`` (default 4).

    Both ``-B`` and ``-M`` are needed in practice because every entry is
    looked up in both; when either is missing the usage message is printed
    instead of failing later with an ``AttributeError`` on ``None``.

    Returns 0 on success and 100 on a usage error.
    """
    import fileinput
    import getopt

    def usage():
        print('usage: %s [-B basedir] [-M srcmap.db] '
              '[-c context] out.comm' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'B:M:c:')
    except getopt.GetoptError:
        return usage()
    srcdb = None
    srcmap = None
    ncontext = 4
    for (k, v) in opts:
        if k == '-B':
            srcdb = SourceDB(v)
        elif k == '-M':
            srcmap = SourceMap(v)
        elif k == '-c':
            ncontext = int(v)
    if not args:
        return usage()
    # Both lookups below are unconditional, so bail out before any output
    # is produced if one of the databases was not supplied.
    if srcdb is None or srcmap is None:
        return usage()
    show_html_headers()
    fp = fileinput.input(args)
    for (index, e) in enumerate(CommentEntry.load(fp)):
        src = srcdb.get(e.path)
        url = srcmap.geturl(e.path)
        # Zero-padded running number used as the entry's id.
        cid = 'c%03d' % index
        show(cid, src, e.spans, e.key, url, ncontext=ncontext)
    return 0
def getwords(fp):
    """Count ordered word pairs in entries whose 'predCategory' is 'p'.

    Entries are read with ``CommentEntry.load(fp)``.  An entry is used only
    if it carries both a 'words' and a 'posTags' field; both are split on
    commas and zipped into (word, tag) pairs.  For every word whose tag is
    in ``POS1``, each *later* word whose tag is in ``POS2`` forms a pair.

    Returns a dict mapping ``(word1, word2)`` to its number of occurrences.
    """
    counts = {}
    for entry in CommentEntry.load(fp):
        if entry['predCategory'] != 'p':
            continue
        if not ('words' in entry and 'posTags' in entry):
            continue
        tagged = list(zip(entry['words'].split(','),
                          entry['posTags'].split(',')))
        for (pos, (head, head_tag)) in enumerate(tagged):
            if head_tag not in POS1:
                continue
            # Only words that come after the head can complete a pair.
            for (tail, tail_tag) in tagged[pos + 1:]:
                if tail_tag in POS2:
                    pair = (head, tail)
                    counts[pair] = counts.get(pair, 0) + 1
    return counts
def main(argv):
    """Print a 'predCategory' histogram for each file named in argv[1:].

    For every path a progress line is written to stderr, the entries are
    read with ``CommentEntry.load`` and counted by their 'predCategory'
    value.  One summary line is printed per file::

        + <name> <total> <cat>:<count>(<ratio>) ...

    with categories ordered by descending count.  ``<name>`` is the file's
    base name without extension, cut at its last '-'; a name that contains
    no '-' is kept whole.

    Always returns 0.
    """
    for path in argv[1:]:
        sys.stderr.write(path + '...\n')
        sys.stderr.flush()
        (stem, _) = os.path.splitext(os.path.basename(path))
        # rpartition() returns ('', '', stem) when there is no '-', which
        # would blank the name; only cut when a separator was found.
        (head, sep, _) = stem.rpartition('-')
        name = head if sep else stem
        cc = {}
        with open(path) as fp:
            for e in CommentEntry.load(fp):
                cat = e['predCategory']
                cc[cat] = cc.get(cat, 0) + 1
        total = sum(cc.values())
        ranked = sorted(cc.items(), key=lambda x: x[1], reverse=True)
        # No division happens when the file is empty: ranked is empty too.
        print('+', name, total,
              ' '.join('%s:%d(%.2f)' % (c, n, n / total) for (c, n) in ranked))
    return 0
def main(argv):
    """Print one ``CommentEntry`` per feature span found in each source file.

    Command line: ``prog [-d] [-t tab] [file ...]``

    Each file is loaded into a ``Source(tab=tab)`` (default tab 8),
    tokenized and parsed.  A file that raises ``UnicodeError``,
    ``SyntaxError`` or ``tokenize.TokenError`` is reported on stderr as
    ``! <path>`` and skipped.  For every ``(start, end, feats)`` produced by
    ``getfeats(src)`` an entry covering ``(start + 1, end)`` is printed;
    from the second span on, 'prevLine' and 'prevCols' are filled in from
    the previous span.

    Returns 0 on success and 100 on a usage error.
    """
    import getopt

    def usage():
        print('usage: %s [-d] [-t tab] [file ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dt:')
    except getopt.GetoptError:
        return usage()
    debug = 0  # counted for -d; not consulted anywhere in this function
    tab = 8
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-t':
            tab = int(v)
    for path in args:
        src = Source(tab=tab)
        try:
            with open(path) as fp:
                src.load(fp)
            src.tokenize()
            src.parse()
        except (UnicodeError, SyntaxError, tokenize.TokenError):
            sys.stderr.write('! %s\n' % path)
            continue
        prev = None
        for (start, end, feats) in getfeats(src):
            if prev is not None:
                (start0, end0) = prev
                # Row comes from the previous span's end, column from its
                # start.
                feats['prevLine'] = src.getrow(end0)
                feats['prevCols'] = src.getcol(start0)
            prev = (start, end)
            span = (start + 1, end)
            print(CommentEntry(path, [span], feats))
    # Return an explicit status like the other entry points do.
    return 0
def main(argv):
    """Print the source text of comment entries that pass all filters.

    Command line: ``prog [-c context] [-f k=v] basedir out.comm``

    ``-f k=v`` may be repeated; an entry is shown only if ``entry[k] == v``
    for every filter.  ``-c`` is forwarded to ``src.show()`` as ``ncontext``
    (default 4).  The first positional argument builds the ``SourceDB``;
    the remaining ones are read through ``fileinput``.

    Returns 0 on success and 100 on a usage error.
    """
    import fileinput
    import getopt

    def usage():
        print('usage: %s [-c context] [-f k=v] basedir out.comm' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'c:f:')
    except getopt.GetoptError:
        return usage()

    ncontext = 4
    filters = []
    for (opt, value) in opts:
        if opt == '-c':
            ncontext = int(value)
        elif opt == '-f':
            (prop, _, wanted) = value.partition('=')
            filters.append((prop, wanted))
    if not args:
        return usage()

    srcdb = SourceDB(args.pop(0))
    for entry in CommentEntry.load(fileinput.input(args)):
        # Skip the entry as soon as one filter does not match.
        if any(entry[prop] != wanted for (prop, wanted) in filters):
            continue
        src = srcdb.get(entry.path)
        print('@ %s %r' % (src.name, entry.spans))
        ranges = [(start, end, True) for (start, end) in entry.spans]
        for (_, text) in src.show(ranges, ncontext=ncontext):
            print(' ' + text, end='')
        print()
    return 0
def main(argv):
    """Classify comment entries with a saved tree and print each result.

    Command line: ``prog [-d] [-P] [-B srcdb] [-k keyprop] [-r resprop]
    tree [file ...]``

    The first positional argument names a file whose contents are evaluated
    and passed to ``TreeBuilder.import_tree``; the remaining arguments are
    read through ``fileinput`` as comment entries.  For each entry:

    * entries without 'parentTypes' are skipped;
    * with ``-P`` the 'parentTypes', 'leftTypes' and 'rightTypes' values
      are passed through ``pythonify``;
    * entries whose 'parentTypes' does not contain
      'Block,MethodDeclaration' are skipped;
    * 'deltaLine', 'deltaCols', 'deltaLeft' and 'deltaRight' are derived
      from 'line'/'cols' and the matching reference fields when present;
    * the category returned by ``tree.test`` is stored under ``resprop``
      (default 'predCategory') and the entry is printed;
    * with ``-B`` the lines yielded by ``src.show`` for the entry's spans
      are printed as well.

    Pairs of (``keyprop`` category, predicted category) are counted unless
    the key category is 'u'.  With ``-d`` a confusion matrix and per-category
    precision/recall/F figures are printed afterwards.
    NOTE(review): the original indentation was lost; the report is assumed
    to be gated on ``-d`` as a whole - confirm.

    Returns 0 on success and 100 on a usage error (including a missing
    tree file argument).
    """
    import getopt
    import fileinput

    def usage():
        print('usage: %s [-d] [-P] [-B srcdb] [-k keyprop] [-r resprop] [file ...]' % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dPB:k:r:')
    except getopt.GetoptError:
        return usage()
    debug = 0
    pythonmode = False
    srcdb = None
    keyprop = 'keyCategory'
    resprop = 'predCategory'
    for (k, v) in opts:
        if k == '-d':
            debug += 1
        elif k == '-P':
            pythonmode = True
        elif k == '-B':
            srcdb = SourceDB(v)
        elif k == '-k':
            keyprop = v
        elif k == '-r':
            resprop = v
    # The tree file is mandatory; report it instead of raising IndexError.
    if not args:
        return usage()

    builder = TreeBuilder()
    add_cat_feats(builder)
    path = args.pop(0)
    with open(path) as fp:
        # SECURITY NOTE: eval() executes whatever the tree file contains;
        # only feed it files you trust.
        data = eval(fp.read())
    tree = builder.import_tree(data)

    mat = {}  # (key category, predicted category) -> count
    fp = fileinput.input(args)
    for e in CommentEntry.load(fp):
        if 'parentTypes' not in e:
            continue
        if pythonmode:
            e['parentTypes'] = pythonify(e['parentTypes'])
            if 'leftTypes' in e:
                e['leftTypes'] = pythonify(e['leftTypes'])
            if 'rightTypes' in e:
                e['rightTypes'] = pythonify(e['rightTypes'])
        # ignore non-local comments.
        if 'Block,MethodDeclaration' not in e['parentTypes']:
            continue
        line = int(e['line'])
        cols = int(e['cols'])
        if 'prevLine' in e:
            e['deltaLine'] = line - int(e['prevLine'])
        if 'prevCols' in e:
            e['deltaCols'] = cols - int(e['prevCols'])
        if 'leftLine' in e:
            e['deltaLeft'] = line - int(e['leftLine'])
        if 'rightLine' in e:
            e['deltaRight'] = line - int(e['rightLine'])
        cat0 = e[keyprop]
        assert cat0, e
        cat1 = tree.test(e)
        e[resprop] = cat1
        if cat0 is not None and cat0 != 'u':
            pair = (cat0, cat1)
            mat[pair] = mat.get(pair, 0) + 1
        print(e)
        if srcdb is not None:
            src = srcdb.get(e.path)
            ranges = [(start, end, 1) for (start, end) in e.spans]
            for (_, text) in src.show(ranges):
                print(text, end='')
            print()

    if debug:
        # Fixed category order used for both rows and columns.
        keys = ('p', 'a', 'c', 'v', 'o', 'd', 'i')
        # Z() is defined elsewhere; presumably it guards the denominators
        # against zero - confirm.
        print('A\\C %s| recall' % ('|'.join('%5s' % k for k in keys)))
        col_t = {}  # predicted category -> column total
        row_t = {}  # key category -> row total
        for cat0 in keys:
            a = {}
            for cat1 in keys:
                v = mat.get((cat0, cat1), 0)
                a[cat1] = v
                col_t[cat1] = col_t.get(cat1, 0) + v
            row_c = mat.get((cat0, cat0), 0)
            row_t1 = sum(a.values())
            row_t[cat0] = row_t1
            cells = '|'.join('%5d' % a[cat1] for cat1 in keys)
            print('%4s:%s| %.3f(%2d/%2d)' %
                  (cat0, cells, row_c/Z(row_t1), row_c, row_t1))
        print('prec.%s' % ('|'.join(
            '%2d/%2d' % (mat.get((cat, cat), 0), col_t[cat])
            for cat in keys)))
        print(' %s' % ('|'.join(
            '%2.3f' % (mat.get((cat, cat), 0)/Z(col_t[cat]))
            for cat in keys)))
        print()
        for cat in keys:
            v = mat.get((cat, cat), 0)
            p = v/Z(col_t[cat])
            r = v/Z(row_t[cat])
            f = 2*(p*r)/Z(p+r)
            print('%s: prec=%.3f(%d/%d), recl=%.3f(%d/%d), F=%.3f' %
                  (cat, p, v, col_t[cat], r, v, row_t[cat], f))
        # Overall: diagonal hits over all counted pairs.
        print('%d/%d' % (
            sum(v for ((cat0, cat1), v) in mat.items() if cat0 == cat1),
            sum(mat.values())))
    return 0
def main(argv):
    """Merge consecutive comment entries according to a saved tree.

    Command line: ``prog feats [file ...]``

    ``feats`` names a file whose contents are evaluated and passed to
    ``TreeBuilder.import_tree``; the remaining arguments are read through
    ``fileinput`` as comment entries.  Delta features are derived for each
    entry, ``tree.test`` labels it (falling back to 'B' on ``ValueError``)
    and the label is stored under 'keyBIO'.  Entries are buffered; the
    buffer is merged and printed whenever the label is 'B' or the path,
    'type' or 'parentTypes' differs from the previous entry, and once more
    at the end of input.

    Returns 0 on success and 100 when the tree file argument is missing.
    """
    import fileinput

    args = argv[1:]
    # The tree file is mandatory; report it instead of raising IndexError.
    if not args:
        print('usage: %s feats [file ...]' % argv[0])
        return 100

    builder = TreeBuilder()
    builder.addfeat(DF('type'))
    builder.addfeat(QF('deltaLine'))
    builder.addfeat(QF('deltaCols'))
    builder.addfeat(QF('deltaLeft'))
    builder.addfeat(QF('deltaRight'))
    builder.addfeat(DF('parentStart'))
    builder.addfeat(DF('parentEnd'))
    path = args.pop(0)
    with open(path) as fp:
        # SECURITY NOTE: eval() executes whatever the tree file contains;
        # only feed it files you trust.
        data = eval(fp.read())
    tree = builder.import_tree(data)

    def merge(ents):
        # Fold every buffered entry into the first one; the right-hand
        # fields end up being those of the last entry that has them.
        e0 = ents.pop(0)
        for e1 in ents:
            e0.merge(e1)
            for key in ('rightLine', 'rightTypes', 'deltaRight'):
                if key in e1:
                    e0[key] = e1[key]
        return e0

    fp = fileinput.input(args)
    b = []  # entries collected for the group currently being built
    prev = None
    for e in CommentEntry.load(fp):
        line = int(e['line'])
        cols = int(e['cols'])
        if 'prevLine' in e:
            e['deltaLine'] = line - int(e['prevLine'])
        if 'prevCols' in e:
            e['deltaCols'] = cols - int(e['prevCols'])
        if 'leftLine' in e:
            e['deltaLeft'] = line - int(e['leftLine'])
        if 'rightLine' in e:
            e['deltaRight'] = line - int(e['rightLine'])
        # A change of file, type or parent context always ends the group.
        if prev is not None and (prev.path != e.path or
                                 prev['type'] != e['type'] or
                                 prev['parentTypes'] != e['parentTypes']):
            if b:
                print(merge(b))
            b = []
        try:
            bio = tree.test(e)
        except ValueError:
            bio = 'B'
        e['keyBIO'] = bio
        if bio == 'B':
            if b:
                print(merge(b))
            b = []
        b.append(e)
        prev = e
    if b:
        print(merge(b))
    return 0
def main(argv):
    """Train a tree from comment entries, or test a saved one against them.

    Command line: ``prog [-d] [-m minkeys] [-f feats] [-k keyprop] [file ...]``

    Every entry read through ``fileinput`` gets ``entry.key`` set from
    ``entry[keyprop]`` (default 'key') plus the derived delta features.

    * Without ``-f`` the entries are handed to ``builder.build`` and the
      result of ``export_tree`` is printed (preceded by a dump of the tree
      when ``-d`` is given).
    * With ``-f`` the named file is evaluated and imported as a tree; each
      entry is tested and per-key precision/recall/F plus the overall hit
      ratio are printed.

    Returns 0 on success and 100 on a usage error.
    """
    import getopt
    import fileinput

    def usage():
        print(
            'usage: %s [-d] [-m minkeys] [-f feats] [-k keyprop] [file ...]'
            % argv[0])
        return 100

    try:
        (opts, args) = getopt.getopt(argv[1:], 'dm:f:k:')
    except getopt.GetoptError:
        return usage()

    debug = 0
    minkeys = 10
    feats = None
    keyprop = 'key'
    for (opt, value) in opts:
        if opt == '-d':
            debug += 1
        elif opt == '-m':
            minkeys = int(value)
        elif opt == '-f':
            feats = value
        elif opt == '-k':
            keyprop = value

    builder = TreeBuilder(minkeys=minkeys, debug=debug)
    add_cat_feats(builder)

    entries = []
    for entry in CommentEntry.load(fileinput.input(args)):
        entry.key = entry[keyprop]
        assert entry.key is not None
        row = int(entry['line'])
        col = int(entry['cols'])
        # (reference field, derived field, value the reference is taken from)
        for (ref, delta, base) in (('prevLine', 'deltaLine', row),
                                   ('prevCols', 'deltaCols', col),
                                   ('leftLine', 'deltaLeft', row),
                                   ('rightLine', 'deltaRight', row)):
            if ref in entry:
                entry[delta] = base - int(entry[ref])
        entries.append(entry)

    if feats is None:
        # training
        root = builder.build(entries)
        if debug:
            print()
            root.dump()
        print(export_tree(root))
        return 0

    # testing
    with open(feats) as fp:
        data = eval(fp.read())
    tree = builder.import_tree(data)
    hits = {}      # key -> entries predicted correctly
    expected = {}  # key -> entries carrying that key
    answered = {}  # key -> entries the tree answered with that key
    for entry in entries:
        expected[entry.key] = expected.get(entry.key, 0) + 1
        guess = tree.test(entry)
        answered[guess] = answered.get(guess, 0) + 1
        if entry.key == guess:
            hits[guess] = hits.get(guess, 0) + 1
    for (key, nhit) in hits.items():
        prec = nhit / answered[key]
        recl = nhit / expected[key]
        fscore = 2 * (prec * recl) / (prec + recl)
        print('%s: prec=%.3f(%d/%d), recl=%.3f(%d/%d), F=%.3f' %
              (key, prec, nhit, answered[key], recl, nhit, expected[key],
               fscore))
    print('%d/%d' % (sum(hits.values()), sum(expected.values())))
    return 0