def _collect_region_points(rets, is_extra, points, labels, apps, scrs):
    """Fold ``load_point`` results into the accumulator lists, in place.

    Args:
        rets: iterable of ``(xpoints, xlabels, xapps, xscrs)`` tuples; the
            four sequences are indexed in lockstep.
        is_extra: value stored under each point's ``'extra'`` key.  When
            true, points whose final label is ``'NONE'`` are dropped.
        points, labels, apps, scrs: output lists that are appended to.
    """
    for xpoints, xlabels, xapps, xscrs in rets:
        for i, point in enumerate(xpoints):
            point['extra'] = is_extra
            # The reported screen name is discarded: every point is
            # validated against (and returned with) the 'region' screen.
            xscrs[i] = 'region'
            if xlabels[i] != 'NONE' and not tags.valid(xscrs[i], xlabels[i]):
                xlabels[i] = 'NONE'
            if is_extra and xlabels[i] == 'NONE':
                # Extra files only contribute labelled points.
                continue
            points.append(point)
            labels.append(xlabels[i])
            apps.append(xapps[i])
            scrs.append(xscrs[i])


def load_points(files, show_progress=False, no_appname=None, extra=None,
                parallel=True, nopt=False):
    """Load points from *files* and *extra*, relabelled for the 'region' screen.

    Every file is passed to ``load_point(no_appname, nopt, file)`` through
    ``util.parallel_work``.  Labels that ``tags.valid('region', label)``
    rejects are replaced by ``'NONE'``.  Points from *files* are always kept
    (``point['extra'] = False``); points from *extra* are kept only when their
    label is not ``'NONE'`` (``point['extra'] = True``).

    Args:
        files: primary input files.
        show_progress: forwarded to ``util.parallel_work``.
        no_appname: forwarded to ``load_point`` as its first argument.
        extra: additional input files; ``None`` (the default) means none.
        parallel: forwarded to ``util.parallel_work``.
        nopt: forwarded to ``load_point`` as its second argument.

    Returns:
        A ``(points, labels, apps, scrs)`` tuple of parallel lists.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default.
    if extra is None:
        extra = []

    points = []
    labels = []
    apps = []
    scrs = []
    loader = functools.partial(load_point, no_appname, nopt)

    rets = util.parallel_work(loader, files, parallel,
                              show_progress=show_progress)
    _collect_region_points(rets, False, points, labels, apps, scrs)

    rets = util.parallel_work(loader, extra, parallel,
                              show_progress=show_progress)
    _collect_region_points(rets, True, points, labels, apps, scrs)

    return (points, labels, apps, scrs)
def load_points(files, show_progress=True, no_appname=None, extra=None):
    """Load points from *files* and *extra* using a worker pool.

    Every file is passed to ``load_point(no_appname, file)`` via
    ``multiprocessing.Pool.map`` with ``config.threads`` worker processes.
    Labels rejected by ``tags.valid(screen, label)`` become ``'NONE'``.

    * Points from *files* are all kept and get ``point['extra'] = False``.
      When the module-level ``try_all`` flag is set, their screen is
      overridden with ``'ALL'`` before validation.
    * Points from *extra* get ``point['extra'] = True`` and are kept only
      when their label is not ``'NONE'``.

    Args:
        files: primary input files.
        show_progress: accepted for interface compatibility; not used here.
        no_appname: forwarded to ``load_point`` as its first argument.
        extra: additional input files; ``None`` (the default) means none.

    Returns:
        A ``(points, labels, apps, scrs)`` tuple of parallel lists.
    """
    # A ``None`` sentinel replaces the former mutable ``[]`` default.
    if extra is None:
        extra = []

    points = []
    labels = []
    apps = []
    scrs = []
    loader = functools.partial(load_point, no_appname)

    pool = multiprocessing.Pool(processes=config.threads)
    try:
        for xpoints, xlabels, xapps, xscrs in pool.map(loader, files):
            for i, point in enumerate(xpoints):
                point['extra'] = False
                if try_all:
                    xscrs[i] = 'ALL'
                if (xlabels[i] != 'NONE'
                        and not tags.valid(xscrs[i], xlabels[i])):
                    xlabels[i] = 'NONE'
            points.extend(xpoints)
            labels.extend(xlabels)
            apps.extend(xapps)
            scrs.extend(xscrs)

        # NOTE(review): the ``try_all`` screen override is not applied to
        # extra files, matching the previous behaviour -- confirm intent.
        for xpoints, xlabels, xapps, xscrs in pool.map(loader, extra):
            for i, point in enumerate(xpoints):
                point['extra'] = True
                if (xlabels[i] != 'NONE'
                        and not tags.valid(xscrs[i], xlabels[i])):
                    xlabels[i] = 'NONE'
                if xlabels[i] != 'NONE':
                    points.append(point)
                    labels.append(xlabels[i])
                    apps.append(xapps[i])
                    scrs.append(xscrs[i])
    finally:
        # Release the workers even if loading or post-processing raised;
        # previously the pool was only closed on the success path and was
        # never joined.
        pool.close()
        pool.join()

    return (points, labels, apps, scrs)
def evaluate_app(parallel, guispath, extrapath, my_points):
    """Score the classifier returned by ``learn`` against one app's points.

    The app name is read from the first entry of *my_points*.  For each point
    the classifier's predicted tags (second element of every ``classify``
    result) are compared with the point's ``'regs'`` entries that
    ``tags.valid('region', tag)`` accepts.

    Args:
        parallel: forwarded to ``learn`` as ``parallel=``.
        guispath: forwarded to ``learn``.
        extrapath: forwarded to ``learn``.
        my_points: non-empty sequence of point dicts providing the keys
            ``'app'``, ``'file'``, ``'tree'``, ``'img'``, ``'path'``,
            ``'regs'`` and ``'scr'``.

    Returns:
        dict with:
            ``'good'``: points whose predicted tag set equals the expected set;
            ``'total'``: number of points evaluated;
            ``'inside'``: points for which every expected tag was predicted;
            ``'correct'`` / ``'missing'`` / ``'extra'``: lists of tags that
            were expected and predicted / expected only / predicted only;
            ``'app'``: the app name;
            ``'scr_avg'``: mean over screens of the per-screen exact-match
            ratio.
    """
    app = my_points[0]['app']
    logger.info("app %s, learning...", app)
    learn_started = time.time()
    clas = learn(guispath, app, extrapath, parallel=parallel)
    logger.info("%s: Learned in %.3fs, testing...", app,
                time.time() - learn_started)

    good_cnt = 0
    inside_cnt = 0
    total_cnt = 0
    correct, missing, extra = [], [], []
    correct_scr = {}
    total_scr = {}

    for point in my_points:
        cached_pts = load_cached_point(os.path.splitext(point['file'])[0])
        rets = clas.classify(point['tree'], point['img'], point['path'],
                             cached_pts[0] if cached_pts else None)
        regs = [tag for tag in point['regs'] if tags.valid('region', tag)]
        print(os.path.basename(point['file']), 'Tags:', regs, 'Rets:', rets)

        ret_region = {ret[1] for ret in rets}
        screen = point['scr']

        # Exact match: predicted tag set equals the expected tag set.
        if set(regs) == ret_region:
            good_cnt += 1
            correct_scr[screen] = correct_scr.get(screen, 0) + 1

        matched = [tag for tag in regs if tag in ret_region]
        correct.extend(matched)
        missing.extend(tag for tag in regs if tag not in ret_region)
        extra.extend(tag for tag in ret_region if tag not in regs)

        # "Inside": every expected tag was found, extras are tolerated.
        if len(matched) == len(regs):
            inside_cnt += 1

        total_cnt += 1
        total_scr[screen] = total_scr.get(screen, 0) + 1

    # Unweighted mean of the per-screen exact-match ratios.
    avg_scr_ratio = 0.0
    for screen, seen in total_scr.items():
        avg_scr_ratio += 1.0 * correct_scr.get(screen, 0) / seen
    avg_scr_ratio /= len(total_scr)

    return {
        'good': good_cnt,
        'total': total_cnt,
        'inside': inside_cnt,
        'correct': correct,
        'missing': missing,
        'extra': extra,
        'app': app,
        'scr_avg': avg_scr_ratio,
    }
def _safe_ratio(numerator, denominator, scale=1.0, undefined=-1.0):
    """Return ``scale * numerator / denominator``, or *undefined* if the
    denominator is zero."""
    if denominator == 0:
        return undefined
    return scale * numerator / denominator


def run_clas(files, eval_app, extrafiles):
    """Evaluate per-screen classification with app-wise splits and print stats.

    Points are loaded with ``load_points(files, extra=extrafiles)`` and labels
    rejected by ``tags.valid(screen, label)`` are reset to ``'NONE'``.  For
    every distinct screen the matching points are split into train/test index
    sets, each split is passed to ``evaluate`` in a worker pool, and the
    returned counters are accumulated per screen, globally and per test app.
    All results are printed; nothing is returned.

    Ratios and averages whose denominator is zero are reported as ``-1.0``
    (the sentinel already used for undefined per-app percentages) instead of
    raising ``ZeroDivisionError``.

    Args:
        files: input files handed to ``load_points``.
        eval_app: when ``None``, every app is used as a test fold in turn via
            ``PredefinedSplit``; otherwise only points of this app are tested
            and all other apps form the training set.
        extrafiles: forwarded to ``load_points`` as ``extra=``.
    """
    print("Analyzing files")
    (points, labels, apps, scrs) = load_points(files, extra=extrafiles)
    print("Point count: %d" % len(points))
    if len(points) == 0:
        print("No point matches specification!")
        return

    for i in range(len(labels)):
        if not tags.valid(scrs[i], labels[i]):
            labels[i] = 'NONE'

    # Fold id of an app = index of its first occurrence in the sorted app
    # list.  Precomputed once instead of calling ``list.index`` per point.
    sorted_apps = sorted(apps)
    app_fold = {}
    for idx, app in enumerate(sorted_apps):
        app_fold.setdefault(app, idx)

    global_tag_good = global_none_good = global_tag_bad = global_none_bad = 0
    global_good_cnt = {}
    global_bad_cnt = {}
    global_badpred_cnt = {}
    conf_cnt = {}
    app_stat = {}

    for scr in sorted(set(scrs)):
        scr_points = []
        scr_apps = []
        scr_labels = []
        for i in range(len(points)):
            if scr == 'ALL' or scrs[i] == scr:
                scr_points.append(points[i])
                scr_apps.append(apps[i])
                scr_labels.append(labels[i])

        if len(set(scr_apps)) == 1:
            # single-app screen, can't test
            continue

        if eval_app is None:
            split = PredefinedSplit(
                test_fold=[app_fold[app] for app in scr_apps]).split()
        else:
            train_idx = []
            test_idx = []
            for i in range(len(scr_apps)):
                if scr_apps[i] != eval_app:
                    train_idx.append(i)
                else:
                    test_idx.append(i)
            if test_idx == []:
                continue
            split = [(train_idx, test_idx)]

        all_tag_good = all_none_good = all_tag_bad = all_none_bad = 0
        good_cnt = {}
        bad_cnt = {}
        badpred_cnt = {}

        pool = multiprocessing.Pool(processes=config.threads)
        try:
            rets = pool.map(
                functools.partial(evaluate, scr_points, scr_labels, scr_apps),
                split)
        finally:
            # ``map`` blocks until every result is available, so the pool
            # can be released here even when evaluation raised.
            pool.close()
            pool.join()

        for (tag_good, tag_bad, none_good, none_bad, xgood, xbad, xbadpred,
             xconf, test_app) in rets:
            if test_app is None:
                continue
            all_tag_good += tag_good
            all_tag_bad += tag_bad
            all_none_good += none_good
            all_none_bad += none_bad
            merge_cnt(good_cnt, xgood)
            merge_cnt(bad_cnt, xbad)
            merge_cnt(badpred_cnt, xbadpred)
            merge_cnt(conf_cnt, xconf)
            if test_app not in app_stat:
                app_stat[test_app] = {}
            merge_cnt(app_stat[test_app], {'tag_good': tag_good,
                                           'tag_bad': tag_bad,
                                           'none_good': none_good,
                                           'none_bad': none_bad})

        # per-screen stat info
        print("SCREEN %20s good: %4d,%4d bad: %4d,%4d %.3f %.3f" % (
            scr, all_tag_good, all_none_good, all_tag_bad, all_none_bad,
            _safe_ratio(all_tag_good, all_tag_good + all_tag_bad),
            _safe_ratio(all_tag_good + all_none_good,
                        all_tag_bad + all_none_bad +
                        all_tag_good + all_none_good)))
        if print_per_screen:
            print("IN ALL: good: %d,%d bad: %d,%d" % (
                all_tag_good, all_none_good, all_tag_bad, all_none_bad))
            print("PER TAG:")
            for item in sorted(set(list(good_cnt) + list(bad_cnt))):
                print("\t%s: %d+ %d- %d*" % (item, good_cnt.get(item, 0),
                                             bad_cnt.get(item, 0),
                                             badpred_cnt.get(item, 0)))

        global_tag_good += all_tag_good
        global_tag_bad += all_tag_bad
        global_none_good += all_none_good
        global_none_bad += all_none_bad
        merge_cnt(global_good_cnt, good_cnt)
        merge_cnt(global_bad_cnt, bad_cnt)
        merge_cnt(global_badpred_cnt, badpred_cnt)

    # NOTE(review): ``scrs`` has one entry per point, so this is true for any
    # input with more than one point; ``len(set(scrs)) > 1`` may have been
    # intended.  Left unchanged to keep the printed output stable.
    if len(scrs) > 1:
        print("GLOBAL: good: %d,%d bad: %d,%d G/G: %.3f T/T: %.3f" % (
            global_tag_good, global_none_good,
            global_tag_bad, global_none_bad,
            _safe_ratio(global_tag_good, global_tag_bad + global_tag_good),
            _safe_ratio(global_tag_good + global_none_good,
                        global_tag_bad + global_none_bad +
                        global_tag_good + global_none_good)))
        print("PER TAG:")
        for item in sorted(set(list(global_good_cnt) + list(global_bad_cnt))):
            print("\t%s: %d+ %d- %d*" % (
                item, global_good_cnt.get(item, 0),
                global_bad_cnt.get(item, 0),
                global_badpred_cnt.get(item, 0)))

    # Per-app percentages, averaged over the apps for which each is defined.
    sum_tag_good = 0.0
    sum_tag_good_cnt = 0
    sum_all_good = 0.0
    sum_all_good_cnt = 0
    sum_none_good = 0.0
    sum_none_good_cnt = 0
    for app in app_stat:
        entry = app_stat[app]
        tag_total = entry['tag_good'] + entry['tag_bad']
        none_total = entry['none_good'] + entry['none_bad']

        if tag_total > 0:
            tag_good_per = 100.0 * entry['tag_good'] / tag_total
            sum_tag_good += tag_good_per
            sum_tag_good_cnt += 1
        else:
            tag_good_per = -1.0

        if tag_total + none_total > 0:
            all_good_per = 100.0 * (entry['tag_good'] + entry['none_good']) / (
                tag_total + none_total)
            sum_all_good += all_good_per
            sum_all_good_cnt += 1
            # An app may have tagged points but no 'NONE' points; it is then
            # left out of the NONE average rather than dividing by zero.
            if none_total > 0:
                none_good_per = 100.0 * entry['none_good'] / none_total
                sum_none_good += none_good_per
                sum_none_good_cnt += 1
        else:
            all_good_per = -1.0

        if print_perapp or print_detail:
            print("%s: good: %d, %d bad: %d, %d %.3f, %.3f" % (
                app, entry['tag_good'], entry['none_good'],
                entry['tag_bad'], entry['none_bad'],
                tag_good_per, all_good_per))

    print("FINAL app avg: +%.3f, *%.3f, -%.3f" % (
        _safe_ratio(sum_tag_good, sum_tag_good_cnt),
        _safe_ratio(sum_all_good, sum_all_good_cnt),
        _safe_ratio(sum_none_good, sum_none_good_cnt)))
    print("conf_order: %r" % conf_cnt)
tree = analyze.load_tree(filename) (appname, scrname) = util.get_aux_info(filename) if not tags.validapp(appname): continue if not tags.validscr(scrname) or scrname in tags.tag['ignored_screens']: #print('skip', scrname) continue scr_cnt += 1 for nodeid in tree: node = tree[nodeid] tagged = False for tag in node['tags']: if tags.valid(scrname, tag): tagged = True break if not node['click'] and is_click: continue if node['children'] != [] and is_leaf: continue if node['text'] == '' and is_text: continue if is_visible and (node['height'] < 5 or node['width'] < 5): continue leaf_cnt += 1 if tagged: tag_cnt += 1
wids = {} tags.load("../etc-%s" % catname) for filename in glob.glob("../guis-%s/*.xml" % catname): basename = os.path.basename(filename).split('.')[0] appname = basename.split('_')[0] scrname = basename.split('_')[-1] if appname not in scrs: scrs[appname] = {} wids[appname] = {} scrs[appname][scrname] = 1 descfile = filename.replace('.xml', '.desc.txt') if os.path.exists(descfile): for line in open(descfile): tagname = line.strip().split(' ', 1)[1] if tags.valid(scrname, tagname): wids[appname][tagname] = 1 tot = 0 totw = 0 appc = 0 for app in scrs: if app not in tags.apps: continue print(app, len(scrs[app]), len(wids[app])) tot += len(scrs[app]) totw += len(wids[app]) appc += 1 print("CAT", catname, tot, 1.0 * tot / appc) print("CAT", catname, totw, 1.0 * totw / appc)
if am_bridge: bridge_totline += 1 if am_app: app_totline += 1 if cus_flow: cus_totline += 1 elif '@%s:%s' % (appname, test_name) in tests: totline += 1 if "screen is" in line or "screen is not" in line: cur_scr = line.split(' ')[-1] used_scr.add(cur_scr) if "@" in line and line[0] != '@': for wid in wid_re.findall(line): wid = wid[1:] if tags.valid(cur_scr, wid): used_wid.add(cur_scr + ':' + wid) print("Cat: ", catname) if tests - found: print(tests - found) print('Lib: ', testcnt, totline, "%.2f" % (1.0 * totline / testcnt)) print('Custom:', cus_testcnt, cus_totline, "%.2f" % (1.0 * cus_totline / cus_testcnt)) print('App:', app_cnt, app_totline) print('Bridge', bridge_cnt, bridge_totline) print('A+B:', app_cnt + bridge_cnt, app_totline + bridge_totline) print('Used screens:', len(used_scr), list(sorted(used_scr))) print('Used widgets:', len(used_wid), list(sorted(used_wid)))