def load_info(only_app, only_scrs, filename):
    """Load one dump file and bundle its tree with naming metadata.

    The app name, an optional numeric case id and the screen name are parsed
    out of ``filename`` with a regular expression.

    Args:
        only_app: when not ``None``, files of any other app are skipped.
        only_scrs: when not ``None``, a container of accepted screen names.
        filename: path of the dump file to load.

    Returns:
        ``None`` if the file is filtered out, otherwise a dict with the keys
        ``tree``, ``img``, ``regs``, ``app``, ``file``, ``path`` and ``scr``.
    """
    match = re.search('([a-z0-9]+)_?([0-9]+)?_([a-z0-9]+)', filename)
    appname, _caseid, scrname = match.groups()

    # Apply both filters before any file is read.
    if ((only_app is not None and appname != only_app)
            or (only_scrs is not None and scrname not in only_scrs)):
        return None

    stem = os.path.splitext(filename)[0]

    tree = analyze.load_tree(filename)
    hidden.find_hidden_ocr(tree)
    hidden.mark_children_hidden_ocr(tree)

    path = get_path(stem)
    return dict(
        tree=tree,
        img=stem + '.png',
        regs=collect_tags(tree),
        app=appname,
        file=filename,
        path=path,
        scr=scrname,
    )
def dump_tree(self, filename):
    """Load one tree, update the hidden/total node counters and maybe print it.

    The app name is taken as the part of the file's basename before the
    first underscore. Every node increments the overall and per-app totals;
    nodes whose ``visible`` field is ``'hidden'`` also increment the hidden
    counters. A hidden node that carries ``regs`` or ``tags`` is logged as an
    error.
    """
    tree = analyze.load_tree(filename)
    app = os.path.basename(filename).split('_')[0]

    # NOTE(review): the nesting below was reconstructed from flattened
    # source -- confirm the fill step is meant to run only when hiding is on.
    if self.hide:
        hidden.find_hidden_ocr(tree)
        if not self.nofill:
            hidden.mark_children_hidden_ocr(tree)

    for nodeid in tree:
        node = tree[nodeid]
        is_hidden = node['visible'] == 'hidden'

        if is_hidden:
            self.hidden_cnt += 1
            self.hidden_app[app] = self.hidden_app.get(app, 0) + 1

        self.total_cnt += 1
        self.total_app[app] = self.total_app.get(app, 0) + 1

        labelled = node['regs'] or node['tags']
        if labelled and is_hidden:
            # Report the first region label if there is one, else the tags.
            what = node['regs'][0] if node['regs'] else node['tags']
            logger.error("ERROR!: INVISIBLE %s %d %s", filename, nodeid, what)

    if self.print_tree or self.show:
        util.print_tree(tree, show_hidden=self.show)
def process_input(filename):
    """Build (or load from cache) the data point for one dump file.

    The basename is expected to look like ``<app>_<screen>.<ext>`` or
    ``<app>_<case>_<screen>.<ext>``. Screen names are normalised: ``cat1``
    becomes ``cat``, ``cat`` stays as is, and any other name starting with
    ``cat`` becomes ``cat2``.

    A pickled copy of the point is kept next to the input as ``<base>.pt``.
    It is reused unless the module-level ``nopt`` flag is set; an unreadable
    cache is ignored and rebuilt.

    Args:
        filename: path to a ``.xml`` or ``.hier`` dump. A ``.xml`` input needs
            a sibling ``.txt`` file, a ``.hier`` input a sibling ``.url`` file.

    Returns:
        A ``(scrname, pt)`` tuple.

    Raises:
        ValueError: if ``filename`` contains neither ``.xml`` nor ``.hier``.
    """
    filebase = os.path.splitext(filename)[0]
    basename = os.path.basename(filename)
    pagename = basename.split(".")[0]
    if basename.count("_") == 1:
        (appname, scrname) = pagename.split("_")
    else:
        (appname, _casename, scrname) = pagename.split("_")

    if scrname == 'cat1':
        scrname = "cat"
    elif scrname != 'cat' and scrname.startswith('cat'):
        scrname = "cat2"

    ptfile = filebase + '.pt'
    if os.path.exists(ptfile) and not nopt:
        # NOTE: pickle must only be used on cache files this tool wrote.
        with open(ptfile, 'rb') as ptf:
            try:
                return (scrname, pickle.Unpickler(ptf).load())
            except Exception:
                # Corrupt or stale cache: fall through and rebuild it.
                pass

    tree = analyze.load_tree(filename)
    hidden.find_hidden_ocr(tree)
    hidden.mark_children_hidden_ocr(tree)

    if '.xml' in filename:
        with open(filebase + '.txt') as actf:
            actname = actf.read()
    elif '.hier' in filename:
        with open(filebase + '.url') as urlf:
            actname = util.url_to_actname(urlf.read())
    else:
        raise ValueError(
            "unsupported input file (expected .xml or .hier): %s" % filename)

    imgfile = filebase + '.png'
    pt = prepare_point(actname, imgfile, tree)
    pt['file'] = filename
    pt['app'] = appname
    # NOTE(review): 'scr' receives the app name, not the screen name. This
    # looks like a copy-paste slip but is kept so existing caches and
    # consumers see unchanged data -- confirm before switching to scrname.
    pt['scr'] = appname

    # Write the cache only once the point exists, so a failure above cannot
    # leave an empty or truncated .pt file behind.
    with open(ptfile, 'wb') as ptf:
        pickle.Pickler(ptf).dump(pt)
    return (scrname, pt)
def load(self, prev=False):
    """Advance to the next file (or step back one) and reload all state.

    Quits the GTK main loop once every file has been shown. Otherwise the
    tree, description or ML results, screenshot surface, per-item colours
    and the OCR image are refreshed for the newly selected file.

    Args:
        prev: when true, load the file before the one currently shown.
    """
    if self.file_idx == len(self.filenames):
        Gtk.main_quit()
        return

    # file_idx is bumped after each selection, so it points one past the
    # file on display; going back one file means subtracting two.
    if prev:
        self.file_idx -= 2

    filename = self.filenames[self.file_idx]
    self.app, self.scr = util.get_aux_info(filename)
    self.memory.setdefault(self.app, {})

    self.set_title(filename)
    self.file_idx += 1
    print("Loading %s" % filename)

    stem = os.path.splitext(filename)[0]
    self.pngfile = stem + '.png'
    self.descname = stem + '.%s.txt' % self.kind

    # Time the tree load plus the description / ML step.
    began = time.time()
    self.tree = analyze.load_tree(filename)
    hidden.find_hidden_ocr(self.tree)
    hidden.mark_children_hidden_ocr(self.tree)
    util.print_tree(self.tree, show_hidden=self.show_hidden)
    if self.ml:
        self.get_ml_rets()
    else:
        self.load_desc()
    elapsed = time.time() - began
    print("Load time: %.3fs" % elapsed)

    self.focus_id = -1
    self.colors = {}
    self.ptx = self.pty = 0

    self.img = cairo.ImageSurface.create_from_png(self.pngfile)
    img_w = self.img.get_width()
    img_h = self.img.get_height()
    print('Image:', img_w, img_h)

    # The smallest item id is used as the root of the tree.
    root_node = self.tree[min(self.tree)]
    print('Root node:', root_node['width'], root_node['height'])

    self.scale = 1.0 * img_w / config.width
    # The same value is printed on both sides of the arrow; a step that
    # snapped the scale to a known ratio is disabled in the original.
    shown_scale = '%.3f' % self.scale
    print('Scale:', shown_scale, '->', shown_scale)

    self.resize(img_w, img_h)
    self.mark_depth(self.tree)

    # One random colour per item, each channel drawn from [0, 0.5).
    for item_id in self.tree:
        self.colors[item_id] = tuple(random.random() / 2 for _ in range(3))

    imgocr = Image.open(self.pngfile)
    self.imgwidth, self.imgheight = imgocr.width, imgocr.height
    self.tesapi.SetImage(imgocr)
    self.tesapi.SetSourceResolution(config.ocr_resolution)

    self.dump_memory()
def load_point(no_appname, nopt, filename):
    """Collect feature points and labels for every candidate item of a file.

    Args:
        no_appname: app name to exclude; files of that app yield nothing.
        nopt: when true, the cached points are not consulted.
        filename: path of the dump file; app name, optional case id and
            screen name are parsed from it.

    Returns:
        A ``(points, labels, apps, scrs)`` tuple of parallel lists. All four
        are empty when the file is excluded, ignored or has no label. If
        preparing a point raises ``ValueError`` the points gathered so far
        are returned and nothing is cached.
    """
    points = []
    labels = []
    apps = []
    scrs = []
    # Every non-cached exit hands back these same four lists.
    result = (points, labels, apps, scrs)

    filebase = os.path.splitext(filename)[0]
    match = re.search('([a-z0-9]+)_?([0-9]+)?_([a-z0-9]+)', filename)
    appname, caseid, scrname = match.groups()

    if appname is not None and appname == no_appname:
        return result
    if scrname in tags.tag['ignored_screens']:
        return result

    if not nopt:
        cached = load_cached_point(filebase)
        if cached is not None:
            if discard_middle_layer:
                fresh_tree = analyze.load_tree(filename)
                discard_middle(cached[0], fresh_tree)
            return cached

    tree = analyze.load_tree(filename)
    hidden.find_hidden_ocr(tree)
    hidden.mark_children_hidden_ocr(tree)

    if not has_label(tree):
        logger.info("%s: no label", filename)
        return result

    logger.debug("analyzing %s", filename)

    pngfile = filebase + '.png'
    imgdata = skimage.transform.resize(
        skimage.io.imread(pngfile, as_grey=True),
        (config.height, config.width),
        mode='constant')

    treeinfo = analyze.collect_treeinfo(tree)
    path = get_path(filebase)
    collector = FeatureCollector(tree, pngfile)

    for itemid in sorted(tree):
        item = tree[itemid]
        if cannot_be_region(item):
            continue

        try:
            point = collector.prepare_point(
                itemid, appname, scrname, caseid, imgdata, treeinfo, path)
        except ValueError:
            logger.exception("ERROR at %s" % filename)
            return result

        if point['empty']:
            continue

        # The label is the item's first region tag, or 'NONE' without one.
        regs = tree[itemid]['regs']
        pt_tag = regs[0] if len(regs) > 0 else 'NONE'

        points.append(point)
        labels.append(pt_tag)
        apps.append(appname)
        scrs.append(scrname)

    cache_points(filebase, points, labels, apps, scrs)

    if discard_middle_layer:
        # A freshly loaded tree is used here, not the one processed above.
        fresh_tree = analyze.load_tree(filename)
        discard_middle(points, fresh_tree)

    return result
def load_point(no_appname, filename):
    """Collect feature points and labels for every item of a dump file.

    Points are cached next to the input as ``<base>.pts``: a pickled item
    count followed by ``point, tag, app, screen`` for each item. The cache
    is used unless the module-level ``nopt`` flag is set.

    Args:
        no_appname: app name to exclude; files of that app yield nothing.
        filename: path of the dump file; app name, optional case id and
            screen name are parsed from it.

    Returns:
        A ``(points, labels, apps, scrs)`` tuple of parallel lists. All four
        are empty when the file is excluded, ignored or has no label. If
        preparing a point raises ``ValueError`` the points gathered so far
        are returned.
    """
    points = []
    labels = []
    apps = []
    scrs = []

    filebase = os.path.splitext(filename)[0]
    featurefile = filebase + '.pts'
    (appname, caseid, scrname) = re.search(
        '([a-z0-9]+)_?([0-9]+)?_([a-z0-9]+)', filename).groups()

    if appname is not None and appname == no_appname:
        return (points, labels, apps, scrs)
    if scrname in tags.tag['ignored_screens']:
        return (points, labels, apps, scrs)

    if os.path.exists(featurefile) and not nopt:
        # NOTE: pickle must only be used on cache files this tool wrote.
        with open(featurefile, 'rb') as featuref:
            unpickler = pickle.Unpickler(featuref)
            count = unpickler.load()
            for _ in range(count):
                try:
                    point = unpickler.load()
                    pt_tag = unpickler.load()
                    cached_app = unpickler.load()
                    cached_scr = unpickler.load()
                except Exception:
                    # The header counts every item, but a run that stopped
                    # early wrote fewer records; keep what was read.
                    break
                points.append(point)
                labels.append(pt_tag)
                apps.append(cached_app)
                scrs.append(cached_scr)
        return (points, labels, apps, scrs)

    if not nopt and print_empty_pt:
        logger.info("analyzing %s", filename)

    tree = analyze.load_tree(filename)
    hidden.find_hidden_ocr(tree)
    hidden.mark_children_hidden_ocr(tree)

    if not has_label(tree):
        logger.info("%s: no label", filename)
        return (points, labels, apps, scrs)

    pngfile = filebase + '.png'
    imgdata = skimage.io.imread(pngfile, as_grey=True)
    imgdata = skimage.transform.resize(imgdata,
                                       (config.height, config.width),
                                       mode='constant')

    item_ids = sorted(tree)
    # The context manager closes (and flushes) the cache on every exit,
    # including the early return on ValueError below.
    with open(featurefile, 'wb') as featuref:
        pickler = pickle.Pickler(featuref)
        pickler.dump(len(item_ids))

        treeinfo = analyze.collect_treeinfo(tree)
        tesapi = get_tesapi()
        set_tes_image(tesapi, imgdata)

        # How many times each point is added to the returned lists; the
        # cache always stores a point once.
        repeat = REP_COUNT.get(scrname, 1) if use_rep else 1

        for itemid in item_ids:
            try:
                point = prepare_point(tree, itemid, appname, scrname, caseid,
                                      imgdata, treeinfo, tesapi)
            except ValueError:
                logger.exception("ERROR at %s", filename)
                return (points, labels, apps, scrs)

            # The label is the item's first tag, or 'NONE' without one.
            item_tags = tree[itemid]['tags']
            pt_tag = item_tags[0] if len(item_tags) > 0 else 'NONE'

            for _ in range(repeat):
                points.append(point)
                labels.append(pt_tag)
                apps.append(appname)
                scrs.append(scrname)

            pickler.dump(point)
            pickler.dump(pt_tag)
            pickler.dump(appname)
            pickler.dump(scrname)

            if print_points:
                print_point(point)

    return (points, labels, apps, scrs)
import tags catname = sys.argv[1] tags.load("../etc-%s" % catname) is_leaf = 'leaf' in sys.argv is_click = 'click' in sys.argv is_text = 'text' in sys.argv is_visible = 'visible' in sys.argv leaf_cnt = 0 tag_cnt = 0 scr_cnt = 0 for filename in util.collect_files("../guis-%s" % catname): tree = analyze.load_tree(filename) (appname, scrname) = util.get_aux_info(filename) if not tags.validapp(appname): continue if not tags.validscr(scrname) or scrname in tags.tag['ignored_screens']: #print('skip', scrname) continue scr_cnt += 1 for nodeid in tree: node = tree[nodeid] tagged = False for tag in node['tags']: if tags.valid(scrname, tag):