def wtl_thread(cli_args):
    """Build, run and shut down a click-through workflow.

    Args:
        cli_args: Parsed command-line arguments. The attributes ``config``,
            ``url`` and ``output`` are read and forwarded to the workflow.

    The workflow uses the module-level ``policy`` and a single
    ``ActiveElementFilter`` that labels elements for ``Click`` actions with
    highlighting enabled.
    """
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )
    try:
        workflow.classifiers.add(
            wtl.ActiveElementFilter(action=wtl.actions.Click, highlight=True)
        )
        workflow.run()
    finally:
        # Always shut the workflow down, even when the run raises, so that
        # whatever it holds open (presumably the browser session) is released.
        workflow.quit()
def test_mhtml_export():
    """Check that a workflow run writes a non-empty ``page.mhtml`` per step.

    The workflow is run under a virtual display with MHTML export and debug
    saving switched on, after which the output sub-directories ``0`` and ``1``
    must each contain a ``page.mhtml`` file with a non-zero size.
    """
    OUTPUT_DIR = Path("./mhtml/")

    # Start from a clean slate: files left behind by an earlier, aborted run
    # would otherwise let the existence checks below pass spuriously.
    shutil.rmtree(OUTPUT_DIR, ignore_errors=True)

    try:
        with Xvfb():
            # NOTE(review): workflow.quit() is never called in this test --
            # confirm whether that is intentional.
            workflow = wtl.Workflow(
                url=TESTURL,
                policy=test_policy,
                config=wtl.Config(
                    [
                        "default",
                        "browser.enable_mhtml=True",
                        "scraping.save_mhtml=True",
                        "debug.save=True",
                    ]
                ),
                output=OUTPUT_DIR,
            )
            workflow.run()

        for step in ("0", "1"):
            page = OUTPUT_DIR / step / "page.mhtml"
            assert page.exists()
            assert page.stat().st_size > 0
    finally:
        # Remove the artefacts whether or not the assertions held, so a failed
        # run does not leave ./mhtml behind for the next one.
        shutil.rmtree(OUTPUT_DIR, ignore_errors=True)
def _start_btn(elements, _):
    """Return the elements that form the visible start cover.

    An element qualifies when its ``metadata["id"]`` equals
    ``"sync-task-cover"`` and its ``metadata["display"]`` contains ``"block"``.
    The second positional argument is ignored.
    """
    return [
        e
        for e in elements
        if e.metadata["id"] == "sync-task-cover" and "block" in e.metadata["display"]
    ]


def _tile_div(elements, _):
    """Return the ``span`` elements that represent board tiles.

    An element qualifies when its tag is ``span``, its id starts with
    ``"ttt"`` and the parent of its ``tag`` is named ``"div"``.
    The second positional argument is ignored.
    """
    return [
        e
        for e in elements
        if e.metadata["tag"] == "span"
        and e.metadata["id"].startswith("ttt")
        and e.tag.parent.name == "div"
    ]


if __name__ == "__main__":
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://stanfordnlp.github.io/miniwob-plusplus/html/miniwob/tic-tac-toe.html",
        output=cli_args.output,
    )

    try:
        # One default filter plus two named, clickable ones for the start
        # cover and the board tiles.
        workflow.classifiers.add(wtl.ActiveElementFilter())
        workflow.classifiers.add(
            wtl.ActiveElementFilter(name="start", callback=_start_btn, action=Click)
        )
        workflow.classifiers.add(
            wtl.ActiveElementFilter(name="tile", callback=_tile_div, action=Click)
        )

        workflow.run()
    finally:
        # Shut the workflow down even if the run raised.
        workflow.quit()
# After seven deletions, start over from step 3 if workflow.loop_idx == 7: return wtl.actions.Revert(3) # Randomly pick one of the deleting actions return [ random.choice(view.actions.by_type(wtl.actions.Remove)), wtl.actions.Wait(0.25), wtl.actions.Clear(viewport=False), wtl.actions.WaitForUser(), ] if __name__ == "__main__": cli_args = parse_cli_args() wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output) wf.classifiers.add( wtl.ElementClassifier( name="dementor", enabled=True, highlight=False, action=wtl.actions.Remove, callback=lambda e, _: e, # Will label _all_ elements removable ) ) wf.run() wf.quit()
from .util import parse_cli_args


@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> wtl.Action:
    """Click a random clickable element, occasionally refreshing instead.

    Before choosing, the policy asserts that ``workflow.duplicate_loop_idx``
    matches ``workflow.loop_idx``, i.e. that ``set_duplicate_loop_idx`` has
    been applied for the current iteration.

    Returns:
        A randomly chosen ``Click`` action when ``random()`` is below 0.95,
        otherwise the unique ``Refresh`` action of the view.
    """
    assert workflow.duplicate_loop_idx == workflow.loop_idx

    # With some small probability, refresh instead of clicking.
    if random() < 0.95:
        return choice(view.actions.by_type(Click))
    return view.actions.by_type(Refresh).unique()


def set_duplicate_loop_idx(workflow: wtl.Workflow):
    """Copy the workflow's current ``loop_idx`` into ``duplicate_loop_idx``."""
    workflow.duplicate_loop_idx = workflow.loop_idx


if __name__ == "__main__":
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    try:
        wf.classifiers.add(wtl.ActiveElementFilter(action=Click))
        # Registered as a postload callback so the attribute checked by the
        # policy's assertion is refreshed by the workflow itself.
        wf.postload_callbacks.append(partial(set_duplicate_loop_idx, wf))

        wf.run()
    finally:
        # Shut the workflow down even if the run raised.
        wf.quit()
goal = N_STEPS(2)


@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> Optional[wtl.Action]:
    """Click the largest image on the first step, then report and stop.

    While the history holds exactly one entry, the element labelled "image"
    with the greatest ``bounds.area`` is clicked. On any later call the policy
    prints whether the current URL differs from the URL of the first history
    entry and returns ``None``.

    Returns:
        A ``Click`` on the largest image, or ``None`` when there is no image
        to click or when the first step is already done.
    """
    if len(workflow.history) == 1:
        # max() picks the first element with the greatest area, which is the
        # same element a stable descending sort would put at index 0, but it
        # does not raise when no element was labelled as an image.
        largest_image = max(
            view.snapshot.elements.by_score("image"),
            key=lambda element: element.bounds.area,
            default=None,
        )
        if largest_image is None:
            return None
        return Click(largest_image)

    print("\n", view.snapshot.page_metadata["url"] != workflow.history[0].snapshot.page_metadata["url"], "\n")
    return None


def image_classifier_func(elements, _):
    """Return the elements whose ``metadata["tag"]`` is ``"img"``.

    The second positional argument is ignored.
    """
    return [elem for elem in elements if elem.metadata["tag"] == "img"]


if __name__ == "__main__":
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        goal=goal,
        url=cli_args.url,
        output=cli_args.output,
    )

    try:
        wf.classifiers.add(
            wtl.ElementClassifier(name="image", highlight=True, callback=image_classifier_func)
        )

        wf.run()
    finally:
        # Shut the workflow down even if the run raised.
        wf.quit()