コード例 #1
0
ファイル: gui.py プロジェクト: teaolivia/webtraversallibrary
def wtl_thread(cli_args):
    """Build, run and shut down a workflow from parsed command-line arguments.

    The workflow is created from ``cli_args.config``, ``cli_args.url`` and
    ``cli_args.output`` together with the module-level ``policy``. An
    ``ActiveElementFilter`` with ``Click`` as its action and highlighting
    switched on is registered before the run starts.

    Args:
        cli_args: Object exposing ``config``, ``url`` and ``output`` attributes.
    """
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    workflow.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click, highlight=True))

    # quit() must run even if run() raises; otherwise the workflow is never
    # shut down when the policy or the page load fails.
    try:
        workflow.run()
    finally:
        workflow.quit()
コード例 #2
0
def test_mhtml_export():
    """Check that a run with MHTML saving enabled writes non-empty page.mhtml files.

    The workflow runs inside an ``Xvfb`` context with the ``browser.enable_mhtml``,
    ``scraping.save_mhtml`` and ``debug.save`` settings turned on. Afterwards the
    sub-directories ``0`` and ``1`` of the output directory must each contain a
    ``page.mhtml`` file with a non-zero size.
    """
    output_dir = Path("./mhtml/")

    try:
        with Xvfb():
            workflow = wtl.Workflow(
                url=TESTURL,
                policy=test_policy,
                config=wtl.Config([
                    "default", "browser.enable_mhtml=True",
                    "scraping.save_mhtml=True", "debug.save=True"
                ]),
                output=output_dir,
            )
            workflow.run()

        assert (output_dir / "0" / "page.mhtml").exists()
        assert (output_dir / "1" / "page.mhtml").exists()
        assert os.stat(output_dir / "0" / "page.mhtml").st_size
        assert os.stat(output_dir / "1" / "page.mhtml").st_size
    finally:
        # Clean up on failure too, so a broken run does not leave stale output
        # behind for the next run. The existence check keeps a missing
        # directory from masking the original error.
        if output_dir.exists():
            shutil.rmtree(output_dir)
コード例 #3
0

def _start_btn(elements, _):
    """Classifier callback: keep elements that match the "sync-task-cover" id.

    An element is kept when ``metadata["id"]`` equals ``"sync-task-cover"`` and
    ``metadata["display"]`` contains ``"block"``. The second argument is unused.

    Returns:
        A list of the matching elements, in their original order.
    """
    matches = []
    for element in elements:
        # Check the id first; "display" is only read for elements with the right id.
        if element.metadata["id"] != "sync-task-cover":
            continue
        if "block" in element.metadata["display"]:
            matches.append(element)
    return matches


def _tile_div(elements, _):
    """Classifier callback: keep ``span`` elements with a "ttt" id inside a ``div``.

    An element is kept when ``metadata["tag"]`` is ``"span"``, ``metadata["id"]``
    starts with ``"ttt"`` and ``tag.parent.name`` is ``"div"``. The second
    argument is unused.

    Returns:
        A list of the matching elements, in their original order.
    """
    tiles = []
    for element in elements:
        # The checks run in this order; later fields are only read once the
        # earlier checks have passed.
        if element.metadata["tag"] != "span":
            continue
        if not element.metadata["id"].startswith("ttt"):
            continue
        if element.tag.parent.name == "div":
            tiles.append(element)
    return tiles


if __name__ == "__main__":
    # Script entry point: run the policy against the MiniWoB++ tic-tac-toe page.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://stanfordnlp.github.io/miniwob-plusplus/html/miniwob/tic-tac-toe.html",
        output=cli_args.output,
    )

    # One filter with default settings, plus two named filters ("start" and
    # "tile") that attach a Click action to the elements chosen by their callbacks.
    workflow.classifiers.add(wtl.ActiveElementFilter())
    workflow.classifiers.add(wtl.ActiveElementFilter(name="start", callback=_start_btn, action=Click))
    workflow.classifiers.add(wtl.ActiveElementFilter(name="tile", callback=_tile_div, action=Click))

    # quit() must run even if run() raises, so the workflow is always shut down.
    try:
        workflow.run()
    finally:
        workflow.quit()
コード例 #4
0
    # After seven deletions, start over from step 3
    if workflow.loop_idx == 7:
        return wtl.actions.Revert(3)

    # Randomly pick one of the deleting actions
    return [
        random.choice(view.actions.by_type(wtl.actions.Remove)),
        wtl.actions.Wait(0.25),
        wtl.actions.Clear(viewport=False),
        wtl.actions.WaitForUser(),
    ]


if __name__ == "__main__":
    # Script entry point: build the workflow from the command-line arguments.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config), policy=policy, url=cli_args.url, output=cli_args.output)

    wf.classifiers.add(
        wtl.ElementClassifier(
            name="dementor",
            enabled=True,
            highlight=False,
            action=wtl.actions.Remove,
            callback=lambda e, _: e,  # Will label _all_ elements removable
        )
    )

    # quit() must run even if run() raises, so the workflow is always shut down.
    try:
        wf.run()
    finally:
        wf.quit()
コード例 #5
0
from .util import parse_cli_args


@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> wtl.Action:
    """Pick a Click action most of the time, and the Refresh action otherwise.

    Before choosing, asserts that ``workflow.duplicate_loop_idx`` equals
    ``workflow.loop_idx``.

    Returns:
        An action chosen with ``choice`` from the view's ``Click`` actions when
        ``random()`` is below 0.95, otherwise the unique ``Refresh`` action.
    """
    assert workflow.duplicate_loop_idx == workflow.loop_idx

    # With some small probability, refresh instead of clicking.
    if random() < 0.95:
        click_actions = view.actions.by_type(Click)
        return choice(click_actions)

    refresh_actions = view.actions.by_type(Refresh)
    return refresh_actions.unique()


def set_duplicate_loop_idx(workflow: wtl.Workflow):
    """Copy the workflow's current ``loop_idx`` into ``duplicate_loop_idx``.

    Args:
        workflow: The workflow whose ``duplicate_loop_idx`` attribute is set.
    """
    current_idx = workflow.loop_idx
    setattr(workflow, "duplicate_loop_idx", current_idx)


if __name__ == "__main__":
    # Script entry point: build the workflow from the command-line arguments.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(config=wtl.Config(cli_args.config),
                      policy=policy,
                      url=cli_args.url,
                      output=cli_args.output)

    wf.classifiers.add(wtl.ActiveElementFilter(action=Click))

    # Registered as a post-load callback; it keeps wf.duplicate_loop_idx equal
    # to wf.loop_idx, which the policy asserts on every call.
    wf.postload_callbacks.append(partial(set_duplicate_loop_idx, wf))

    # quit() must run even if run() raises, so the workflow is always shut down.
    try:
        wf.run()
    finally:
        wf.quit()
コード例 #6
0
# Goal object handed to the workflow through its ``goal=`` argument.
# NOTE(review): presumably satisfied after two steps; confirm against the
# definition of N_STEPS.
goal = N_STEPS(2)


@wtl.single_tab
def policy(workflow: wtl.Workflow, view: wtl.View) -> Optional[wtl.Action]:
    """Click the largest image on the first step, then report whether the URL changed.

    When the history holds exactly one entry, the element scored as "image"
    with the largest ``bounds.area`` is clicked. On any later call the policy
    prints whether the current page URL differs from the URL of the first
    history entry and returns ``None``.

    Returns:
        A ``Click`` on the largest image on the first step, or ``None`` when no
        element is scored as "image" or when called on a later step.
    """
    if len(workflow.history) == 1:
        # max() returns the first element with the largest area, the same one
        # the stable descending sort put at index 0. default=None covers pages
        # with no images, where indexing an empty list raised IndexError.
        largest_image = max(
            view.snapshot.elements.by_score("image"),
            key=lambda element: element.bounds.area,
            default=None,
        )
        if largest_image is None:
            # Nothing to click. NOTE(review): returning None here mirrors the
            # policy's own "nothing more to do" return below; confirm this is
            # the desired outcome for image-less pages.
            return None
        return Click(largest_image)

    print("\n", view.snapshot.page_metadata["url"] != workflow.history[0].snapshot.page_metadata["url"], "\n")
    return None


def image_classifier_func(elements, _):
    """Classifier callback: keep the elements whose ``metadata["tag"]`` is ``"img"``.

    The second argument is unused.

    Returns:
        A list of the matching elements, in their original order.
    """
    def is_img(candidate):
        return candidate.metadata["tag"] == "img"

    return list(filter(is_img, elements))


if __name__ == "__main__":
    # Script entry point: build the workflow from the command-line arguments,
    # using the module-level policy and goal.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config), policy=policy, goal=goal, url=cli_args.url, output=cli_args.output
    )

    # Registers the "image" classifier; the policy looks elements up under that
    # name with by_score("image").
    wf.classifiers.add(wtl.ElementClassifier(name="image", highlight=True, callback=image_classifier_func))

    # quit() must run even if run() raises, so the workflow is always shut down.
    try:
        wf.run()
    finally:
        wf.quit()