Ejemplo n.º 1
0
def wtl_thread(cli_args):
    """Build a workflow from parsed CLI arguments, run it, and always close it.

    Args:
        cli_args: Object exposing ``config``, ``url`` and ``output`` attributes,
            which are forwarded to ``wtl.Config`` / ``wtl.Workflow``.

    The ``policy`` passed to the workflow is a module-level name defined
    outside this function.
    """
    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    workflow.classifiers.add(wtl.ActiveElementFilter(action=wtl.actions.Click, highlight=True))

    try:
        workflow.run()
    finally:
        # Release the workflow's resources even if run() raises, so an
        # exception does not skip the quit() call.
        workflow.quit()
Ejemplo n.º 2
0
def test_complex():
    """Run a three-step workflow with a custom classifier and count policy calls.

    The policy increments ``workflow.metadata["clicks"]`` on every invocation
    and returns a random Click action taken from ``by_score("menu")``; after
    the run the counter is expected to equal 3 and the workflow to report
    success.
    """

    @wtl.single_tab
    def policy(w, view):
        # Candidate clicks filtered through by_score("menu") -- presumably the
        # elements labelled by the "menu" classifier registered below.
        menu_actions = view.actions.by_type(Click).by_score("menu")
        w.metadata["clicks"] += 1
        return random.choice(menu_actions)

    def menu_classifier_func(elements, _):
        # Keep only <a> elements located at x < 10 and y < 200.
        return [
            elem
            for elem in elements
            if elem.location.x < 10 and elem.location.y < 200 and elem.metadata["tag"] == "a"
        ]

    config = Config.default(["headless", "desktop"])

    workflow = wtl.Workflow(url=TESTURL, config=config, policy=policy, goal=wtl.goals.N_STEPS(3))
    workflow.metadata["clicks"] = 0
    workflow.classifiers.add(wtl.ActiveElementFilter(action=Click))
    workflow.classifiers.add(
        wtl.ElementClassifier(
            name="menu",
            action=Click,
            subset="is_active",
            highlight=True,
            callback=menu_classifier_func,
        )
    )

    try:
        workflow.run()
        assert workflow.success
        assert workflow.metadata["clicks"] == 3
    finally:
        # Always quit, otherwise a failing run or assertion skips the cleanup.
        workflow.quit()
Ejemplo n.º 3
0
def test_abort():
    """Check that a workflow whose policy returns Abort() does not report success."""
    config = Config.default(["headless"])

    workflow = wtl.Workflow(
        url=TESTURL,
        config=config,
        # The policy ignores its arguments and maps the single tab to Abort().
        policy=lambda *_, **__: {wtl.Workflow.SINGLE_TAB: Abort()},
        goal=wtl.goals.N_STEPS(3),
    )

    try:
        workflow.run()
        assert not workflow.success
    finally:
        # Always quit, otherwise a failing run or assertion skips the cleanup.
        workflow.quit()
Ejemplo n.º 4
0
def test_multiple():
    """Drive four views (keys "1".."4") with a coroutine policy and check their URLs.

    The ``url`` mapping groups the keys under "A" and "C" (presumably windows
    containing tabs -- confirm against the Workflow documentation). The policy
    asserts the number of distinct page URLs after each step: 1 initially,
    3 after clicking in "1" and "3", and 4 after clicking in "4".
    """
    config = Config.default(["headless"])

    @wtl.multi_tab_coroutine
    def policy():
        yield
        _, views = yield {}
        urls = {v.snapshot.page_metadata["url"] for v in views.values()}
        print([v.snapshot.page_metadata["url"] for v in views.values()])
        assert len(urls) == 1

        # Click different menu entries in "1" and "3"; "2" and "4" get no action.
        _, views = yield {
            "1": wtl.actions.Click(wtl.Selector(".sidenav a:nth-of-type(2)")),
            "2": None,
            "3": wtl.actions.Click(wtl.Selector(".sidenav a:nth-of-type(3)")),
            "4": None,
        }
        urls = {v.snapshot.page_metadata["url"] for v in views.values()}
        print([v.snapshot.page_metadata["url"] for v in views.values()])
        assert len(urls) == 3

        _, views = yield {"4": wtl.actions.Click(wtl.Selector(".sidenav a:nth-of-type(4)"))}
        urls = {v.snapshot.page_metadata["url"] for v in views.values()}
        assert len(urls) == 4

        yield {}

    workflow = wtl.Workflow(
        url={
            "A": {"1": TESTURL, "2": TESTURL},
            "C": {"3": TESTURL, "4": TESTURL},
        },
        config=config,
        policy=policy,
    )

    try:
        workflow.run()
        assert workflow.loop_idx == 4
    finally:
        # Always quit, otherwise a failing run or assertion skips the cleanup.
        workflow.quit()
Ejemplo n.º 5
0
def test_simple(browser):
    """Navigate to a single tab, only wait for three steps, then verify shutdown.

    Args:
        browser: Configuration entry appended to ``"headless"`` when building
            the default config (supplied by the test harness).

    After ``quit()`` a second ``run()`` is expected to raise ``Error``.
    """
    config = Config.default(["headless", browser])

    workflow = wtl.Workflow(
        url=TESTURL,
        config=config,
        # The policy ignores its arguments and maps the single tab to Wait(1).
        policy=lambda *_, **__: {wtl.Workflow.SINGLE_TAB: Wait(1)},
        goal=wtl.goals.N_STEPS(3),
    )

    try:
        workflow.run()
        assert workflow.success
    finally:
        # Always quit, otherwise a failing run or assertion skips the cleanup.
        workflow.quit()

    # A workflow that has been quit must refuse to run again.
    with pytest.raises(Error):
        workflow.run()
def test_mhtml_export():
    """Run a workflow with MHTML saving enabled and check the exported files.

    Expects non-empty ``page.mhtml`` files under the ``0`` and ``1``
    sub-directories of the output directory. The output directory is removed
    afterwards regardless of the test outcome.
    """
    output_dir = Path("./mhtml/")

    try:
        with Xvfb():
            workflow = wtl.Workflow(
                url=TESTURL,
                policy=test_policy,
                config=wtl.Config(
                    [
                        "default",
                        "browser.enable_mhtml=True",
                        "scraping.save_mhtml=True",
                        "debug.save=True",
                    ]
                ),
                output=output_dir,
            )
            try:
                workflow.run()
            finally:
                # Close the workflow while the virtual display is still up;
                # the original never called quit() at all.
                workflow.quit()

        for step in ("0", "1"):
            exported = output_dir / step / "page.mhtml"
            assert exported.exists()
            assert exported.stat().st_size
    finally:
        # Clean up even when an assertion fails; ignore_errors covers the case
        # where the run died before the directory was created.
        shutil.rmtree(output_dir, ignore_errors=True)
Ejemplo n.º 7
0

def _start_btn(elements, _):
    return [e for e in elements if e.metadata["id"] == "sync-task-cover" and "block" in e.metadata["display"]]


def _tile_div(elements, _):
    return [
        e
        for e in elements
        if e.metadata["tag"] == "span" and e.metadata["id"].startswith("ttt") and e.tag.parent.name == "div"
    ]


if __name__ == "__main__":
    # Script entry point: run the module-level policy against the MiniWoB++
    # tic-tac-toe page.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://stanfordnlp.github.io/miniwob-plusplus/html/miniwob/tic-tac-toe.html",
        output=cli_args.output,
    )

    # Register a default filter first, then one filter per custom callback,
    # both of which attach the Click action.
    default_filter = wtl.ActiveElementFilter()
    workflow.classifiers.add(default_filter)

    start_filter = wtl.ActiveElementFilter(name="start", callback=_start_btn, action=Click)
    workflow.classifiers.add(start_filter)

    tile_filter = wtl.ActiveElementFilter(name="tile", callback=_tile_div, action=Click)
    workflow.classifiers.add(tile_filter)

    workflow.run()
    workflow.quit()
Ejemplo n.º 8
0
    # After seven deletions, start over from step 3
    if workflow.loop_idx == 7:
        return wtl.actions.Revert(3)

    # Randomly pick one of the deleting actions
    return [
        random.choice(view.actions.by_type(wtl.actions.Remove)),
        wtl.actions.Wait(0.25),
        wtl.actions.Clear(viewport=False),
        wtl.actions.WaitForUser(),
    ]


if __name__ == "__main__":
    # Script entry point: run the module-level policy on the URL given on the
    # command line, with every element labelled as removable.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=cli_args.url,
        output=cli_args.output,
    )

    # The callback returns its input unchanged, so _all_ elements get the
    # Remove action attached.
    remove_anything = wtl.ElementClassifier(
        name="dementor",
        enabled=True,
        highlight=False,
        action=wtl.actions.Remove,
        callback=lambda e, _: e,
    )
    wf.classifiers.add(remove_anything)

    wf.run()
    wf.quit()
Ejemplo n.º 9
0
    # For now, we consider all input fields where the type attribute has a specific value.
    return [
        e for e in elements
        if e.metadata["tag"] == "input" and e.metadata["type"] in ("text",
                                                                   "email",
                                                                   "password")
    ]


if __name__ == "__main__":
    # Script entry point: run the module-level policy and goal against the
    # signup form and report whether the workflow succeeded.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        goal=goal,
        url="https://www.getharvest.com/signup",
        output=cli_args.output,
    )

    # Only a text field classifier is registered; there is no active-element
    # filter because all text fields are expected to be active anyway.
    textfield_classifier = wtl.ElementClassifier(
        name="textfield",
        action=FillText,
        callback=text_field_classifier_func,
        highlight=True,
    )
    workflow.classifiers.add(textfield_classifier)

    workflow.run()
    workflow.quit()

    print("Workflow successful?", workflow.success)
                wtl.actions.Navigate(search_url),
                Click(search_results[i + 1])
            ]
            i += 1
        except IndexError:
            print("Search result exhausted!!")
            break

        yield None


if __name__ == "__main__":
    # Script entry point: start the module-level policy on a random Wikipedia
    # article.
    cli_args = parse_cli_args()

    wf = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url="https://en.wikipedia.org/wiki/Special:Random",
        output=cli_args.output,
    )

    # One filter attaching Click, and one classifier (without a callback)
    # attaching FillText.
    click_filter = wtl.ActiveElementFilter(action=Click)
    wf.classifiers.add(click_filter)

    textfield_classifier = wtl.ElementClassifier(
        name="textfield",
        action=wtl.actions.FillText,
        highlight=True,
    )
    wf.classifiers.add(textfield_classifier)

    wf.run()
    wf.quit()
Ejemplo n.º 11
0
def policy(_, view: wtl.View) -> Dict[wtl.View, wtl.Action]:
    """Pick one random Click action for every view.

    Args:
        _: Ignored first argument.
        view: Used as a mapping here (``.values()`` is called on it), so it
            presumably holds one view per tab despite the singular annotation.

    Returns:
        A dict mapping each view to a randomly chosen Click action from it.
    """
    chosen = {}
    for tab_view in view.values():
        clickable = tab_view.actions.by_type(wtl.actions.Click)
        chosen[tab_view] = choice(clickable)
    return chosen


if __name__ == "__main__":
    # Script entry point: open three pages grouped under the keys "first" and
    # "second", and run the module-level policy on them.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url={
            "first": {"A": "www.uppsalahandkraft.se", "B": "https://www.uppsalamodemassa.se"},
            "second": {"C": "shop.biskopsgarden.com"},
        },
        output=cli_args.output,
    )

    click_filter = wtl.ActiveElementFilter(action=wtl.actions.Click)
    workflow.classifiers.add(click_filter)

    workflow.run()
    workflow.quit()
Ejemplo n.º 12
0
def test_workflow(browser):
    """Check that a workflow on a blank page with the dummy policy can be constructed.

    Args:
        browser: Value substituted into the ``browser.browser=...`` config entry.
    """
    config_entries = ["headless", f"browser.browser={browser}"]
    config = wtl.Config.default(config_entries)

    workflow = wtl.Workflow(
        url="about:blank",
        config=config,
        policy=wtl.policies.DUMMY,
    )

    assert workflow
Ejemplo n.º 13
0
import webtraversallibrary as wtl
from webtraversallibrary.actions import Clear, Click, Highlight

from .util import parse_cli_args, start_server


@wtl.single_tab_coroutine
def policy():
    """Highlight the first five title links one at a time, then click the first one.

    When the generator is exhausted, the workflow interprets the resulting
    StopIteration as cancelling the tabs.
    """
    title_selectors = [f"h2:nth-of-type({position}) > a" for position in range(1, 6)]

    yield
    for css in title_selectors:
        # Clear first, then highlight the current title.
        clear_action = Clear()
        highlight_action = Highlight(target=wtl.Selector(css))
        yield [clear_action, highlight_action]

    yield Click(wtl.Selector("h2:nth-of-type(1) > a"))


if __name__ == "__main__":
    # Script entry point: start the local server and run the policy on its
    # /blog page.
    cli_args = parse_cli_args()

    workflow = wtl.Workflow(
        config=wtl.Config(cli_args.config),
        policy=policy,
        url=start_server() + "/blog",
        output=cli_args.output,
    )

    workflow.run()
    workflow.quit()