Ejemplo n.º 1
0
def found_in_intersect(status: Status, history: History,
                       rev_root_path: str) -> bool:
    """Finish both searches if the forward and reverse searches have met.

    ``history`` is asked whether this search and the reverse search share a
    traversed page (an intersection). If they do, the joined path is fetched
    from ``history`` and stored as the final result of this search, and the
    same path in reversed order is stored as the final result of the reverse
    search. The order is flipped because one search walks forward over a
    page's links while the other walks backwards over the pages linking to it.

    Args:
        status: Status of current search.
        history: History of current search.
        rev_root_path: The root_path of the same search going in reverse.

    Returns:
        True when an intersection was found and both results were finalized,
        False otherwise.
    """
    # Created before the intersection lookup so the call order stays as-is.
    reverse_status = Status(status_db, rev_root_path)

    forward_root = status.root_path
    if not history.traversed_intersection(forward_root, rev_root_path):
        return False

    path_to_goal = history.intersection_path(status.root_path, rev_root_path)
    status.finalize_results(path_to_goal)

    # The reverse search gets its own copy of the path, in opposite order.
    path_to_goal_rev = path_to_goal.copy()
    path_to_goal_rev.reverse()
    reverse_status.finalize_results(path_to_goal_rev)

    logger.info(
        f"Intersection End link found!! path traversed and time to complete: {path_to_goal} or {path_to_goal_rev}"
    )
    return True
Ejemplo n.º 2
0
def history_cls_rev(redis_mock_status, redis_mock_visited, redis_mock_scores,
                    redis_mock_traversed):
    """Build a History for a reverse-direction search on the mock Redis clients.

    The Status is given "end_path" before "start_path", and the History is
    started from "end_path".
    """
    reverse_status = Status(
        redis_mock_status, "path_root", "end_path", "start_path"
    )
    return History(
        reverse_status,
        redis_mock_visited,
        redis_mock_scores,
        redis_mock_traversed,
        "end_path",
    )
Ejemplo n.º 3
0
def nlp_cls(redis_mock_status, redis_mock_visited, redis_mock_scores,
            redis_mock_traversed):
    """Build an NLP instance wired to a Status and History on the mock Redis clients."""
    search_status = Status(
        redis_mock_status, "root_path", "start_path", "end_path"
    )
    search_history = History(
        search_status,
        redis_mock_visited,
        redis_mock_scores,
        redis_mock_traversed,
        "start_path",
    )
    return NLP(search_status, search_history)
Ejemplo n.º 4
0
def history_cls(
    redis_mock_status, redis_mock_visited, redis_mock_scores, redis_mock_traversed
):
    """Build a History for the "Mike Tyson" to "Albany, New York" search.

    Uses the mock Redis clients and starts the History at "Mike Tyson".
    """
    search_status = Status(
        redis_mock_status,
        "Mike Tyson-Albany, New York",
        "Mike Tyson",
        "Albany, New York",
    )
    return History(
        search_status,
        redis_mock_visited,
        redis_mock_scores,
        redis_mock_traversed,
        "Mike Tyson",
    )
Ejemplo n.º 5
0
def test_history_init(redis_mock_status, redis_mock_visited, redis_mock_scores,
                      redis_mock_traversed):
    """A new History keeps what it was constructed with and has no scores yet."""
    status = Status(redis_mock_status, "root_path", "start_path", "end_path")
    history = History(
        status,
        redis_mock_visited,
        redis_mock_scores,
        redis_mock_traversed,
        "start_path",
    )
    assert isinstance(history.status, Status)
    # Compare against the Status handed to the constructor; comparing the
    # attribute with itself could never fail and therefore checked nothing.
    assert history.status == status
    assert history.redis_client_visited == redis_mock_visited
    assert history.redis_client_scores == redis_mock_scores
    assert history.redis_client_traversed == redis_mock_traversed
    assert history.start_path == "start_path"
    assert history.scores == []
Ejemplo n.º 6
0
def find(root_path: str, start_path: str, rev_root_path: str, rev=False):
    """Celery task that plays wiki racer game.

    This task only kicks off if the search is still active.
    Sets history: Based on search status and current page being queried.
    Keeps track of visited: If a node is already visited do not visit again (prevent cycles)
    Upon discovery of a new page: Scrape page for new links.
    When new links obtained: Score links based on similarity to wiki race end path.
    Track game completion: When wiki game end path is found in newly discovered links end the game.
    If wiki page end game not found, send another task to find with:
        start_path/query: [highest scoring page discovered so far].

    Args:
        root_path: Search key composed of wiki racer start page and end page.
        start_path: Page being queried.
        rev_root_path: The path reversed of this one.
        rev: are we going in reverse?

    Raises:
        ValueError: If root_path, start_path or rev_root_path is empty.
    """
    # Refuse to run without the three keys that identify the search.
    if not root_path or not start_path or not rev_root_path:
        raise ValueError(
            "You need to specify root_path, start_path, and rev_root_path")

    status = Status(status_db, root_path)

    # Don't start find if task is done
    if not status.is_active():
        return

    # Start and end page are the same: the result is that single page, and it
    # is recorded for both the forward and the reverse search.
    if status.start_path == status.end_path:
        result = [start_path]
        status.finalize_results(result)
        status_rev = Status(status_db, rev_root_path)
        status_rev.finalize_results(result)
        return

    # Populates history
    history = History(
        status,
        visited_db,
        scores_db,
        traversed_db,
        start_path,
    )

    # On the very first page of the search the traversed path is that page.
    if start_path == status.start_path:
        history.traversed_path = [status.start_path]

    if not history.is_visited(start_path):
        history.add_to_visited(start_path)

        # links from wikipedia
        all_links = Wikipedia(status, start_path, rev).scrape_page()

        # return if found in links on current page before bothering to score them
        if found_in_page(status, history, all_links, rev_root_path):
            return

        # score found links
        nlp_scores = NLP(status, history).score_links(all_links)

        # set their new traversed paths
        history.bulk_add_to_new_links_traversed_paths(all_links)

        # add them onto scores set
        history.bulk_add_to_scores(nlp_scores)

        # return if found in the intersection between forward and reverse search
        if found_in_intersect(status, history, rev_root_path):
            return

    # Don't kick off the next find if task is done or no more pages left to search
    if not status.is_active() or not history.scores:
        return

    # kick off another find task with highest scoring page found so far
    app.send_task(
        "tasks.find",
        kwargs=dict(
            root_path=root_path,
            start_path=history.next_highest_score(),
            rev_root_path=rev_root_path,
            rev=rev,
        ),
        queue="find_rev" if rev else "find",
    )