def test_history_init(
    redis_mock_status, redis_mock_visited, redis_mock_scores, redis_mock_traversed
):
    """Check that History stores every constructor argument on the instance.

    A Status is built for the placeholder paths and handed to History together
    with the three redis mocks; each attribute is then compared with the value
    that was passed in, and ``scores`` is expected to start out empty.
    """
    status = Status(redis_mock_status, "root_path", "start_path", "end_path")
    history = History(
        status,
        redis_mock_visited,
        redis_mock_scores,
        redis_mock_traversed,
        "start_path",
    )

    assert isinstance(history.status, Status)
    # Compare against the Status that was passed in; comparing the attribute
    # with itself can never fail and therefore verifies nothing.
    assert history.status == status
    assert history.redis_client_visited == redis_mock_visited
    assert history.redis_client_scores == redis_mock_scores
    assert history.redis_client_traversed == redis_mock_traversed
    assert history.start_path == "start_path"
    assert history.scores == []
def history_cls_rev(
    redis_mock_status, redis_mock_visited, redis_mock_scores, redis_mock_traversed
):
    """Return a History starting at "Albany, New York".

    The underlying Status is created for the root path
    "Albany, New York-Mike Tyson", i.e. from "Albany, New York" to "Mike Tyson".
    """
    start_page = "Albany, New York"
    search_status = Status(
        redis_mock_status,
        "Albany, New York-Mike Tyson",
        start_page,
        "Mike Tyson",
    )
    return History(
        search_status,
        redis_mock_visited,
        redis_mock_scores,
        redis_mock_traversed,
        start_page,
    )
def get_task_status_by_id(self, task_id):
    """Get the status of the task.

    Args:
        task_id (String): The id of the task

    Return:
        status (Status): The status of the task, or None when no task with
            that id exists or the lookup fails for any other reason.
    """
    logger = Logger().get()
    logger.debug(f"start get_task_status_by_id, task_id:{task_id}")
    try:
        # Only the status is needed, so project just that field. A dict is one
        # of the documented projection forms (a bare set is not).
        field = {"status": 1}
        condition = {"_id": ObjectId(task_id)}
        task_doc = self.__tasks_collection.find_one(condition, field)

        # find_one yields None when nothing matches; report that explicitly
        # instead of letting the subscript below fail with a TypeError.
        if task_doc is None:
            logger.error(f"task not found in get_task_status_by_id, task_id:{task_id}")
            return None

        return Status(task_doc['status'])
    except Exception as e:
        # Best-effort lookup: callers receive None rather than an exception.
        logger.error(f"something wrong in get_task_status_by_id, Exception: {e}")
        return None
def get_task_by_id(self, task_id):
    """Return a Task object of the specific task

    Arg:
        task_id (String): the id of the specific task

    Return:
        task: the Task object of the specific id, or None when no task with
            that id exists or rebuilding it fails for any other reason.
    """
    logger = Logger().get()
    logger.debug(f"start get_task_by_id, task_id:{task_id}")
    try:
        # Find the task using id
        condition = {"_id": ObjectId(task_id)}
        task_doc = self.__tasks_collection.find_one(condition)

        # find_one yields None when nothing matches; report that explicitly
        # instead of failing on the first subscript below.
        if task_doc is None:
            logger.error(f"task not found in get_task_by_id, task_id:{task_id}")
            return None

        # Only the filenames (the keys of the stored mapping) are exposed.
        output_list = list(task_doc["output_files"])

        # Rebuild the Task object from the query result
        task = Task()
        task.job_id = str(task_doc["job_id"])
        task.task_id = task_id
        task.program_name = task_doc['program_name']
        task.input_file_args = task_doc['input_file_args']
        task.input_text_args = task_doc['input_text_args']
        task.input_flag_args = task_doc['input_flag_args']
        task.output_file_args = task_doc['output_file_args']
        task.output = output_list
        task.stdout = task_doc["stdout"]
        task.stderr = task_doc["stderr"]
        task.status = Status(task_doc["status"])
        task.finished_time = task_doc["finished_time"]

        logger.debug(f"get_task_by_id successfully, task_id:{task_id}")
        return task
    except Exception as e:
        # Best-effort lookup: callers receive None rather than an exception.
        logger.error(f"something wrong in get_task_by_id, Exception: {e}")
        return None
def status_cls_edge_case_rev(redis_mock_status):
    """Return a Status whose start page and end page are both "Mike Tyson"."""
    same_page = "Mike Tyson"
    edge_case_status = Status(
        redis_mock_status,
        "Mike Tyson-Mike Tyson",
        same_page,
        same_page,
    )
    return edge_case_status
def wikipedia_cls_rev(redis_mock_status):
    """Return a Wikipedia object for "start_path" built with its third argument set to True.

    The Status it is attached to uses the placeholder root/start/end paths.
    """
    placeholder_status = Status(
        redis_mock_status, "root_path", "start_path", "end_path"
    )
    wikipedia = Wikipedia(placeholder_status, "start_path", True)
    return wikipedia
def find(start_path: str, end_path: str) -> str:
    """
    Kicks off wikipedia game based on API request received.

    Find kicks off the same search going forward and in reverse.
    For both forward and reverse search:
        Initiates status based on API request received.
        Sends an async task to find in order to start searching.
        Records the task id of the find parent task on that search's own
        status in order to terminate sub-tasks when done.

    Args:
        start_path: Wiki racer game start path.
        end_path: Wiki racer game end path.

    Returns:
        results traversed path || "Pending" if not done

        Upon first request to find returns "Pending".
        Subsequent requests to find until the solution is found will return
        "Pending" as well. If solution is found request to find will return
        the traversed path and the time it took to complete in seconds.
    """
    root_path_forward = build_root_path(start_path, end_path)
    root_path_backward = build_root_path(end_path, start_path)

    # A status already exists for this search: report on it instead of
    # starting the search a second time.
    if Status.exists(status_db, root_path_forward):
        status = Status(status_db, root_path_forward)
        return (
            "Pending"
            if status.results_pending()
            else f"solution is: {status.results_str()} time spent: {str(status.end_time)} seconds"
        )

    # GOING FORWARD###########################################################
    # Initialize status
    status_forward = Status(status_db, root_path_forward, start_path, end_path)

    task_forward = app.send_task(
        "tasks.find",
        kwargs=dict(
            root_path=root_path_forward,
            start_path=start_path,
            rev_root_path=root_path_backward,
        ),
        queue="find",
    )

    # Assign associated task id to status table
    status_forward.task_id = task_forward.id

    # GOING BACKWARD###########################################################
    # Initialize status
    status_backwards = Status(status_db, root_path_backward, end_path, start_path)

    task_backward = app.send_task(
        "tasks.find",
        kwargs=dict(
            root_path=root_path_backward,
            start_path=end_path,
            rev_root_path=root_path_forward,
            rev=True,
        ),
        queue="find_rev",
    )

    # Assign associated task id to status table. This must go on the backward
    # status: writing it to status_forward would overwrite the forward task id
    # and leave the backward search without one.
    status_backwards.task_id = task_backward.id

    return "Pending"
def test_exists(redis_mock_status):
    """Status.exists is False for "root_path" before a Status is created and True afterwards."""
    exists_before = Status.exists(redis_mock_status, "root_path")
    assert exists_before is False

    # Constructing the Status is the only step between the two checks.
    Status(redis_mock_status, "root_path", "start_path", "end_path")

    exists_after = Status.exists(redis_mock_status, "root_path")
    assert exists_after is True
def status_cls(redis_mock_status):
    """Return a Status for the placeholder root, start and end paths."""
    placeholder_status = Status(
        redis_mock_status,
        "root_path",
        "start_path",
        "end_path",
    )
    return placeholder_status
def find(root_path: str, start_path: str, rev_root_path: str, rev=False):
    """Celery task that plays wiki racer game.

    This task only kicks off if the search is still active.

    Sets history: Based on search status and current page being queried.
    Keeps track of visited: If a node is already visited do not visit again (prevent cycles)
    Upon discovery of a new page: Scrape page for new links.
    When new links obtained: Score links based on similarity to wiki race end path.
    Track game completion: When wiki game end path is found in newly discovered links end the game.
    If wiki page end game not found, send another task to find with:
        start_path/query: [highest scoring page discovered so far].

    Args:
        root_path: Search key composed of wiki racer start page and end page.
        start_path: Page being queried.
        rev_root_path: The root path of the search running in the opposite direction.
        rev: are we going in reverse?

    Returns:
        None. Progress is recorded through the Status and History objects.

    Raises:
        ValueError: If root_path, start_path or rev_root_path is empty.
    """
    # NOTE(review): the original indentation was lost; the nesting under
    # `if not history.is_visited(start_path)` below is reconstructed from the
    # docstring ("if a node is already visited do not visit again") - confirm.

    # Weird edge cases: every key is required.
    if not root_path or not start_path or not rev_root_path:
        raise ValueError(
            f"You need to specify root_path, start_path, and rev_root_path")

    status = Status(status_db, root_path)

    # Don't start find if task is done
    if not status.is_active():
        return

    # Weird edge cases: start and end are the same page, so the result is that
    # single page; it is finalized on this search and on the reverse one.
    if status.start_path == status.end_path:
        result = [start_path]
        status.finalize_results(result)
        status_rev = Status(status_db, rev_root_path)
        status_rev.finalize_results(result)
        return

    # Populates history
    history = History(
        status,
        visited_db,
        scores_db,
        traversed_db,
        start_path,
    )

    # When querying the search's own start page, the traversed path is reset
    # to just that page.
    if start_path == status.start_path:
        history.traversed_path = [status.start_path]

    if not history.is_visited(start_path):
        history.add_to_visited(start_path)

        # links from wikipedia
        all_links = Wikipedia(status, start_path, rev).scrape_page()

        # return if found in links on current page before bothering to score them
        if found_in_page(status, history, all_links, rev_root_path):
            return

        # score found links
        nlp_scores = NLP(status, history).score_links(all_links)

        # set their new traversed paths
        history.bulk_add_to_new_links_traversed_paths(all_links)

        # add them onto scores set
        history.bulk_add_to_scores(nlp_scores)

        # return if found in the intersection between forward and reverse search
        if found_in_intersect(status, history, rev_root_path):
            return

    # Don't kick off the next find if task is done or no more pages left to search
    if not status.is_active() or len(history.scores) < 1:
        return

    # kick off another find task with highest scoring page found so far;
    # the queue name depends on the search direction
    app.send_task(
        "tasks.find",
        kwargs=dict(
            root_path=root_path,
            start_path=history.next_highest_score(),
            rev_root_path=rev_root_path,
            rev=rev,
        ),
        queue="find_rev" if rev else "find",
    )