def get_task_run(self) -> "TaskRun":
    """
    Return the TaskRun this agent is working within.

    The value is cached on ``self._task_run``; it is only loaded from the
    db (using ``self.task_run_id``) when no cached value is present.
    """
    cached_run = self._task_run
    if cached_run is not None:
        return cached_run

    # Import happens at call time rather than module load time
    # (presumably to sidestep a circular import -- confirm before moving it).
    from mephisto.data_model.task_run import TaskRun

    self._task_run = TaskRun.get(self.db, self.task_run_id)
    return self._task_run
def get_units_for_run_id(self, run_id: str) -> List[Unit]:
    """
    Return a list of all Units in a terminal completed state from the
    task run with the given run_id.

    The run is loaded from the db and handed, as a one-element list, to
    ``_get_units_for_task_runs`` which does the actual collection.
    """
    runs = [TaskRun.get(self.db, run_id)]
    return self._get_units_for_task_runs(runs)
def setUp(self):
    """
    Build the per-test fixtures: a temporary data directory, a db of type
    ``DB_CLASS`` stored inside it, and a test task run loaded from that db.
    """
    tmp_dir = tempfile.mkdtemp()
    self.data_dir = tmp_dir
    db_file = os.path.join(tmp_dir, "mephisto.db")
    assert self.DB_CLASS is not None, "Did not specify db to use"
    self.db = self.DB_CLASS(db_file)

    run_id = get_test_task_run(self.db)
    self.task_run_id = run_id
    self.task_run = TaskRun.get(self.db, run_id)
def get_task_run(self) -> TaskRun:
    """
    Return the task run that this assignment is part of.

    The run is fetched from the db on the first call and the cached
    instance is returned afterwards.
    """
    if self.__task_run is not None:
        return self.__task_run
    self.__task_run = TaskRun.get(self.db, self.task_run_id)
    return self.__task_run
def setUp(self):
    """
    Stand up a full mock run for each test: a temporary db, a task and
    task run, a deployed mock architect, a mock provider registered for
    the run, launched assignments/units, and the client io / worker pool
    objects under test.
    """
    # Storage
    tmp_dir = tempfile.mkdtemp()
    self.data_dir = tmp_dir
    db_file = os.path.join(tmp_dir, "mephisto.db")
    assert self.DB_CLASS is not None, "Did not specify db to use"
    self.db = self.DB_CLASS(db_file)

    # Data model entries
    self.task_id = self.db.new_task("test_mock", MockBlueprint.BLUEPRINT_TYPE)
    self.task_run_id = get_test_task_run(self.db)
    self.task_run = TaskRun.get(self.db, self.task_run_id)
    self.live_run = None

    # Architect, configured to actually run its server
    architect_args = MockArchitectArgs(should_run_server=True)
    architect_config = OmegaConf.structured(
        MephistoConfig(architect=architect_args)
    )
    self.architect = MockArchitect(
        self.db,
        architect_config,
        EMPTY_STATE,
        self.task_run,
        self.data_dir,
    )
    self.architect.prepare()
    self.architect.deploy()
    self.urls = self.architect._get_socket_urls()  # FIXME
    self.url = self.urls[0]

    # Provider registration against the deployed url
    self.provider = MockProvider(self.db)
    self.provider.setup_resources_for_task_run(
        self.task_run,
        self.task_run.args,
        EMPTY_STATE,
        self.url,
    )

    # Launch the mock assignments and their units
    assignment_data = self.get_mock_assignment_data_array()
    self.launcher = TaskLauncher(self.db, self.task_run, assignment_data)
    self.launcher.create_assignments()
    self.launcher.launch_units(self.url)

    self.client_io = ClientIOHandler(self.db)
    self.worker_pool = WorkerPool(self.db)
def test_task_run(self) -> None:
    """Test creation and querying of task_runs"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db
    _, task_id = get_test_task(db)
    _, requester_id = get_test_requester(db)

    # Check creation and retrieval of a task_run
    init_params = json.dumps(OmegaConf.to_yaml(TaskRunArgs.get_mock_params()))
    task_run_id = db.new_task_run(
        task_id, requester_id, init_params, "mock", "mock"
    )
    self.assertIsNotNone(task_run_id)
    self.assertTrue(isinstance(task_run_id, str))
    task_run_row = db.get_task_run(task_run_id)
    self.assertEqual(task_run_row["init_params"], init_params)
    task_run = TaskRun.get(db, task_run_id)
    self.assertEqual(task_run.task_id, task_id)

    def expect_only_created_run(found, check_owners: bool = True) -> None:
        """Assert that `found` holds exactly the task run created above"""
        self.assertEqual(len(found), 1)
        self.assertTrue(isinstance(found[0], TaskRun))
        self.assertEqual(found[0].db_id, task_run_id)
        if check_owners:
            self.assertEqual(found[0].task_id, task_id)
            self.assertEqual(found[0].requester_id, requester_id)

    # Check finding for task_runs: first unfiltered, then by each
    # specific field that should match the created run
    matching_queries = (
        {},
        {"task_id": task_id},
        {"requester_id": requester_id},
    )
    for query in matching_queries:
        expect_only_created_run(db.find_task_runs(**query))

    # Queries that should not match anything yet
    no_match = db.find_task_runs(task_id=self.get_fake_id("TaskRun"))
    self.assertEqual(len(no_match), 0)
    not_yet_completed = db.find_task_runs(is_completed=True)
    self.assertEqual(len(not_yet_completed), 0)

    # Test updating the completion status, requery
    db.update_task_run(task_run_id, True)
    completed = db.find_task_runs(is_completed=True)
    expect_only_created_run(completed, check_owners=False)
def get_task_run(self) -> "TaskRun":
    """
    Return the TaskRun this agent is working within.

    A cached value on ``self._task_run`` wins. Otherwise the run is taken
    from the agent's unit if one is set, then from its assignment, and
    only as a last resort loaded from the db by ``self.task_run_id``.
    The result is cached before being returned.
    """
    if self._task_run is not None:
        return self._task_run

    if self._unit is not None:
        resolved = self._unit.get_task_run()
    elif self._assignment is not None:
        resolved = self._assignment.get_task_run()
    else:
        # Import happens at call time rather than module load time
        from mephisto.data_model.task_run import TaskRun

        resolved = TaskRun.get(self.db, self.task_run_id)

    self._task_run = resolved
    return self._task_run
def setUp(self) -> None:
    """
    Setup should put together any requirements for starting the database
    for a test: temporary data and build directories, a local db, a test
    task run, and the component classes exposed by the BlueprintClass.
    """
    self.data_dir = tempfile.mkdtemp()
    self.build_dir = tempfile.mkdtemp()
    self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))

    # TODO(#97) we need to actually pull the task type from the Blueprint
    run_id = get_test_task_run(self.db)
    self.task_run = TaskRun.get(self.db, run_id)

    # TODO(#97) create a mock agent with the given task type?
    blueprint = self.BlueprintClass
    self.TaskRunnerClass = blueprint.TaskRunnerClass
    self.AgentStateClass = blueprint.AgentStateClass
    self.TaskBuilderClass = blueprint.TaskBuilderClass
def get_reviewable_task_runs():
    """
    Find reviewable task runs by querying for all units in the COMPLETED
    state and collecting the distinct task runs they belong to.

    Responds with JSON holding the runs (as dicts) under "task_runs" and
    the number of completed units under "total_reviewable".
    """
    db = app.extensions["db"]
    completed_units = db.find_units(status=AssignmentState.COMPLETED)
    total = len(completed_units)

    # Several units can share a run, so collapse to unique run ids first
    unique_run_ids = {
        unit.get_assignment().get_task_run().db_id for unit in completed_units
    }
    runs = [TaskRun.get(db, run_id) for run_id in unique_run_ids]
    serialized_runs = [run.to_dict() for run in runs]

    # TODO(OWN) maybe include warning for auto approve date once that's tracked
    response = {
        "task_runs": serialized_runs,
        "total_reviewable": total,
    }
    return jsonify(response)
def setUp(self) -> None:
    """
    Setup should put together any requirements for starting the database
    for a test.

    Skips the test when no ``ArchitectClass`` attribute is set. Otherwise
    prints a one-time notice about provider accounts, creates a temporary
    data directory with a local db, a temporary build directory, a test
    task run, and builds the mock task into the build directory.

    Raises:
        unittest.SkipTest: if ``self.ArchitectClass`` is not defined.
    """
    try:
        _ = self.ArchitectClass
    except AttributeError:
        # Only a missing attribute means "nothing to test"; anything else
        # (including KeyboardInterrupt) must not be turned into a skip.
        raise unittest.SkipTest("Skipping test as no ArchitectClass set")

    if not self.warned_about_setup:
        print(
            "Architect tests may require using an account with the server provider "
            "in order to function properly. Make sure these are configured before testing."
        )
        # unittest builds a fresh instance per test method, so the flag has
        # to live on the class for the notice to be printed only once.
        type(self).warned_about_setup = True

    self.data_dir = tempfile.mkdtemp()
    database_path = os.path.join(self.data_dir, "mephisto.db")
    self.db = LocalMephistoDB(database_path)
    self.build_dir = tempfile.mkdtemp()
    self.task_run = TaskRun.get(self.db, get_test_task_run(self.db))
    builder = MockTaskBuilder(self.task_run, OmegaConf.create({}))
    builder.build_in_dir(self.build_dir)
def test_assignment_fails(self) -> None:
    """Ensure assignments fail to be created or loaded under failure conditions"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    task_run = TaskRun.get(db, get_test_task_run(db))

    # Creating an assignment that points at a nonexistent task run id
    # must be rejected by the db
    with self.assertRaises(EntryDoesNotExistException):
        db.new_assignment(
            task_run.task_id,
            self.get_fake_id("TaskRun"),
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )

    # Ensure no assignments were created
    self.assertEqual(len(db.find_assignments()), 0)
def test_assignment(self) -> None:
    """Test creation and querying of assignments"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    task_run_id = get_test_task_run(db)
    task_run = TaskRun.get(db, task_run_id)

    # Check creation and retrieval of an assignment
    assignment_id = db.new_assignment(
        task_run.task_id,
        task_run_id,
        task_run.requester_id,
        task_run.task_type,
        task_run.provider_type,
        task_run.sandbox,
    )
    self.assertIsNotNone(assignment_id)
    self.assertTrue(isinstance(assignment_id, str))
    row = db.get_assignment(assignment_id)
    self.assertEqual(row["task_run_id"], task_run_id)
    loaded = Assignment.get(db, assignment_id)
    self.assertEqual(loaded.task_run_id, task_run_id)

    def expect_only_created_assignment(found) -> None:
        """Assert that `found` holds exactly the assignment created above"""
        self.assertEqual(len(found), 1)
        self.assertTrue(isinstance(found[0], Assignment))
        self.assertEqual(found[0].db_id, assignment_id)
        self.assertEqual(found[0].task_run_id, task_run_id)

    # Check finding for assignments
    expect_only_created_assignment(db.find_assignments())

    # Check finding for specific assignments
    expect_only_created_assignment(db.find_assignments(task_run_id=task_run_id))
    unmatched = db.find_assignments(task_run_id=self.get_fake_id("Assignment"))
    self.assertEqual(len(unmatched), 0)
def main():
    """
    Interactive script that reports the unassigned MTurk HITs of a task run.

    Prompts for a task_run_id, checks that the run's requester is an
    MTurkRequester (exiting with status 0 otherwise), lists the HIT ids
    the mturk datastore reports as unassigned for that run, and prints
    the MTurk data for each of those HITs.
    """
    task_run_id = input("Please enter the task_run_id you'd like to check: ")
    db = LocalMephistoDB()
    task_run = TaskRun.get(db, task_run_id)
    requester = task_run.get_requester()
    if not isinstance(requester, MTurkRequester):
        print(
            "Must be checking a task launched on MTurk, this one uses the following requester:"
        )
        print(requester)
        exit(0)

    turk_db = db.get_datastore_for_provider("mturk")
    hits = turk_db.get_unassigned_hit_ids(task_run_id)
    print(f"Found the following HIT ids unassigned: {hits}")

    # print all of the HITs found above
    from mephisto.abstractions.providers.mturk.mturk_utils import get_hit

    for hit_id in hits:
        # Look up the HIT being iterated; the lookup previously always used
        # hits[0], so every entry printed the data of the first HIT.
        hit_info = get_hit(requester._get_client(requester._requester_name), hit_id)
        print(f"MTurk HIT data for {hit_id}:\n{hit_info}\n")
def test_onboarding_agents(self) -> None:
    """Ensure that the db can create and manipulate onboarding agents"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    task_run_id = get_test_task_run(db)
    task = TaskRun.get(db, task_run_id).get_task()
    _, worker_id = get_test_worker(db)

    # Creation and direct retrieval
    onboarding_agent_id = db.new_onboarding_agent(
        worker_id, task.db_id, task_run_id, "mock"
    )
    self.assertIsNotNone(onboarding_agent_id)
    self.assertIsInstance(
        OnboardingAgent.get(db, onboarding_agent_id), OnboardingAgent
    )

    # Lookup by worker should surface exactly the agent created above,
    # still in its initial status
    matches = db.find_onboarding_agents(worker_id=worker_id)
    self.assertEqual(len(matches), 1)
    match = matches[0]
    self.assertIsInstance(match, OnboardingAgent)
    self.assertEqual(match.db_id, onboarding_agent_id)
    self.assertEqual(match.get_status(), AgentState.STATUS_NONE)
def get_submitted_data():
    """
    Return the submitted data for the units selected by the request's
    query arguments.

    Query arguments (each may be repeated):
        task_run_id: include units of every assignment of these task runs
        assignment_id: include units of these assignments
        unit_ids: include these units directly
        status: assignment statuses to keep; when absent, defaults to
            COMPLETED, ACCEPTED and REJECTED
        task_name: not supported yet; supplying any yields an error response

    Responds with JSON ``{"success": True, "units": [...]}`` where each
    entry carries the unit's ids, status, worker id and, when an agent is
    assigned, that agent's state data. Any exception is caught, its
    traceback printed, and reported as ``{"success": False, "msg": ...}``.
    """
    try:
        task_run_ids = request.args.getlist("task_run_id")
        task_names = request.args.getlist("task_name")
        assignment_ids = request.args.getlist("assignment_id")
        unit_ids = request.args.getlist("unit_ids")
        statuses = request.args.getlist("status")

        db = app.extensions["db"]
        units = []
        assignments = []

        # Explicit raise (rather than assert) so the check also holds when
        # Python runs with -O; the message reaches the client unchanged.
        if len(task_names) != 0:
            raise NotImplementedError("Searching via task names not yet supported")

        task_runs = [TaskRun.get(db, task_run_id) for task_run_id in task_run_ids]
        for task_run in task_runs:
            assignments += task_run.get_assignments()

        assignments += [
            Assignment.get(db, assignment_id) for assignment_id in assignment_ids
        ]

        if len(statuses) == 0:
            statuses = [
                AssignmentState.COMPLETED,
                AssignmentState.ACCEPTED,
                AssignmentState.REJECTED,
            ]

        filtered_assignments = [a for a in assignments if a.get_status() in statuses]

        # Collect units from the status-filtered assignments. The loop used
        # to walk the unfiltered list, which silently ignored `status`.
        for assignment in filtered_assignments:
            units += assignment.get_units()

        # Units requested directly by id are included regardless of status
        units += [Unit.get(db, unit_id) for unit_id in unit_ids]

        all_unit_data = []
        for unit in units:
            unit_data = {
                "assignment_id": unit.assignment_id,
                "task_run_id": unit.task_run_id,
                "status": unit.db_status,
                "unit_id": unit.db_id,
                "worker_id": unit.worker_id,
                "data": None,
            }
            agent = unit.get_assigned_agent()
            if agent is not None:
                unit_data["data"] = agent.state.get_data()
                unit_data["worker_id"] = agent.worker_id
            all_unit_data.append(unit_data)

        print(all_unit_data)
        return jsonify({"success": True, "units": all_unit_data})
    except Exception as e:
        import traceback

        traceback.print_exc()
        return jsonify({"success": False, "msg": str(e)})
def launch_task_run_or_die(
        self,
        run_config: DictConfig,
        shared_state: Optional[SharedTaskState] = None) -> str:
    """
    Parse the given arguments and launch a job.

    Steps, in order:
      1. Resolve the requester/provider and the blueprint, architect and
         crowd provider classes named in run_config, and let each class
         validate the config before anything is written to the database.
      2. Find the task named by run_config.task.task_name (falling back to
         the blueprint type as the name), creating it if it does not exist.
      3. Create a new task run and its live run wrapper.
      4. Prepare and deploy the architect, register the run with the
         provider and launch the client io channels.
      5. Create the assignments, launch the units, and start tracking
         the run.

    If step 4 fails (including on KeyboardInterrupt) the architect is shut
    down on a best-effort basis and the original error is re-raised.

    Args:
        run_config: configuration for the run to launch.
        shared_state: state shared with the abstractions; when None, a
            default instance of the blueprint's SharedStateClass is used.

    Returns:
        The db_id of the newly created task run.
    """
    set_mephisto_log_level(level=run_config.get("log_level", "info"))

    requester, provider_type = self._get_requester_and_provider_from_config(
        run_config)

    # Next get the abstraction classes, and run validation
    # before anything is actually created in the database
    blueprint_type = run_config.blueprint._blueprint_type
    architect_type = run_config.architect._architect_type
    BlueprintClass = get_blueprint_from_type(blueprint_type)
    ArchitectClass = get_architect_from_type(architect_type)
    CrowdProviderClass = get_crowd_provider_from_type(provider_type)

    if shared_state is None:
        shared_state = BlueprintClass.SharedStateClass()

    BlueprintClass.assert_task_args(run_config, shared_state)
    ArchitectClass.assert_task_args(run_config, shared_state)
    CrowdProviderClass.assert_task_args(run_config, shared_state)

    # Find an existing task or create a new one
    task_name = run_config.task.get("task_name", None)
    if task_name is None:
        task_name = blueprint_type
        logger.warning(
            f"Task is using the default blueprint name {task_name} as a name, "
            "as no task_name is provided")
    tasks = self.db.find_tasks(task_name=task_name)
    task_id = None
    if len(tasks) == 0:
        task_id = self.db.new_task(task_name, blueprint_type)
    else:
        # When several tasks share the name, the first match is used
        task_id = tasks[0].db_id

    logger.info(f"Creating a task run under task name: {task_name}")

    # Create a new task run
    # The resolved config is stored with the run as JSON-wrapped YAML
    new_run_id = self.db.new_task_run(
        task_id,
        requester.db_id,
        json.dumps(OmegaConf.to_yaml(run_config, resolve=True)),
        provider_type,
        blueprint_type,
        requester.is_sandbox(),
    )
    task_run = TaskRun.get(self.db, new_run_id)

    live_run = self._create_live_task_run(
        run_config,
        shared_state,
        task_run,
        ArchitectClass,
        BlueprintClass,
        CrowdProviderClass,
    )

    try:
        # If anything fails after here, we have to cleanup the architect

        # Setup and deploy the server
        # NOTE(review): built_dir is not used after this assignment
        built_dir = live_run.architect.prepare()
        task_url = live_run.architect.deploy()

        # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
        # happen after everything has already been reviewed, this way it's possible to
        # retrieve the exact build directory to review a task for real
        live_run.architect.cleanup()

        # Register the task with the provider
        live_run.provider.setup_resources_for_task_run(
            task_run, run_config, shared_state, task_url)

        live_run.client_io.launch_channels()
    except (KeyboardInterrupt, Exception) as e:
        logger.error(
            "Encountered error while launching run, shutting down",
            exc_info=True)
        try:
            live_run.architect.shutdown()
        except (KeyboardInterrupt, Exception) as architect_exception:
            # A failed shutdown is only logged so that the original launch
            # error below is the one that propagates
            logger.exception(
                f"Could not shut down architect: {architect_exception}",
                exc_info=True,
            )
        raise e

    live_run.task_launcher.create_assignments()
    live_run.task_launcher.launch_units(task_url)

    # Keep the live run reachable by its task run id
    self._task_runs_tracked[task_run.db_id] = live_run
    task_run.update_completion_progress(status=False)

    return task_run.db_id
def setUp(self) -> None:
    """
    Create a temporary data directory holding a fresh local db, and load
    a test task run from it for the tests to use.
    """
    tmp_dir = tempfile.mkdtemp()
    self.data_dir = tmp_dir
    self.db = LocalMephistoDB(os.path.join(tmp_dir, "mephisto.db"))

    run_id = get_test_task_run(self.db)
    self.task_run = TaskRun.get(self.db, run_id)
def main():
    """
    Script to launch makeup tasks for workers that can't be bonused via
    other avenues. Creates a task for a worker, qualifying them directly,
    and marks as a soft_rejected HIT for the given task name.

    Flow:
      1. Prompt for an existing task name and a requester name.
      2. Repeatedly prompt for (worker id, amount, reason) entries; a blank
         amount or reason reuses the previously entered value.
      3. After confirmation, for each entry: create/look up a per-worker
         qualification and grant it, create the task run, assignment and
         unit, create and deploy the compensation HIT on MTurk, email the
         worker the link, and mark the unit as SOFT_REJECTED.

    Returns 0 when exiting early (unknown task or requester, or nothing
    entered); returns None after launching.
    """
    db = LocalMephistoDB()

    task_name = input(
        "Please enter a task name for bookkeeping. This task name will be tied to "
        "the additional spend granted through this script, and should be the same "
        "as the task you originally launched that you now need to compensate for:\n>> "
    )
    tasks = db.find_tasks(task_name=task_name)
    if len(tasks) == 0:
        print("No tasks found with the given name...")
        all_tasks = db.find_tasks()
        all_names = {t.task_name for t in all_tasks}
        print(
            f"Choose an existing task of {all_names} to use this functionality."
        )
        print("Compensation hits must be tied to an existing task")
        return 0
    task = tasks[0]

    req_name = input(
        "Please enter an MTurkRequester name to use to bonus from:\n>> ")
    requesters = db.find_requesters(requester_name=req_name)
    if len(requesters) == 0:
        print("Could not find a requester by that name...")
        return 0
    requester = requesters[0]
    client = requester._get_client(requester._requester_name)

    print(
        "You can now enter a worker id, amount, and reason for as many compensation tasks "
        "as you want to launch for this.")
    compensation_hits = []
    # amount/reason persist across iterations so that a blank answer can
    # fall back to the value entered for the previous worker
    amount = None
    reason = None
    while True:
        worker_id = input(
            "Enter a worker id to compensate. Leave blank to move on to launching: \n>> "
        ).strip()
        if len(worker_id) == 0:
            break

        prev_amount = "" if amount is None else f" (leave blank for ${amount})"
        next_amount = input(
            f"Enter the amount in dollars to pay out in this compensation task{prev_amount}:\n>> $"
        )
        amount = float(next_amount) if len(next_amount.strip()) != 0 else amount
        assert amount is not None, "Amount can not be left blank"

        # The hint now closes its parenthesis, matching the amount prompt
        prev_reason = "" if reason is None else f" (leave blank for '{reason}')"
        next_reason = input(
            f"Provide reason for launching this compensation task. This will be sent to the worker{prev_reason}:\n>> "
        )
        reason = next_reason if len(next_reason.strip()) != 0 else reason
        assert reason is not None, "Reason can not be left blank"

        compensation_hits.append({
            "worker_id": worker_id,
            "amount": amount,
            "reason": reason,
        })

    if len(compensation_hits) == 0:
        print("No compensation details provided, exiting")
        return 0

    print(f"You entered the following tasks:\n{compensation_hits}")
    input("Input anything to confirm and continue...")

    # Iterate through and launch tasks
    for comp_dict in compensation_hits:
        # Create the MTurk qualification for this specific worker
        worker_id = comp_dict["worker_id"]
        qual_name = f"compensation-for-{worker_id}-on-{task_name}"
        print(f"Creating qualification for {worker_id}: {qual_name}....")
        qualification = make_qualification_dict(qual_name, QUAL_EXISTS, None)
        qual_map = requester.datastore.get_qualification_mapping(qual_name)
        if qual_map is None:
            qualification["QualificationTypeId"] = (
                requester._create_new_mturk_qualification(qual_name)
            )
        else:
            qualification["QualificationTypeId"] = qual_map["mturk_qualification_id"]
        give_worker_qualification(
            client, worker_id, qualification["QualificationTypeId"]
        )

        # Create the task run for this HIT
        print("Creating task run and data model components for this HIT")
        config = build_task_config(comp_dict, requester)
        init_params = OmegaConf.to_yaml(OmegaConf.structured(config))
        new_run_id = db.new_task_run(
            task.db_id,
            requester.db_id,
            json.dumps(init_params),
            requester.provider_type,
            "mock",
            requester.is_sandbox(),
        )
        task_run = TaskRun.get(db, new_run_id)

        # Create an assignment, unit, agent, and mark as assigned
        # Assignment creation
        task_args = task_run.get_task_args()
        assignment_id = db.new_assignment(
            task_run.task_id,
            task_run.db_id,
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )
        data = InitializationData({}, [{}])
        assignment = Assignment.get(db, assignment_id)
        assignment.write_assignment_data(data)

        # Unit creation
        unit_id = db.new_unit(
            task_run.task_id,
            task_run.db_id,
            task_run.requester_id,
            assignment_id,
            COMPENSATION_UNIT_INDEX,
            task_args.task_reward,
            task_run.provider_type,
            task_run.task_type,
            task_run.sandbox,
        )
        compensation_unit = Unit.get(db, unit_id)
        print(f"Created {task_run}, {assignment}, and {compensation_unit}...")

        # Set up HIT type
        hit_type_id = create_hit_type(
            client,
            task_args,
            [qualification],
            auto_approve_delay=30,
            skip_locale_qual=True,
        )

        # Create the task on MTurk, email the worker
        print("Creating and deploying task on MTurk")
        duration = 60 * 60 * 24  # one day, in seconds
        hit_link, hit_id, response = create_compensation_hit_with_hit_type(
            client, comp_dict["reason"], hit_type_id)
        requester.datastore.new_hit(hit_id, hit_link, duration, task_run.db_id)

        print("Sending email to worker...")
        # Use this entry's own reason: the bare `reason` variable still holds
        # whatever was typed last in the input loop above, so every worker
        # would otherwise be emailed the final worker's reason.
        result = email_worker(
            client,
            worker_id,
            "Compensation HIT Launched",
            ("Hello Worker,\n We've launched a compensation hit for a task that you've worked on "
             f"for us in the past. The reason supplied for this task was: {comp_dict['reason']}. This task is "
             f"only doable by you, and should reward ${comp_dict['amount']}. Thanks for being a valued "
             "contributor to our tasks, and for allowing us to try and resolve the issue.\n\n"
             f"Your task can be accessed at the following link: {hit_link}."),
        )

        if not result[0]:
            print(
                f"Email send failed, for reason {result[1]}\n"
                f"Please send {hit_link} to {worker_id} yourself if they reached out about this issue."
            )

        # Mark the agent as soft_rejected, such that we've "paid" it
        compensation_unit.set_db_status(AssignmentState.SOFT_REJECTED)