def _create_single_assignment(self, assignment_data) -> None:
    """
    Store one assignment and its units in the database.

    A new assignment row is created from the fields of ``self.task_run``,
    ``assignment_data`` is written out through the resulting ``Assignment``,
    and one unit is created per entry of ``assignment_data["unit_data"]``.
    Each new unit is appended to ``self.units`` and also registered in
    ``self.unlaunched_units`` (keyed by unit id) while holding
    ``self.unlaunched_units_access_condition``.
    """
    run = self.task_run
    config = run.get_task_config()

    new_assignment_id = self.db.new_assignment(
        run.task_id,
        run.db_id,
        run.requester_id,
        run.task_type,
        run.provider_type,
        run.sandbox,
    )
    created = Assignment(self.db, new_assignment_id)
    created.write_assignment_data(assignment_data)
    self.assignments.append(created)

    # Unit indices run from 0 up to the number of unit_data entries
    for index in range(len(assignment_data["unit_data"])):
        new_unit_id = self.db.new_unit(
            run.task_id,
            run.db_id,
            run.requester_id,
            new_assignment_id,
            index,
            config.task_reward,
            run.provider_type,
            run.task_type,
            run.sandbox,
        )
        self.units.append(Unit(self.db, new_unit_id))
        # The unlaunched map gets its own Unit instance, as before
        with self.unlaunched_units_access_condition:
            self.unlaunched_units[new_unit_id] = Unit(self.db, new_unit_id)
def get_unit(self) -> "Unit":
    """
    Return the Unit that this agent is working on.

    The Unit is built from ``self.db`` and ``self.unit_id`` on first use
    and cached on ``self._unit`` for later calls.
    """
    cached = self._unit
    if cached is not None:
        return cached

    # Imported here rather than at module level, as in the original code
    from mephisto.data_model.assignment import Unit

    self._unit = Unit(self.db, self.unit_id)
    return self._unit
def get_mturk_ids_from_unit_id(db, unit_id: str) -> Dict[str, Optional[str]]:
    """
    Look up the mturk ids associated with the given mephisto unit id.

    Returns a dict with the keys ``"assignment_id"``, ``"hit_id"`` and
    ``"worker_id"``. ``"worker_id"`` is None when the unit has no assigned
    agent.
    """
    unit = Unit(db, unit_id)
    ids: Dict[str, Optional[str]] = {
        "assignment_id": unit.get_mturk_assignment_id(),
        "hit_id": unit.get_mturk_hit_id(),
        "worker_id": None,
    }
    assigned = unit.get_assigned_agent()
    if assigned is not None:
        ids["worker_id"] = assigned.get_worker().get_mturk_worker_id()
    return ids
def test_unit_fails(self) -> None:
    """Check that units cannot be loaded or created under failure conditions"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Loading an id that does not exist must raise
    with self.assertRaises(EntryDoesNotExistException):
        Unit(db, self.get_fake_id("Unit"))

    assignment = Assignment(db, get_test_assignment(db))
    unit_index = 0
    pay_amount = 15.0
    provider_type = PROVIDER_TYPE

    def create_unit(for_assignment_id):
        # Same arguments every time; only the assignment id varies
        return db.new_unit(
            assignment.task_id,
            assignment.task_run_id,
            assignment.requester_id,
            for_assignment_id,
            unit_index,
            pay_amount,
            provider_type,
            assignment.sandbox,
        )

    # An assignment id that does not exist is rejected
    with self.assertRaises(EntryDoesNotExistException):
        create_unit(self.get_fake_id("Assignment"))

    # A valid creation succeeds...
    create_unit(assignment.db_id)

    # ...but creating the same unit a second time is rejected
    with self.assertRaises(EntryAlreadyExistsException):
        create_unit(assignment.db_id)

    # Only the single successful creation should be stored
    stored_units = db.find_units()
    self.assertEqual(len(stored_units), 1)
def make_completed_unit(db: MephistoDB) -> str:
    """
    Create a unit, assign it to a worker, mark that agent done, and return
    the unit's db id.

    The last worker returned by ``db.find_workers()`` and the last task run
    returned by ``db.find_task_runs(is_completed=False)`` are used.

    Assumes the database already holds at least one worker and at least
    one incomplete task run; both conditions are asserted.
    """
    known_workers = db.find_workers()
    assert len(known_workers) > 0, "Must have at least one worker in database"
    last_worker = known_workers[-1]

    open_runs = db.find_task_runs(is_completed=False)
    assert len(open_runs) > 0, "Must be at least one incomplete task run"
    run = open_runs[-1]

    assignment_id = db.new_assignment(
        run.task_id,
        run.db_id,
        run.requester_id,
        run.task_type,
        run.provider_type,
    )
    # Single unit at index 0 with a pay amount of 0.2
    unit_id = db.new_unit(
        run.task_id,
        run.db_id,
        run.requester_id,
        assignment_id,
        0,
        0.2,
        run.provider_type,
        run.task_type,
    )
    agent_id = db.new_agent(
        last_worker.db_id,
        unit_id,
        run.task_id,
        run.db_id,
        assignment_id,
        run.task_type,
        run.provider_type,
    )
    Agent(db, agent_id).mark_done()

    # Sync the unit's status after the agent was marked done
    finished_unit = Unit(db, unit_id)
    finished_unit.sync_status()
    return finished_unit.db_id
def get_data_from_unit(self, unit: Unit) -> Dict[str, Any]:
    """
    Collect the data recorded by the agent assigned to ``unit``.

    Returns a dict with the keys ``"worker_id"``, ``"unit_id"``,
    ``"assignment_id"``, ``"status"``, ``"data"``, ``"task_start"`` and
    ``"task_end"``. Asserts that the unit has an assigned agent.
    """
    assigned_agent = unit.get_assigned_agent()
    assert (
        assigned_agent is not None
    ), f"Trying to get completed data from unassigned unit {unit}"

    summary: Dict[str, Any] = {
        "worker_id": assigned_agent.worker_id,
        "unit_id": unit.db_id,
        "assignment_id": unit.assignment_id,
        "status": assigned_agent.db_status,
    }
    agent_state = assigned_agent.state
    summary["data"] = agent_state.get_parsed_data()
    summary["task_start"] = agent_state.get_task_start()
    summary["task_end"] = agent_state.get_task_end()
    return summary
def test_unit(self) -> None:
    """Exercise unit creation, row retrieval and the unit finders"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Create a unit under a test assignment
    assignment_id = get_test_assignment(db)
    assignment = Assignment(db, assignment_id)
    unit_index = 0
    pay_amount = 15.0
    provider_type = PROVIDER_TYPE
    unit_id = db.new_unit(
        assignment.task_id,
        assignment.task_run_id,
        assignment.requester_id,
        assignment.db_id,
        unit_index,
        pay_amount,
        provider_type,
        assignment.sandbox,
    )
    self.assertIsNotNone(unit_id)
    self.assertTrue(isinstance(unit_id, str))

    # The stored row carries the values that were passed in
    stored_row = db.get_unit(unit_id)
    self.assertEqual(stored_row["assignment_id"], assignment_id)
    self.assertEqual(stored_row["pay_amount"], pay_amount)
    self.assertEqual(stored_row["status"], AssignmentState.CREATED)

    # The Unit wrapper exposes the same assignment id
    loaded_unit = Unit(db, unit_id)
    self.assertEqual(loaded_unit.assignment_id, assignment_id)

    def assert_only_created_unit(found) -> None:
        # Exactly one unit is expected, and it must be the one created above
        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertTrue(isinstance(only, Unit))
        self.assertEqual(only.db_id, unit_id)
        self.assertEqual(only.assignment_id, assignment_id)
        self.assertEqual(only.pay_amount, pay_amount)

    # Unfiltered search, then search restricted to the real assignment
    assert_only_created_unit(db.find_units())
    assert_only_created_unit(db.find_units(assignment_id=assignment_id))

    # Searching by an assignment that does not exist finds nothing
    missing = db.find_units(assignment_id=self.get_fake_id("Assignment"))
    self.assertEqual(len(missing), 0)
def test_agent(self) -> None:
    """Exercise agent creation, row retrieval and the agent finders"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Create an agent for a test worker on a test unit
    worker_name, worker_id = get_test_worker(db)
    unit_id = get_test_unit(db)
    unit = Unit(db, unit_id)
    agent_id = db.new_agent(
        worker_id,
        unit_id,
        unit.task_id,
        unit.task_run_id,
        unit.assignment_id,
        unit.task_type,
        unit.provider_type,
    )
    self.assertIsNotNone(agent_id)
    self.assertTrue(isinstance(agent_id, str))

    # The stored row carries the values that were passed in
    stored_row = db.get_agent(agent_id)
    self.assertEqual(stored_row["worker_id"], worker_id)
    self.assertEqual(stored_row["unit_id"], unit_id)
    self.assertEqual(stored_row["status"], AgentState.STATUS_NONE)

    # Creating the agent should have left exactly one unit in ASSIGNED
    assigned_units = db.find_units(status=AssignmentState.ASSIGNED)
    self.assertEqual(len(assigned_units), 1)

    # The Agent wrapper exposes the same worker id
    loaded_agent = Agent(db, agent_id)
    self.assertEqual(loaded_agent.worker_id, worker_id)

    def assert_only_created_agent(found) -> None:
        # Exactly one agent is expected, and it must be the one created above
        self.assertEqual(len(found), 1)
        only = found[0]
        self.assertTrue(isinstance(only, Agent))
        self.assertEqual(only.db_id, agent_id)
        self.assertEqual(only.worker_id, worker_id)

    # Unfiltered search, then search restricted to the real worker
    assert_only_created_agent(db.find_agents())
    assert_only_created_agent(db.find_agents(worker_id=worker_id))

    # Searching by a worker that does not exist finds nothing
    missing = db.find_agents(worker_id=self.get_fake_id("Worker"))
    self.assertEqual(len(missing), 0)
def get_test_agent(db: MephistoDB, unit_id=None) -> str:
    """
    Register an agent for a test worker on a unit and return its id.

    The worker id is taken from ``get_test_worker(db)``. When ``unit_id``
    is None, a unit id is obtained from ``get_test_unit(db)``. The task,
    task run, assignment, task type and provider type passed to
    ``db.new_agent`` are all read from the loaded ``Unit``.

    Returns the value of ``db.new_agent(...)``.
    """
    # Only the worker id is needed; the name is intentionally ignored
    _worker_name, worker_id = get_test_worker(db)
    if unit_id is None:
        unit_id = get_test_unit(db)
    unit = Unit(db, unit_id)
    return db.new_agent(
        worker_id,
        unit.db_id,
        unit.task_id,
        unit.task_run_id,
        unit.assignment_id,
        unit.task_type,
        unit.provider_type,
    )
def test_agent_fails(self) -> None:
    """Check that agents cannot be loaded or created under failure conditions"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Loading an id that does not exist must raise
    with self.assertRaises(EntryDoesNotExistException):
        Agent(db, self.get_fake_id("Agent"))

    unit_id = get_test_unit(db)
    worker_name, worker_id = get_test_worker(db)
    unit = Unit(db, unit_id)

    def create_agent(for_worker_id, for_unit_id):
        # Same arguments every time; only the worker and unit ids vary
        return db.new_agent(
            for_worker_id,
            for_unit_id,
            unit.task_id,
            unit.task_run_id,
            unit.assignment_id,
            unit.task_type,
            unit.provider_type,
        )

    # A worker id that does not exist is rejected
    with self.assertRaises(EntryDoesNotExistException):
        create_agent(self.get_fake_id("Worker"), unit_id)

    # A unit id that does not exist is rejected
    with self.assertRaises(EntryDoesNotExistException):
        create_agent(worker_id, self.get_fake_id("Unit"))

    # Neither failed attempt may have left an agent behind
    stored_agents = db.find_agents()
    self.assertEqual(len(stored_agents), 0)
def format_for_printing_data(data):
    """
    Render one unit's submitted data as a printable multi-line string.

    Custom tasks can define how their data is displayed; this version
    prints the worker name, unit id, duration and status, followed by the
    character inputs, the rating, and any attached files. ``db`` is read
    from the enclosing scope.
    """
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    duration = contents["times"]["task_end"] - contents["times"]["task_start"]

    # Sections are collected in display order and joined at the end
    sections = [
        "-------------------\n",
        f"Worker: {worker_name}\nUnit: {data['unit_id']}\n"
        f"Duration: {int(duration)}\nStatus: {data['status']}\n",
    ]

    inputs = contents["inputs"]
    sections.append(
        f"Character: {inputs['character_name']}\n"
        f"Description: {inputs['character_description']}\n"
    )

    outputs = contents["outputs"]
    sections.append(f" Rating: {outputs['rating']}\n")

    attached = outputs.get("files")
    if attached is None:
        sections.append(" Files: No files attached\n")
    else:
        # The files are located through the agent assigned to the unit
        file_dir = Unit(db, data["unit_id"]).get_assigned_agent().get_data_dir()
        sections.append(f" Files: {attached}\n")
        sections.append(f" File directory {file_dir}\n")

    return "".join(sections)
class Agent(ABC):
    """
    A worker as they are working on one individual assignment.

    Keeps the details of the task at hand: the database ids and status of
    the agent, the packets pending in each direction, and the threading
    events used to signal actions, status updates and submission.
    """

    def __init__(
        self, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
    ):
        """Populate the agent from ``row``, reading it from ``db`` when omitted."""
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"

        # Fields copied directly from the database row
        self.db_id: str = row["agent_id"]
        self.db_status = row["status"]
        self.worker_id = row["worker_id"]
        self.unit_id = row["unit_id"]
        self.task_type = row["task_type"]
        self.provider_type = row["provider_type"]
        self.assignment_id = row["assignment_id"]
        self.task_run_id = row["task_run_id"]
        self.task_id = row["task_id"]

        # Packets queued in each direction
        self.pending_observations: List["Packet"] = []
        self.pending_actions: List["Packet"] = []

        # Events used to coordinate with whoever drives this agent
        self.has_action = threading.Event()
        self.has_action.clear()
        self.wants_action = threading.Event()
        self.wants_action.clear()
        self.has_updated_status = threading.Event()
        self.did_submit = threading.Event()

        # Related entities are loaded on first access
        self._worker: Optional["Worker"] = None
        self._unit: Optional["Unit"] = None
        self._assignment: Optional["Assignment"] = None
        self._task_run: Optional["TaskRun"] = None
        self._task: Optional["Task"] = None

        # The state is built last, once the fields above are in place
        self.state = AgentState(self)  # type: ignore

    def __new__(
        cls, db: "MephistoDB", db_id: str, row: Optional[Mapping[str, Any]] = None
    ) -> "Agent":
        """
        Construct the Agent subclass that matches the stored provider type.

        Calling ``Agent(...)`` directly never yields a base Agent: the
        row's ``provider_type`` is used to look up the crowd provider, and
        an instance of that provider's ``AgentClass`` is created instead.
        Calling a subclass directly creates that subclass.
        """
        from mephisto.core.registry import get_crowd_provider_from_type

        if cls != Agent:
            # A specific subclass was requested; build exactly that
            return super().__new__(cls)

        # Base Agent requested: work out which concrete class to use
        if row is None:
            row = db.get_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        provider = get_crowd_provider_from_type(row["provider_type"])
        return super().__new__(provider.AgentClass)

    def get_agent_id(self) -> str:
        """Return the id of this agent"""
        return self.db_id

    def get_worker(self) -> Worker:
        """Return the worker acting through this agent, loading it once."""
        if self._worker is not None:
            return self._worker
        self._worker = Worker(self.db, self.worker_id)
        return self._worker

    def get_unit(self) -> "Unit":
        """Return the Unit this agent is working on, loading it once."""
        if self._unit is not None:
            return self._unit
        from mephisto.data_model.assignment import Unit

        self._unit = Unit(self.db, self.unit_id)
        return self._unit

    def get_assignment(self) -> "Assignment":
        """
        Return the assignment this agent is working on.

        An already loaded unit is asked for it first; otherwise the
        assignment is constructed from ``self.assignment_id``.
        """
        if self._assignment is not None:
            return self._assignment
        if self._unit is not None:
            self._assignment = self._unit.get_assignment()
        else:
            from mephisto.data_model.assignment import Assignment

            self._assignment = Assignment(self.db, self.assignment_id)
        return self._assignment

    def get_task_run(self) -> "TaskRun":
        """
        Return the TaskRun this agent is working within.

        An already loaded unit or assignment is asked for it first;
        otherwise the task run is constructed from ``self.task_run_id``.
        """
        if self._task_run is not None:
            return self._task_run
        if self._unit is not None:
            self._task_run = self._unit.get_task_run()
        elif self._assignment is not None:
            self._task_run = self._assignment.get_task_run()
        else:
            from mephisto.data_model.task import TaskRun

            self._task_run = TaskRun(self.db, self.task_run_id)
        return self._task_run

    def get_task(self) -> "Task":
        """
        Return the Task this agent is working within.

        An already loaded unit, assignment or task run is asked for it
        first; otherwise the task is constructed from ``self.task_id``.
        """
        if self._task is not None:
            return self._task
        if self._unit is not None:
            self._task = self._unit.get_task()
        elif self._assignment is not None:
            self._task = self._assignment.get_task()
        elif self._task_run is not None:
            self._task = self._task_run.get_task()
        else:
            from mephisto.data_model.task import Task

            self._task = Task(self.db, self.task_id)
        return self._task

    def get_data_dir(self) -> str:
        """
        Return the directory for storing this agent's state: the
        assignment's data directory joined with this agent's id.
        """
        return os.path.join(self.get_assignment().get_data_dir(), self.db_id)

    def update_status(self, new_status: str) -> None:
        """
        Write ``new_status`` to the database and signal the change.

        Does nothing when the status is unchanged. Overwriting a status
        listed in ``AgentState.complete()`` is reported with a print but
        still performed. Returned and disconnect statuses additionally
        release anything waiting on an action or a submit.
        """
        if self.db_status == new_status:
            return  # Nothing to do, the status already matches
        if self.db_status in AgentState.complete():
            print(
                f"Updating a final status, was {self.db_status} "
                f"and want to set to {new_status}"
            )
        self.db.update_agent(self.db_id, status=new_status)
        self.db_status = new_status
        self.has_updated_status.set()
        if new_status in (
            AgentState.STATUS_RETURNED,
            AgentState.STATUS_DISCONNECT,
        ):
            # Disconnect statuses should free any pending acts
            self.has_action.set()
            self.did_submit.set()

    @staticmethod
    def _register_agent(
        db: "MephistoDB", worker: Worker, unit: "Unit", provider_type: str
    ) -> "Agent":
        """
        Create the agent's row in the mephisto db, load it, and move it to
        the accepted status.
        """
        new_id = db.new_agent(
            worker.db_id,
            unit.db_id,
            unit.task_id,
            unit.task_run_id,
            unit.assignment_id,
            unit.task_type,
            provider_type,
        )
        registered = Agent(db, new_id)
        registered.update_status(AgentState.STATUS_ACCEPTED)
        return registered

    # Specialized child cases may need to implement the following

    @classmethod
    def new_from_provider_data(
        cls,
        db: "MephistoDB",
        worker: Worker,
        unit: "Unit",
        provider_data: Dict[str, Any],
    ) -> "Agent":
        """
        Wrapper around ``new`` that lets a crowd provider register extra
        bookkeeping information for the agent. This base version only
        links the unit to the worker and caches the unit on the agent.
        """
        created = cls.new(db, worker, unit)
        unit.worker_id = worker.db_id
        created._unit = unit
        return created

    def observe(self, packet: "Packet") -> None:
        """
        Record a copy of ``packet`` addressed to this agent in the
        AgentState, then queue that copy to be pushed to the user.
        """
        outgoing = packet.copy()
        outgoing.receiver_id = self.db_id
        self.state.update_data(outgoing)
        self.pending_observations.append(outgoing)

    def act(self, timeout: Optional[int] = None) -> Optional["Packet"]:
        """
        Request information from the Agent's frontend.

        With no queued action and a ``timeout`` of None or 0 this is
        non-blocking and returns None. Otherwise it waits up to
        ``timeout`` for an action; if none arrives it raises the
        disconnect, returned or timeout error matching the agent's
        status (marking the agent timed out in the last case).
        """
        if not self.pending_actions:
            self.wants_action.set()
            if timeout is None or timeout == 0:
                return None
            self.has_action.wait(timeout)

        if not self.pending_actions:
            # Woken without an action: work out which failure this is
            status = self.get_status()
            if status == AgentState.STATUS_DISCONNECT:
                raise AgentDisconnectedError(self.db_id)
            if status == AgentState.STATUS_RETURNED:
                raise AgentReturnedError(self.db_id)
            self.update_status(AgentState.STATUS_TIMEOUT)
            raise AgentTimeoutError(timeout, self.db_id)
        assert len(self.pending_actions) > 0, "has_action released without an action!"

        packet = self.pending_actions.pop(0)
        payload = packet.data
        if "MEPHISTO_is_submit" in payload and payload["MEPHISTO_is_submit"]:
            self.did_submit.set()

        if not self.pending_actions:
            # Queue drained, so later calls must wait again
            self.has_action.clear()
        self.state.update_data(packet)
        return packet

    def get_status(self) -> str:
        """
        Return this agent's status for their work on their unit.

        Unless the cached status is already in ``AgentState.complete()``,
        the status is re-read from the database and the matching events
        are set when it has changed.
        """
        if self.db_status in AgentState.complete():
            return self.db_status

        stored_status = self.db.get_agent(self.db_id)["status"]
        if stored_status != self.db_status:
            if stored_status in (
                AgentState.STATUS_RETURNED,
                AgentState.STATUS_DISCONNECT,
            ):
                # Disconnect statuses should free any pending acts
                self.has_action.set()
            self.has_updated_status.set()
        self.db_status = stored_status
        return self.db_status

    # Children classes should implement the following methods

    def approve_work(self) -> None:
        """Approve the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def soft_reject_work(self) -> None:
        """
        Pay the worker for attempted work, but record it as below the
        quality bar for this assignment
        """
        # TODO(OWN) extend this method to assign a soft block
        # qualification automatically if a threshold of
        # soft rejects as a proportion of total accepts
        # is exceeded
        self.approve_work()
        self.update_status(AgentState.STATUS_SOFT_REJECTED)

    def reject_work(self, reason) -> None:
        """Reject the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def mark_done(self) -> None:
        """
        Take any step required with the crowd_provider so that the worker
        can submit their work and be marked as complete via a call to
        get_status
        """
        raise NotImplementedError()

    @staticmethod
    def new(db: "MephistoDB", worker: Worker, unit: "Unit") -> "Agent":
        """
        Create an agent for this worker to be used for work on the given
        Unit.

        Implementations should return the result of _register_agent once
        sure the agent can be created, so that it is put into the db.
        """
        raise NotImplementedError()
def get_submitted_data():
    """
    Return JSON describing the units selected by the request's query args.

    Query args read (each may be given several times): ``task_run_id``,
    ``task_name``, ``assignment_id``, ``unit_ids`` and ``status``.
    Assignments come from the listed task runs plus the listed assignment
    ids, and only those whose status is in ``status`` contribute units.
    When no ``status`` is given, COMPLETED, ACCEPTED and REJECTED are used.
    Units listed in ``unit_ids`` are always included, unfiltered.

    Responds with ``{"success": True, "units": [...]}``, or with
    ``{"success": False, "msg": ...}`` if anything raises. Searching by
    ``task_name`` is not supported and yields the failure response.
    """
    try:
        task_run_ids = request.args.getlist("task_run_id")
        task_names = request.args.getlist("task_name")
        assignment_ids = request.args.getlist("assignment_id")
        # NOTE(review): this key is plural while the others are singular;
        # left unchanged because clients may already depend on it.
        unit_ids = request.args.getlist("unit_ids")
        statuses = request.args.getlist("status")

        db = app.extensions["db"]

        # Explicit raise rather than assert so the check survives `python -O`
        if len(task_names) != 0:
            raise NotImplementedError("Searching via task names not yet supported")

        assignments = []
        task_runs = [TaskRun(db, task_run_id) for task_run_id in task_run_ids]
        for task_run in task_runs:
            assignments += task_run.get_assignments()
        assignments += [
            Assignment(db, assignment_id) for assignment_id in assignment_ids
        ]

        if len(statuses) == 0:
            statuses = [
                AssignmentState.COMPLETED,
                AssignmentState.ACCEPTED,
                AssignmentState.REJECTED,
            ]

        # Previously the filtered list was built but never used, so every
        # assignment's units were returned regardless of status.
        filtered_assignments = [
            a for a in assignments if a.get_status() in statuses
        ]

        units = []
        for assignment in filtered_assignments:
            units += assignment.get_units()
        units += [Unit(db, unit_id) for unit_id in unit_ids]

        all_unit_data = []
        for unit in units:
            unit_data = {
                "assignment_id": unit.assignment_id,
                "task_run_id": unit.task_run_id,
                "status": unit.db_status,
                "unit_id": unit.db_id,
                "worker_id": unit.worker_id,
                "data": None,
            }
            agent = unit.get_assigned_agent()
            if agent is not None:
                # The agent's data and worker id override the unit's defaults
                unit_data["data"] = agent.state.get_data()
                unit_data["worker_id"] = agent.worker_id
            all_unit_data.append(unit_data)

        print(all_unit_data)
        return jsonify({"success": True, "units": all_unit_data})
    except Exception as e:
        # Top-level request boundary: log the traceback, report the failure
        import traceback

        traceback.print_exc()
        return jsonify({"success": False, "msg": str(e)})