def test_onboarding_agents(self) -> None:
    """Check that onboarding agents can be created in and queried from the db"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Prerequisite rows: a task run (with its task) and a worker
    run_id = get_test_task_run(db)
    task = TaskRun(db, run_id).get_task()
    _worker_name, worker_id = get_test_worker(db)

    # Creating the agent yields an id that can be wrapped in an OnboardingAgent
    created_id = db.new_onboarding_agent(worker_id, task.db_id, run_id, "mock")
    self.assertIsNotNone(created_id)
    wrapped = OnboardingAgent(db, created_id)
    self.assertIsInstance(wrapped, OnboardingAgent)

    # Searching by worker returns exactly the agent created above, and it
    # reports the STATUS_NONE status
    matches = db.find_onboarding_agents(worker_id=worker_id)
    self.assertEqual(len(matches), 1)
    self.assertIsInstance(matches[0], OnboardingAgent)
    match = matches[0]
    self.assertEqual(match.db_id, created_id)
    self.assertEqual(match.get_status(), AgentState.STATUS_NONE)
class Unit(ABC):
    """
    The smallest piece of work making up an assignment: one worker's
    contribution. A Unit only knows how to report and record the status
    of that piece of work.

    Crowd providers are expected to subclass this and fill in the
    provider-specific methods at the bottom of the class.
    """

    def __init__(
        self,
        db: "MephistoDB",
        db_id: str,
        row: Optional[Mapping[str, Any]] = None,
    ):
        """
        Populate this Unit from its database row, fetching the row with
        db.get_unit(db_id) when the caller did not supply one.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_unit(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"

        # Plain column values copied from the row
        self.db_id: str = row["unit_id"]
        self.assignment_id = row["assignment_id"]
        self.unit_index = row["unit_index"]
        self.pay_amount = row["pay_amount"]
        self.agent_id = row["agent_id"]
        self.provider_type = row["provider_type"]
        self.db_status = row["status"]
        self.task_type = row["task_type"]
        self.task_id = row["task_id"]
        self.task_run_id = row["task_run_id"]
        self.sandbox = row["sandbox"]
        self.requester_id = row["requester_id"]
        self.worker_id = row["worker_id"]

        # Related entities are only built the first time they are asked for
        self.__task: Optional["Task"] = None
        self.__task_run: Optional["TaskRun"] = None
        self.__assignment: Optional["Assignment"] = None
        self.__requester: Optional["Requester"] = None
        self.__agent: Optional["Agent"] = None
        self.__worker: Optional["Worker"] = None

    def __new__(
        cls,
        db: "MephistoDB",
        db_id: str,
        row: Optional[Mapping[str, Any]] = None,
    ) -> "Unit":
        """
        Instantiate the provider-specific Unit subclass.

        Asking for a plain Unit looks up the row's provider_type and
        allocates that crowd provider's UnitClass instead, so callers never
        need to locate the right subclass themselves. Asking for a subclass
        directly allocates that subclass unchanged.
        """
        if cls != Unit:
            # A concrete subclass was requested explicitly
            return super().__new__(cls)

        # A bare Unit was requested: resolve the provider's class from the row
        from mephisto.operations.registry import get_crowd_provider_from_type

        if row is None:
            row = db.get_unit(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        provider = get_crowd_provider_from_type(row["provider_type"])
        return super().__new__(provider.UnitClass)

    def get_crowd_provider_class(self) -> Type["CrowdProvider"]:
        """Return the CrowdProvider class registered for this Unit's provider_type"""
        from mephisto.operations.registry import get_crowd_provider_from_type

        return get_crowd_provider_from_type(self.provider_type)

    def get_assignment_data(self) -> Optional[Dict[str, Any]]:
        """Return the assignment data of the assignment this Unit belongs to"""
        assignment = self.get_assignment()
        return assignment.get_assignment_data()

    def sync_status(self) -> None:
        """
        Write the status reported by get_status back into the database
        """
        # TODO(102) this will need to be run periodically/on crashes
        # to sync any lost state
        current_status = self.get_status()
        self.set_db_status(current_status)

    def get_db_status(self) -> str:
        """
        Return the status stored in the database. The local copy is returned
        without a query once it is one of the final unit statuses.
        """
        if self.db_status in AssignmentState.final_unit():
            return self.db_status
        fresh_row = self.db.get_unit(self.db_id)
        assert fresh_row is not None, f"Unit {self.db_id} stopped existing in the db..."
        return fresh_row["status"]

    def set_db_status(self, status: str) -> None:
        """
        Record the given status locally and in the database. The status
        must be one of AssignmentState.valid_unit().
        """
        assert (
            status in AssignmentState.valid_unit()
        ), f"{status} not valid Assignment Status, not in {AssignmentState.valid_unit()}"
        self.db_status = status
        self.db.update_unit(self.db_id, status=status)

    def get_assignment(self) -> "Assignment":
        """
        Return the Assignment containing this Unit, building it on first use
        """
        if self.__assignment is not None:
            return self.__assignment

        from mephisto.data_model.assignment import Assignment

        self.__assignment = Assignment(self.db, self.assignment_id)
        return self.__assignment

    def get_task_run(self) -> TaskRun:
        """
        Return the TaskRun this Unit belongs to, reusing the loaded
        assignment's task run when there is one
        """
        if self.__task_run is not None:
            return self.__task_run
        if self.__assignment is not None:
            self.__task_run = self.__assignment.get_task_run()
        else:
            self.__task_run = TaskRun(self.db, self.task_run_id)
        return self.__task_run

    def get_task(self) -> Task:
        """
        Return the Task this Unit belongs to, reusing an already loaded
        assignment or task run when there is one
        """
        if self.__task is not None:
            return self.__task
        if self.__assignment is not None:
            self.__task = self.__assignment.get_task()
        elif self.__task_run is not None:
            self.__task = self.__task_run.get_task()
        else:
            self.__task = Task(self.db, self.task_id)
        return self.__task

    def get_requester(self) -> "Requester":
        """
        Return the Requester offering this Unit, reusing an already loaded
        assignment or task run when there is one
        """
        if self.__requester is not None:
            return self.__requester
        if self.__assignment is not None:
            self.__requester = self.__assignment.get_requester()
        elif self.__task_run is not None:
            self.__requester = self.__task_run.get_requester()
        else:
            self.__requester = Requester(self.db, self.requester_id)
        return self.__requester

    def clear_assigned_agent(self) -> None:
        """Remove the agent assignment from this unit, in the db and locally"""
        self.db.clear_unit_agent_assignment(self.db_id)
        self.agent_id = None
        self.__agent = None

    def get_assigned_agent(self) -> Optional[Agent]:
        """
        Return the Agent currently assigned to this Unit, or None when no
        agent is assigned
        """
        # In final statuses, we know the agent isn't changing anymore, and
        # thus the locally stored agent id does not need to be re-queried
        # TODO(#97) add test to ensure this behavior/assumption holds always
        if self.db_status in AssignmentState.final_unit():
            assigned_id = self.agent_id
        else:
            # The assignment can change after instantiation while the status
            # isn't final, so read the most up-to-date value from the database
            # TODO(#101) this may not be particularly efficient
            fresh_row = self.db.get_unit(self.db_id)
            assert fresh_row is not None, f"Unit {self.db_id} stopped existing in the db..."
            assigned_id = fresh_row["agent_id"]

        if assigned_id is None:
            return None
        return Agent(self.db, assigned_id)

    @staticmethod
    def _register_unit(
        db: "MephistoDB",
        assignment: "Assignment",
        index: int,
        pay_amount: float,
        provider_type: str,
    ) -> "Unit":
        """
        Insert a new unit for the given assignment into the database and
        return the Unit built from the new id
        """
        new_id = db.new_unit(
            assignment.task_id,
            assignment.task_run_id,
            assignment.requester_id,
            assignment.db_id,
            index,
            pay_amount,
            provider_type,
            assignment.task_type,
        )
        return Unit(db, new_id)

    def get_pay_amount(self) -> float:
        """
        Return the amount this Unit costs against the budget. The base
        implementation returns the stored pay_amount as is.
        """
        return self.pay_amount

    # Children classes may need to override the following

    def get_status(self) -> str:
        """
        Compute this unit's status and store it in the db when it differs
        from the locally recorded one.

        Without an assigned agent the status is read from the database row;
        otherwise it is derived from the agent's status. Accurate status is
        crowd-provider dependent, so child classes may override this to
        match the provider's ground truth.
        """
        from mephisto.abstractions.blueprint import AgentState

        recorded_status = self.db_status
        # Used when the agent reports a status none of the branches cover
        computed_status = AssignmentState.LAUNCHED

        agent = self.get_assigned_agent()
        if agent is None:
            unit_row = self.db.get_unit(self.db_id)
            computed_status = unit_row["status"]
        else:
            agent_status = agent.get_status()
            if agent_status == AgentState.STATUS_NONE:
                computed_status = AssignmentState.LAUNCHED
            elif agent_status in (
                AgentState.STATUS_ACCEPTED,
                AgentState.STATUS_ONBOARDING,
                AgentState.STATUS_PARTNER_DISCONNECT,
                AgentState.STATUS_WAITING,
                AgentState.STATUS_IN_TASK,
            ):
                computed_status = AssignmentState.ASSIGNED
            elif agent_status == AgentState.STATUS_COMPLETED:
                computed_status = AssignmentState.COMPLETED
            elif agent_status == AgentState.STATUS_SOFT_REJECTED:
                computed_status = AssignmentState.SOFT_REJECTED
            elif agent_status == AgentState.STATUS_EXPIRED:
                computed_status = AssignmentState.EXPIRED
            elif agent_status in (
                AgentState.STATUS_DISCONNECT,
                AgentState.STATUS_RETURNED,
            ):
                # Disconnected and returned agents still count as assigned here
                computed_status = AssignmentState.ASSIGNED
            elif agent_status == AgentState.STATUS_APPROVED:
                computed_status = AssignmentState.ACCEPTED
            elif agent_status == AgentState.STATUS_REJECTED:
                computed_status = AssignmentState.REJECTED

        if computed_status != recorded_status:
            self.set_db_status(computed_status)

        return computed_status

    # Children classes should implement the below methods

    def launch(self, task_url: str) -> None:
        """
        Make this Unit available on the crowdsourcing vendor. Depending on
        the task type this may involve several setup steps; providers that
        need a one-time configuration before the first launch should call
        a helper for it from here.
        """
        raise NotImplementedError()

    def expire(self) -> float:
        """
        Expire this unit so it can no longer be worked on at the vendor.
        Return the maximum time to wait before it is known to be taken down.
        """
        raise NotImplementedError()

    def is_expired(self) -> bool:
        """Report whether the vendor considers this unit expired."""
        raise NotImplementedError()

    @staticmethod
    def new(
        db: "MephistoDB",
        assignment: "Assignment",
        index: int,
        pay_amount: float,
    ) -> "Unit":
        """
        Create a Unit for the given assignment.

        Implementations should return the result of _register_unit once
        they are sure the unit can be created, so that it is put in the db.
        """
        raise NotImplementedError()
class Assignment:
    """
    One run of a specific task. An Assignment groups a set of units and
    derives its own state from theirs through the database helpers.
    """

    def __init__(
        self,
        db: "MephistoDB",
        db_id: str,
        row: Optional[Mapping[str, Any]] = None,
    ):
        """
        Populate this Assignment from its database row, fetching the row
        with db.get_assignment(db_id) when the caller did not supply one.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_assignment(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"

        # Plain column values copied from the row
        self.db_id: str = row["assignment_id"]
        self.task_run_id = row["task_run_id"]
        self.sandbox = row["sandbox"]
        self.task_id = row["task_id"]
        self.requester_id = row["requester_id"]
        self.task_type = row["task_type"]
        self.provider_type = row["provider_type"]

        # Related entities are only built the first time they are asked for
        self.__task_run: Optional["TaskRun"] = None
        self.__task: Optional["Task"] = None
        self.__requester: Optional["Requester"] = None

    def get_data_dir(self) -> str:
        """Return the directory holding this assignment's data: <run dir>/<db_id>"""
        run_dir = self.get_task_run().get_run_dir()
        return os.path.join(run_dir, self.db_id)

    def get_assignment_data(self) -> InitializationData:
        """Load and return the assignment data stored in this assignment's data dir"""
        data_path = os.path.join(self.get_data_dir(), ASSIGNMENT_DATA_FILE)
        assert os.path.exists(data_path), "No data exists for assignment"
        with open(data_path, "r") as json_file:
            return InitializationData.loadFromJSON(json_file)

    def write_assignment_data(self, data: InitializationData) -> None:
        """Store the given data as this assignment's data, creating the dir if needed"""
        data_dir = self.get_data_dir()
        os.makedirs(data_dir, exist_ok=True)
        data_path = os.path.join(data_dir, ASSIGNMENT_DATA_FILE)
        with open(data_path, "w+") as json_file:
            data.dumpJSON(json_file)

    def get_agents(self) -> List[Optional["Agent"]]:
        """
        Return the assigned agent of every unit in this assignment, with
        None standing in for units that have no agent
        """
        return [unit.get_assigned_agent() for unit in self.get_units()]

    def get_status(self) -> str:
        """
        Derive the status of this assignment from the statuses of its units
        """
        statuses = {unit.get_status() for unit in self.get_units()}

        if len(statuses) == 1:
            # Every unit agrees, so the assignment shares their status
            return statuses.pop()
        if not statuses:
            # No units at all
            return AssignmentState.CREATED

        if AssignmentState.CREATED in statuses:
            # TODO(#99) handle the case where new units are created after
            # everything else is launched
            return AssignmentState.CREATED
        if AssignmentState.LAUNCHED in statuses:
            # If any are only launched, consider the whole thing launched
            return AssignmentState.LAUNCHED
        if AssignmentState.ASSIGNED in statuses:
            # If any are still assigned, consider the whole thing assigned
            return AssignmentState.ASSIGNED

        reviewed = (AssignmentState.ACCEPTED, AssignmentState.REJECTED)
        if all(status in reviewed for status in statuses):
            return AssignmentState.MIXED

        final_statuses = AssignmentState.final_agent()
        if all(status in final_statuses for status in statuses):
            return AssignmentState.COMPLETED

        raise NotImplementedError(f"Unexpected set of unit statuses {statuses}")

    def get_task_run(self) -> TaskRun:
        """
        Return the TaskRun this assignment belongs to, building it on first use
        """
        if self.__task_run is None:
            self.__task_run = TaskRun(self.db, self.task_run_id)
        return self.__task_run

    def get_task(self) -> Task:
        """
        Return the Task this assignment belongs to, reusing the loaded task
        run when there is one
        """
        if self.__task is not None:
            return self.__task
        if self.__task_run is not None:
            self.__task = self.__task_run.get_task()
        else:
            self.__task = Task(self.db, self.task_id)
        return self.__task

    def get_requester(self) -> Requester:
        """
        Return the Requester offering this assignment, reusing the loaded
        task run when there is one
        """
        if self.__requester is not None:
            return self.__requester
        if self.__task_run is not None:
            self.__requester = self.__task_run.get_requester()
        else:
            self.__requester = Requester(self.db, self.requester_id)
        return self.__requester

    def get_units(self, status: Optional[str] = None) -> List["Unit"]:
        """
        Return this assignment's units. When a status is given, only units
        whose get_status() equals it are returned.
        """
        assert (
            status is None or status in AssignmentState.valid_unit()
        ), "Invalid assignment status"
        found_units = self.db.find_units(assignment_id=self.db_id)
        if status is None:
            return found_units
        return [unit for unit in found_units if unit.get_status() == status]

    def get_workers(self) -> List["Worker"]:
        """
        Return the workers behind every agent assigned to one of this
        assignment's units
        """
        maybe_agents = [unit.get_assigned_agent() for unit in self.get_units()]
        assigned_agents = [agent for agent in maybe_agents if agent is not None]
        return [agent.get_worker() for agent in assigned_agents]

    def get_cost_of_statuses(self, statuses: List[str]) -> float:
        """
        Return the summed pay amount of every unit of this assignment whose
        status is one of the given statuses
        """
        matching = [unit for unit in self.get_units() if unit.get_status() in statuses]
        total = 0.0
        for unit in matching:
            total = total + unit.get_pay_amount()
        return total

    def __repr__(self) -> str:
        return f"Assignment({self.db_id})"

    # TODO(100) add helpers to manage retrieving results as well

    @staticmethod
    def new(
        db: "MephistoDB",
        task_run: TaskRun,
        assignment_data: Optional[Dict[str, Any]],
    ) -> "Assignment":
        """
        Register a new assignment for the given task run and create its
        data directory. When assignment_data is not None it is dumped as
        JSON into that directory.
        """
        # TODO(101) consider offloading this state management to the MephistoDB
        # as it is data handling and can theoretically be done differently
        # in different implementations
        # NOTE(review): no task id is passed here although Assignment rows
        # expose "task_id" - confirm against MephistoDB.new_assignment
        new_id = db.new_assignment(
            task_run.db_id,
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )
        assign_dir = os.path.join(task_run.get_run_dir(), new_id)
        os.makedirs(assign_dir)
        if assignment_data is not None:
            data_path = os.path.join(assign_dir, ASSIGNMENT_DATA_FILE)
            with open(data_path, "w+") as json_file:
                json.dump(assignment_data, json_file)
        assignment = Assignment(db, new_id)
        logger.debug(f"{assignment} created for {task_run}")
        return assignment
class OnboardingAgent(ABC):
    """
    Onboarding agents are a special extension of agents used
    in tasks that have a separate onboarding step. These agents
    are designed to work without being linked to an explicit
    unit, and instead are tied to the task run and task name.

    Blueprints that require OnboardingAgents should implement an
    OnboardingAgentState (to process the special task), and their
    TaskRunners should have a run_onboarding and cleanup_onboarding
    method.
    """

    # Prepended to the db id to form the id used in onboarding agent requests
    DISPLAY_PREFIX = "onboarding_"

    def __init__(
        self,
        db: "MephistoDB",
        db_id: str,
        row: Optional[Mapping[str, Any]] = None,
    ):
        """
        Load the onboarding agent stored under db_id.

        Args:
            db: database holding the onboarding agent row.
            db_id: id of the onboarding agent to load.
            row: already fetched row for db_id; when omitted the row is
                fetched with db.get_onboarding_agent(db_id).

        Raises:
            AssertionError: if no row exists for db_id.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_onboarding_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        self.db_id: str = row["onboarding_agent_id"]
        self.db_status = row["status"]
        self.worker_id = row["worker_id"]
        self.task_type = row["task_type"]
        self.pending_observations: List["Packet"] = []
        self.pending_actions: List["Packet"] = []
        # Set while an action is available (or a disconnect frees a waiter)
        self.has_action = threading.Event()
        self.has_action.clear()
        # Set when act() found no pending action and is asking for one
        self.wants_action = threading.Event()
        self.wants_action.clear()
        self.has_updated_status = threading.Event()
        self.task_run_id = row["task_run_id"]
        self.task_id = row["task_id"]
        self.did_submit = threading.Event()

        # Deferred loading of related entities
        self._worker: Optional["Worker"] = None
        self._task_run: Optional["TaskRun"] = None
        self._task: Optional["Task"] = None

        # Follow-up initialization
        self.state = AgentState(self)  # type: ignore

    def get_agent_id(self) -> str:
        """Return an id to use for onboarding agent requests"""
        return f"{self.DISPLAY_PREFIX}{self.db_id}"

    @classmethod
    def is_onboarding_id(cls, agent_id: str) -> bool:
        """Return whether the given id is for an onboarding agent"""
        return agent_id.startswith(cls.DISPLAY_PREFIX)

    @classmethod
    def get_db_id_from_agent_id(cls, agent_id: str) -> str:
        """
        Extract the db_id for an onboarding_agent.

        Raises:
            AssertionError: if agent_id does not start with DISPLAY_PREFIX.
        """
        assert agent_id.startswith(
            cls.DISPLAY_PREFIX
        ), f"Provided id {agent_id} is not an onboarding_id"
        return agent_id[len(cls.DISPLAY_PREFIX):]

    def get_worker(self) -> "Worker":
        """
        Return the worker that is using this agent for a task
        """
        if self._worker is None:
            self._worker = Worker(self.db, self.worker_id)
        return self._worker

    def get_task_run(self) -> "TaskRun":
        """Return the TaskRun this agent is working within"""
        if self._task_run is None:
            from mephisto.data_model.task_run import TaskRun

            self._task_run = TaskRun(self.db, self.task_run_id)
        return self._task_run

    def get_task(self) -> "Task":
        """Return the Task this agent is working within"""
        if self._task is None:
            if self._task_run is not None:
                # Reuse the task run that is already loaded
                self._task = self._task_run.get_task()
            else:
                from mephisto.data_model.task import Task

                self._task = Task(self.db, self.task_id)
        return self._task

    def get_data_dir(self) -> str:
        """
        Return the directory to be storing any agent state for
        this agent into: <run dir>/onboarding/<agent id>
        """
        task_run_dir = self.get_task_run().get_run_dir()
        return os.path.join(task_run_dir, "onboarding", self.get_agent_id())

    def update_status(self, new_status: str) -> None:
        """
        Update the database status of this agent, and signal the change
        through has_updated_status. Does nothing when new_status is
        already the current status.
        """
        if self.db_status == new_status:
            return  # Noop, this is already the case
        if self.db_status in AgentState.complete():
            print(
                f"Updating a final status, was {self.db_status} "
                f"and want to set to {new_status}"
            )
        self.db.update_onboarding_agent(self.db_id, status=new_status)
        self.db_status = new_status
        self.has_updated_status.set()
        if new_status in [AgentState.STATUS_RETURNED, AgentState.STATUS_DISCONNECT]:
            # Disconnect statuses should free any pending acts
            self.has_action.set()
            self.did_submit.set()

    def observe(self, packet: "Packet") -> None:
        """
        Pass the observed information to the AgentState, then
        queue the information to be pushed to the user
        """
        # Work on a copy so the caller's packet keeps its receiver
        sending_packet = packet.copy()
        sending_packet.receiver_id = self.get_agent_id()
        self.state.update_data(sending_packet)
        self.pending_observations.append(sending_packet)

    def act(self, timeout: Optional[int] = None) -> Optional["Packet"]:
        """
        Request information from the Agent's frontend. If non-blocking,
        (timeout is None) should return None if no actions are ready
        to be returned.

        Raises:
            AgentDisconnectedError: no action arrived and the agent's
                status is STATUS_DISCONNECT.
            AgentReturnedError: no action arrived and the agent's status
                is STATUS_RETURNED.
            AgentTimeoutError: no action arrived within timeout; the
                status is updated to STATUS_TIMEOUT first.
        """
        if len(self.pending_actions) == 0:
            self.wants_action.set()
            if timeout is None or timeout == 0:
                return None
            self.has_action.wait(timeout)

        if len(self.pending_actions) == 0:
            # various disconnect cases
            status = self.get_status()
            if status == AgentState.STATUS_DISCONNECT:
                raise AgentDisconnectedError(self.db_id)
            elif status == AgentState.STATUS_RETURNED:
                raise AgentReturnedError(self.db_id)
            self.update_status(AgentState.STATUS_TIMEOUT)
            raise AgentTimeoutError(timeout, self.db_id)
        assert len(self.pending_actions) > 0, "has_action released without an action!"

        act = self.pending_actions.pop(0)

        if "MEPHISTO_is_submit" in act.data and act.data["MEPHISTO_is_submit"]:
            self.did_submit.set()

        if len(self.pending_actions) == 0:
            # Nothing left to consume, so later calls have to wait again
            self.has_action.clear()
        self.state.update_data(act)
        return act

    def get_status(self) -> str:
        """
        Get the status of this agent in their work on their unit. The
        status is re-read from the database unless the local status is
        already one of AgentState.complete().
        """
        if self.db_status not in AgentState.complete():
            row = self.db.get_onboarding_agent(self.db_id)
            if row["status"] != self.db_status:
                if row["status"] in [
                    AgentState.STATUS_RETURNED,
                    AgentState.STATUS_DISCONNECT,
                ]:
                    # Disconnect statuses should free any pending acts
                    self.has_action.set()
                self.has_updated_status.set()
            self.db_status = row["status"]
        return self.db_status

    def mark_done(self) -> None:
        """
        Mark this agent as done: move it to STATUS_WAITING unless it has
        already been approved or rejected.
        """
        # TODO the logic for when onboarding gets marked as waiting or approved/rejected
        # should likely be cleaned up to remove these conditionals.
        # get_status must be *called*: comparing the bound method itself is
        # always "not in" the list, which overwrote approved/rejected agents.
        if self.get_status() not in [
            AgentState.STATUS_APPROVED,
            AgentState.STATUS_REJECTED,
        ]:
            self.update_status(AgentState.STATUS_WAITING)

    @staticmethod
    def new(
        db: "MephistoDB", worker: "Worker", task_run: "TaskRun"
    ) -> "OnboardingAgent":
        """
        Create an OnboardingAgent for a worker to use as part of a task run
        """
        db_id = db.new_onboarding_agent(
            worker.db_id, task_run.task_id, task_run.db_id, task_run.task_type
        )
        return OnboardingAgent(db, db_id)
class Agent(ABC):
    """
    A worker in the context of one specific unit of work. The Agent holds
    the state of that piece of work: its status, the packets queued in
    either direction, and the events used to signal changes.
    """

    def __init__(
        self,
        db: "MephistoDB",
        db_id: str,
        row: Optional[Mapping[str, Any]] = None,
    ):
        """
        Populate this Agent from its database row, fetching the row with
        db.get_agent(db_id) when the caller did not supply one.
        """
        self.db: "MephistoDB" = db
        if row is None:
            row = db.get_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"

        self.db_id: str = row["agent_id"]
        self.db_status = row["status"]
        self.worker_id = row["worker_id"]
        self.unit_id = row["unit_id"]
        self.task_type = row["task_type"]
        self.provider_type = row["provider_type"]

        # Packet queues and the events that signal activity on them
        self.pending_observations: List["Packet"] = []
        self.pending_actions: List["Packet"] = []
        self.has_action = threading.Event()
        self.has_action.clear()
        self.wants_action = threading.Event()
        self.wants_action.clear()
        self.has_updated_status = threading.Event()

        self.assignment_id = row["assignment_id"]
        self.task_run_id = row["task_run_id"]
        self.task_id = row["task_id"]
        self.did_submit = threading.Event()

        # Related entities are only built the first time they are asked for
        self._worker: Optional["Worker"] = None
        self._unit: Optional["Unit"] = None
        self._assignment: Optional["Assignment"] = None
        self._task_run: Optional["TaskRun"] = None
        self._task: Optional["Task"] = None

        # Follow-up initialization
        self.state = AgentState(self)  # type: ignore

    def __new__(
        cls,
        db: "MephistoDB",
        db_id: str,
        row: Optional[Mapping[str, Any]] = None,
    ) -> "Agent":
        """
        Instantiate the provider-specific Agent subclass.

        Asking for a plain Agent looks up the row's provider_type and
        allocates that crowd provider's AgentClass instead, so callers never
        need to locate the right subclass themselves. Asking for a subclass
        directly allocates that subclass unchanged.
        """
        from mephisto.operations.registry import get_crowd_provider_from_type

        if cls != Agent:
            # A concrete subclass was requested explicitly
            return super().__new__(cls)

        # A bare Agent was requested: resolve the provider's class from the row
        if row is None:
            row = db.get_agent(db_id)
        assert row is not None, f"Given db_id {db_id} did not exist in given db"
        provider = get_crowd_provider_from_type(row["provider_type"])
        return super().__new__(provider.AgentClass)

    def get_agent_id(self) -> str:
        """Return the id of this agent, which is its db_id"""
        return self.db_id

    def get_worker(self) -> Worker:
        """
        Return the Worker behind this agent, building it on first use
        """
        if self._worker is None:
            self._worker = Worker(self.db, self.worker_id)
        return self._worker

    def get_unit(self) -> "Unit":
        """
        Return the Unit this agent is working on, building it on first use
        """
        if self._unit is not None:
            return self._unit

        from mephisto.data_model.unit import Unit

        self._unit = Unit(self.db, self.unit_id)
        return self._unit

    def get_assignment(self) -> "Assignment":
        """Return the Assignment this agent works on, reusing a loaded unit if any"""
        if self._assignment is not None:
            return self._assignment
        if self._unit is not None:
            self._assignment = self._unit.get_assignment()
        else:
            from mephisto.data_model.assignment import Assignment

            self._assignment = Assignment(self.db, self.assignment_id)
        return self._assignment

    def get_task_run(self) -> "TaskRun":
        """Return the TaskRun this agent works within, reusing loaded entities if any"""
        if self._task_run is not None:
            return self._task_run
        if self._unit is not None:
            self._task_run = self._unit.get_task_run()
        elif self._assignment is not None:
            self._task_run = self._assignment.get_task_run()
        else:
            from mephisto.data_model.task_run import TaskRun

            self._task_run = TaskRun(self.db, self.task_run_id)
        return self._task_run

    def get_task(self) -> "Task":
        """Return the Task this agent works within, reusing loaded entities if any"""
        if self._task is not None:
            return self._task
        if self._unit is not None:
            self._task = self._unit.get_task()
        elif self._assignment is not None:
            self._task = self._assignment.get_task()
        elif self._task_run is not None:
            self._task = self._task_run.get_task()
        else:
            from mephisto.data_model.task import Task

            self._task = Task(self.db, self.task_id)
        return self._task

    def get_data_dir(self) -> str:
        """
        Return the directory for this agent's state:
        <assignment data dir>/<db_id>
        """
        parent_dir = self.get_assignment().get_data_dir()
        return os.path.join(parent_dir, self.db_id)

    def update_status(self, new_status: str) -> None:
        """
        Store a new status for this agent in the database and signal the
        change through has_updated_status. Does nothing when new_status
        is already the current status.
        """
        if new_status == self.db_status:
            return  # Noop, this is already the case

        if self.db_status in AgentState.complete():
            print(
                f"Updating a final status, was {self.db_status} "
                f"and want to set to {new_status}"
            )

        self.db.update_agent(self.db_id, status=new_status)
        self.db_status = new_status
        self.has_updated_status.set()

        disconnect_statuses = (
            AgentState.STATUS_RETURNED,
            AgentState.STATUS_DISCONNECT,
        )
        if new_status in disconnect_statuses:
            # Disconnect statuses should free any pending acts
            self.has_action.set()
            self.did_submit.set()

    @staticmethod
    def _register_agent(
        db: "MephistoDB",
        worker: Worker,
        unit: "Unit",
        provider_type: str,
    ) -> "Agent":
        """
        Insert a new agent for the worker and unit into the database and
        return it with its status updated to STATUS_ACCEPTED
        """
        new_id = db.new_agent(
            worker.db_id,
            unit.db_id,
            unit.task_id,
            unit.task_run_id,
            unit.assignment_id,
            unit.task_type,
            provider_type,
        )
        registered = Agent(db, new_id)
        registered.update_status(AgentState.STATUS_ACCEPTED)
        return registered

    # Specialized child cases may need to implement the following

    @classmethod
    def new_from_provider_data(
        cls,
        db: "MephistoDB",
        worker: Worker,
        unit: "Unit",
        provider_data: Dict[str, Any],
    ) -> "Agent":
        """
        Wrapper around the new method that lets a crowd provider register
        extra bookkeeping information for this agent. The base version
        does not read provider_data; it links the unit and worker to the
        newly created agent.
        """
        created = cls.new(db, worker, unit)
        unit.worker_id = worker.db_id
        created._unit = unit
        return created

    def observe(self, packet: "Packet") -> None:
        """
        Hand the observed packet to the AgentState, then queue it to be
        pushed to the user. Packets without a message_id receive a fresh
        uuid first.
        """
        if packet.data.get("message_id") is None:
            packet.data["message_id"] = str(uuid4())
        outgoing = packet.copy()
        outgoing.receiver_id = self.db_id
        self.state.update_data(outgoing)
        self.pending_observations.append(outgoing)

    def act(self, timeout: Optional[int] = None) -> Optional["Packet"]:
        """
        Request information from the Agent's frontend. When no action is
        pending and timeout is None or 0 this returns None right away;
        otherwise it waits up to timeout for an action.
        """
        if not self.pending_actions:
            self.wants_action.set()
            if timeout is None or timeout == 0:
                return None
            self.has_action.wait(timeout)

        if not self.pending_actions:
            # various disconnect cases
            current_status = self.get_status()
            if current_status == AgentState.STATUS_DISCONNECT:
                raise AgentDisconnectedError(self.db_id)
            if current_status == AgentState.STATUS_RETURNED:
                raise AgentReturnedError(self.db_id)
            self.update_status(AgentState.STATUS_TIMEOUT)
            raise AgentTimeoutError(timeout, self.db_id)
        assert len(self.pending_actions) > 0, "has_action released without an action!"

        next_action = self.pending_actions.pop(0)

        if next_action.data.get("MEPHISTO_is_submit"):
            self.did_submit.set()

        if not self.pending_actions:
            # Nothing left to consume, so later calls have to wait again
            self.has_action.clear()
        self.state.update_data(next_action)
        return next_action

    def get_status(self) -> str:
        """
        Return this agent's status. The status is re-read from the
        database unless the local status is already one of
        AgentState.complete().
        """
        if self.db_status in AgentState.complete():
            return self.db_status

        row = self.db.get_agent(self.db_id)
        latest_status = row["status"]
        if latest_status != self.db_status:
            if latest_status in (
                AgentState.STATUS_RETURNED,
                AgentState.STATUS_DISCONNECT,
            ):
                # Disconnect statuses should free any pending acts
                self.has_action.set()
            self.has_updated_status.set()
        self.db_status = latest_status
        return self.db_status

    # Children classes should implement the following methods

    def approve_work(self) -> None:
        """Approve the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def soft_reject_work(self) -> None:
        """
        Pay the worker for attempted work, but mark it as below the
        quality bar for this assignment
        """
        # TODO(OWN) extend this method to assign a soft block
        # qualification automatically if a threshold of
        # soft rejects as a proportion of total accepts
        # is exceeded
        self.approve_work()
        self.update_status(AgentState.STATUS_SOFT_REJECTED)

    def reject_work(self, reason) -> None:
        """Reject the work done on this agent's specific Unit"""
        raise NotImplementedError()

    def mark_done(self) -> None:
        """
        Take any step required with the crowd_provider so that the worker
        can submit their work and be marked as complete via a call to
        get_status
        """
        raise NotImplementedError()

    @staticmethod
    def new(db: "MephistoDB", worker: Worker, unit: "Unit") -> "Agent":
        """
        Create an agent for this worker to be used for work on the given Unit.

        Implementations should return the result of _register_agent once
        they are sure the agent can be created, so that it is put in the db.
        """
        raise NotImplementedError()