예제 #1
0
    def get_task_run(self) -> "TaskRun":
        """Fetch the TaskRun this agent is working within, caching it on first use"""
        cached_run = self._task_run
        if cached_run is not None:
            return cached_run

        # Imported at call time rather than module load
        # (presumably to sidestep a circular import — TODO confirm)
        from mephisto.data_model.task_run import TaskRun

        self._task_run = TaskRun.get(self.db, self.task_run_id)
        return self._task_run
예제 #2
0
 def get_units_for_run_id(self, run_id: str) -> List[Unit]:
     """
     Load the task run with the given run_id and return the Units that
     `_get_units_for_task_runs` selects for it (documented by the original
     author as those in a terminal completed state)
     """
     return self._get_units_for_task_runs([TaskRun.get(self.db, run_id)])
 def setUp(self):
     """Build a database in a fresh temp directory and load a test task run"""
     self.data_dir = tempfile.mkdtemp()
     db_file = os.path.join(self.data_dir, "mephisto.db")
     # The concrete database class must be supplied through DB_CLASS
     assert self.DB_CLASS is not None, "Did not specify db to use"
     self.db = self.DB_CLASS(db_file)
     run_id = get_test_task_run(self.db)
     self.task_run_id = run_id
     self.task_run = TaskRun.get(self.db, run_id)
예제 #4
0
 def get_task_run(self) -> TaskRun:
     """
     Return the task run that this assignment is part of, loading it from
     the database the first time and reusing the cached object afterwards
     """
     if self.__task_run is not None:
         return self.__task_run
     self.__task_run = TaskRun.get(self.db, self.task_run_id)
     return self.__task_run
예제 #5
0
    def setUp(self):
        """
        Build the full mock stack for a test: a temp-dir database, a task and
        task run, a deployed MockArchitect, a MockProvider, launched units,
        and the client IO handler and worker pool.
        """
        # Database and data model entries
        self.data_dir = tempfile.mkdtemp()
        db_file = os.path.join(self.data_dir, "mephisto.db")
        assert self.DB_CLASS is not None, "Did not specify db to use"
        self.db = self.DB_CLASS(db_file)
        self.task_id = self.db.new_task("test_mock", MockBlueprint.BLUEPRINT_TYPE)
        self.task_run_id = get_test_task_run(self.db)
        self.task_run = TaskRun.get(self.db, self.task_run_id)
        self.live_run = None

        # Architect configured with should_run_server=True, then deployed
        mock_architect_args = MockArchitectArgs(should_run_server=True)
        architect_config = OmegaConf.structured(
            MephistoConfig(architect=mock_architect_args)
        )
        self.architect = MockArchitect(
            self.db,
            architect_config,
            EMPTY_STATE,
            self.task_run,
            self.data_dir,
        )
        self.architect.prepare()
        self.architect.deploy()
        self.urls = self.architect._get_socket_urls()  # FIXME
        self.url = self.urls[0]

        # Provider resources are registered against the first socket url
        self.provider = MockProvider(self.db)
        self.provider.setup_resources_for_task_run(
            self.task_run,
            self.task_run.args,
            EMPTY_STATE,
            self.url,
        )

        # Create the assignments and launch their units
        self.launcher = TaskLauncher(
            self.db,
            self.task_run,
            self.get_mock_assignment_data_array(),
        )
        self.launcher.create_assignments()
        self.launcher.launch_units(self.url)

        self.client_io = ClientIOHandler(self.db)
        self.worker_pool = WorkerPool(self.db)
    def test_task_run(self) -> None:
        """Exercise creation, retrieval, searching, and completion updates of task_runs"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        _, task_id = get_test_task(db)
        _, requester_id = get_test_requester(db)

        # Create a task_run, then confirm both the raw row and the loaded object
        init_params = json.dumps(OmegaConf.to_yaml(TaskRunArgs.get_mock_params()))
        task_run_id = db.new_task_run(
            task_id, requester_id, init_params, "mock", "mock"
        )
        self.assertIsNotNone(task_run_id)
        self.assertTrue(isinstance(task_run_id, str))
        self.assertEqual(db.get_task_run(task_run_id)["init_params"], init_params)
        self.assertEqual(TaskRun.get(db, task_run_id).task_id, task_id)

        def expect_only_created_run(found_runs) -> None:
            """Assert the query returned exactly the task_run created above"""
            self.assertEqual(len(found_runs), 1)
            only_run = found_runs[0]
            self.assertTrue(isinstance(only_run, TaskRun))
            self.assertEqual(only_run.db_id, task_run_id)
            self.assertEqual(only_run.task_id, task_id)
            self.assertEqual(only_run.requester_id, requester_id)

        # Unfiltered search, then searches by each matching key
        expect_only_created_run(db.find_task_runs())
        expect_only_created_run(db.find_task_runs(task_id=task_id))
        expect_only_created_run(db.find_task_runs(requester_id=requester_id))

        # Queries that should match nothing yet
        unmatched_runs = db.find_task_runs(task_id=self.get_fake_id("TaskRun"))
        self.assertEqual(len(unmatched_runs), 0)
        self.assertEqual(len(db.find_task_runs(is_completed=True)), 0)

        # After marking the run completed, the completed query should find it
        db.update_task_run(task_run_id, True)
        completed_runs = db.find_task_runs(is_completed=True)
        self.assertEqual(len(completed_runs), 1)
        self.assertTrue(isinstance(completed_runs[0], TaskRun))
        self.assertEqual(completed_runs[0].db_id, task_run_id)
예제 #7
0
    def get_task_run(self) -> "TaskRun":
        """
        Return the TaskRun this agent is working within.

        The result is cached. On first use it is taken from the already
        loaded unit, else the already loaded assignment, else fetched from
        the database by task_run_id.
        """
        if self._task_run is not None:
            return self._task_run

        # Prefer an object we already hold over a database lookup
        parent = self._unit if self._unit is not None else self._assignment
        if parent is not None:
            self._task_run = parent.get_task_run()
        else:
            from mephisto.data_model.task_run import TaskRun

            self._task_run = TaskRun.get(self.db, self.task_run_id)
        return self._task_run
예제 #8
0
 def setUp(self) -> None:
     """
     Create the temp directories, local database, and test task run for a
     test, and pull the runner/state/builder classes off of BlueprintClass.
     """
     self.data_dir = tempfile.mkdtemp()
     self.build_dir = tempfile.mkdtemp()
     self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))
     # TODO(#97) we need to actually pull the task type from the Blueprint
     test_run_id = get_test_task_run(self.db)
     self.task_run = TaskRun.get(self.db, test_run_id)
     # TODO(#97) create a mock agent with the given task type?
     blueprint_class = self.BlueprintClass
     self.TaskRunnerClass = blueprint_class.TaskRunnerClass
     self.AgentStateClass = blueprint_class.AgentStateClass
     self.TaskBuilderClass = blueprint_class.TaskBuilderClass
예제 #9
0
def get_reviewable_task_runs():
    """
    Build a JSON response listing every task run that owns at least one
    unit in the COMPLETED state, plus the total number of such units.
    """
    db = app.extensions["db"]
    completed_units = db.find_units(status=AssignmentState.COMPLETED)
    # Several units can belong to one run, so collapse to distinct run ids
    distinct_run_ids = {
        unit.get_assignment().get_task_run().db_id for unit in completed_units
    }
    serialized_runs = [
        TaskRun.get(db, run_id).to_dict() for run_id in distinct_run_ids
    ]
    # TODO(OWN) maybe include warning for auto approve date once that's tracked
    response_payload = {
        "task_runs": serialized_runs,
        "total_reviewable": len(completed_units),
    }
    return jsonify(response_payload)
예제 #10
0
 def setUp(self) -> None:
     """
     Setup should put together any requirements for starting the database for a test.

     Skips the test when no ArchitectClass is set. Otherwise creates a local
     database in a temp directory, loads a test task run, and builds the
     mock task into a temp build directory.

     Raises:
         unittest.SkipTest: if this test class has no ArchitectClass attribute.
     """
     try:
         _ = self.ArchitectClass
     except AttributeError:
         # Only a missing attribute means "not configured"; anything else
         # (including KeyboardInterrupt) should propagate untouched.
         raise unittest.SkipTest("Skipping test as no ArchitectClass set") from None
     if not self.warned_about_setup:
         print(
             "Architect tests may require using an account with the server provider "
             "in order to function properly. Make sure these are configured before testing."
         )
         # unittest creates a new instance for every test method, so the flag
         # has to live on the class for the warning to be printed only once.
         type(self).warned_about_setup = True
     self.data_dir = tempfile.mkdtemp()
     database_path = os.path.join(self.data_dir, "mephisto.db")
     self.db = LocalMephistoDB(database_path)
     self.build_dir = tempfile.mkdtemp()
     self.task_run = TaskRun.get(self.db, get_test_task_run(self.db))
     builder = MockTaskBuilder(self.task_run, OmegaConf.create({}))
     builder.build_in_dir(self.build_dir)
    def test_assignment_fails(self) -> None:
        """Check that creating an assignment under a nonexistent task run is rejected"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        task_run = TaskRun.get(db, get_test_task_run(db))

        # An assignment pointing at a fake task run id must not be created
        with self.assertRaises(EntryDoesNotExistException):
            db.new_assignment(
                task_run.task_id,
                self.get_fake_id("TaskRun"),
                task_run.requester_id,
                task_run.task_type,
                task_run.provider_type,
                task_run.sandbox,
            )

        # The failed call should have left no assignments behind
        self.assertEqual(len(db.find_assignments()), 0)
    def test_assignment(self) -> None:
        """Exercise creation, retrieval, and searching of assignments"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        task_run_id = get_test_task_run(db)
        task_run = TaskRun.get(db, task_run_id)

        # Create an assignment, then confirm both the raw row and the loaded object
        assignment_id = db.new_assignment(
            task_run.task_id,
            task_run_id,
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )
        self.assertIsNotNone(assignment_id)
        self.assertTrue(isinstance(assignment_id, str))
        self.assertEqual(db.get_assignment(assignment_id)["task_run_id"], task_run_id)
        self.assertEqual(Assignment.get(db, assignment_id).task_run_id, task_run_id)

        def expect_only_created_assignment(found) -> None:
            """Assert the query returned exactly the assignment created above"""
            self.assertEqual(len(found), 1)
            only_assignment = found[0]
            self.assertTrue(isinstance(only_assignment, Assignment))
            self.assertEqual(only_assignment.db_id, assignment_id)
            self.assertEqual(only_assignment.task_run_id, task_run_id)

        # Unfiltered search, then search by the owning task run
        expect_only_created_assignment(db.find_assignments())
        expect_only_created_assignment(db.find_assignments(task_run_id=task_run_id))

        # A fake task run id should match nothing
        unmatched = db.find_assignments(task_run_id=self.get_fake_id("Assignment"))
        self.assertEqual(len(unmatched), 0)
예제 #13
0
def main():
    """
    Prompt for a task_run_id and print the MTurk data for each of that
    run's HITs that the mturk datastore reports as unassigned.

    Exits with status 0 after printing the requester if the task run was
    not launched with an MTurkRequester.
    """
    task_run_id = input("Please enter the task_run_id you'd like to check: ")
    db = LocalMephistoDB()
    task_run = TaskRun.get(db, task_run_id)
    requester = task_run.get_requester()
    if not isinstance(requester, MTurkRequester):
        print(
            "Must be checking a task launched on MTurk, this one uses the following requester:"
        )
        print(requester)
        exit(0)

    turk_db = db.get_datastore_for_provider("mturk")
    hits = turk_db.get_unassigned_hit_ids(task_run_id)

    print(f"Found the following HIT ids unassigned: {hits}")

    # print all of the HITs found above
    from mephisto.abstractions.providers.mturk.mturk_utils import get_hit

    for hit_id in hits:
        # Look up the HIT for this iteration; the previous code always
        # queried hits[0], so every entry printed the first HIT's data.
        hit_info = get_hit(requester._get_client(requester._requester_name),
                           hit_id)
        print(f"MTurk HIT data for {hit_id}:\n{hit_info}\n")
    def test_onboarding_agents(self) -> None:
        """Check that onboarding agents can be created, loaded, and searched in the db"""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        task_run_id = get_test_task_run(db)
        task = TaskRun.get(db, task_run_id).get_task()
        _, worker_id = get_test_worker(db)

        # Creation should hand back an id that loads as an OnboardingAgent
        onboarding_agent_id = db.new_onboarding_agent(
            worker_id, task.db_id, task_run_id, "mock"
        )
        self.assertIsNotNone(onboarding_agent_id)

        loaded_agent = OnboardingAgent.get(db, onboarding_agent_id)
        self.assertIsInstance(loaded_agent, OnboardingAgent)

        # Searching by worker should return just that agent, with no status yet
        matches = db.find_onboarding_agents(worker_id=worker_id)
        self.assertEqual(len(matches), 1)
        first_match = matches[0]
        self.assertIsInstance(first_match, OnboardingAgent)
        self.assertEqual(first_match.db_id, onboarding_agent_id)
        self.assertEqual(first_match.get_status(), AgentState.STATUS_NONE)
예제 #15
0
def get_submitted_data():
    """
    Return JSON data for the units selected by the request's query arguments.

    Query arguments (each may be given multiple times):
        task_run_id: include units of every assignment in these task runs
        assignment_id: include units of these assignments
        unit_ids: include these units directly
        status: assignment statuses to keep; defaults to COMPLETED,
            ACCEPTED, and REJECTED when none are given
        task_name: not yet supported; supplying one yields a failure response

    Assignments gathered from task runs and assignment ids are filtered by
    status before their units are collected. Units named via unit_ids are
    always included.

    Returns a JSON body of ``{"success": True, "units": [...]}``. Any
    exception is printed and reported as ``{"success": False, "msg": ...}``.
    """
    try:
        task_run_ids = request.args.getlist("task_run_id")
        task_names = request.args.getlist("task_name")
        assignment_ids = request.args.getlist("assignment_id")
        unit_ids = request.args.getlist("unit_ids")
        statuses = request.args.getlist("status")

        db = app.extensions["db"]
        units = []
        assignments = []
        # Raised explicitly (rather than asserted) so the check still runs
        # under `python -O`; the message reaches the client via `msg` below.
        if len(task_names) != 0:
            raise NotImplementedError(
                "Searching via task names not yet supported")

        task_runs = [
            TaskRun.get(db, task_run_id) for task_run_id in task_run_ids
        ]
        for task_run in task_runs:
            assignments += task_run.get_assignments()

        assignments += [
            Assignment.get(db, assignment_id)
            for assignment_id in assignment_ids
        ]

        if len(statuses) == 0:
            statuses = [
                AssignmentState.COMPLETED,
                AssignmentState.ACCEPTED,
                AssignmentState.REJECTED,
            ]

        filtered_assignments = [
            a for a in assignments if a.get_status() in statuses
        ]

        # Collect units from the status-filtered assignments only; iterating
        # the unfiltered list here made the status filter a no-op.
        for assignment in filtered_assignments:
            units += assignment.get_units()

        units += [Unit.get(db, unit_id) for unit_id in unit_ids]

        all_unit_data = []
        for unit in units:
            unit_data = {
                "assignment_id": unit.assignment_id,
                "task_run_id": unit.task_run_id,
                "status": unit.db_status,
                "unit_id": unit.db_id,
                "worker_id": unit.worker_id,
                "data": None,
            }
            agent = unit.get_assigned_agent()
            if agent is not None:
                # Prefer the assigned agent's own data and worker id
                unit_data["data"] = agent.state.get_data()
                unit_data["worker_id"] = agent.worker_id
            all_unit_data.append(unit_data)

        print(all_unit_data)
        return jsonify({"success": True, "units": all_unit_data})
    except Exception as e:
        import traceback

        traceback.print_exc()
        return jsonify({"success": False, "msg": str(e)})
예제 #16
0
    def launch_task_run_or_die(
            self,
            run_config: DictConfig,
            shared_state: Optional[SharedTaskState] = None) -> str:
        """
        Validate the given run configuration, register a new task run in the
        database, deploy its server, and launch its units.

        Returns the db_id of the created task run. If preparing or deploying
        the architect, registering with the provider, or launching channels
        fails, an architect shutdown is attempted and the error is re-raised.
        """
        set_mephisto_log_level(level=run_config.get("log_level", "info"))

        requester, provider_type = (
            self._get_requester_and_provider_from_config(run_config)
        )

        # Resolve the abstraction classes and validate the arguments
        # before anything is written to the database
        blueprint_type = run_config.blueprint._blueprint_type
        architect_type = run_config.architect._architect_type
        BlueprintClass = get_blueprint_from_type(blueprint_type)
        ArchitectClass = get_architect_from_type(architect_type)
        CrowdProviderClass = get_crowd_provider_from_type(provider_type)

        if shared_state is None:
            shared_state = BlueprintClass.SharedStateClass()

        for abstraction_class in (BlueprintClass, ArchitectClass,
                                  CrowdProviderClass):
            abstraction_class.assert_task_args(run_config, shared_state)

        # Reuse the task registered under this name, or register a new one
        task_name = run_config.task.get("task_name", None)
        if task_name is None:
            task_name = blueprint_type
            logger.warning(
                f"Task is using the default blueprint name {task_name} as a name, "
                "as no task_name is provided")
        existing_tasks = self.db.find_tasks(task_name=task_name)
        if len(existing_tasks) == 0:
            task_id = self.db.new_task(task_name, blueprint_type)
        else:
            task_id = existing_tasks[0].db_id

        logger.info(f"Creating a task run under task name: {task_name}")

        # Register the new task run and load it
        new_run_id = self.db.new_task_run(
            task_id,
            requester.db_id,
            json.dumps(OmegaConf.to_yaml(run_config, resolve=True)),
            provider_type,
            blueprint_type,
            requester.is_sandbox(),
        )
        task_run = TaskRun.get(self.db, new_run_id)

        live_run = self._create_live_task_run(
            run_config,
            shared_state,
            task_run,
            ArchitectClass,
            BlueprintClass,
            CrowdProviderClass,
        )

        try:
            # From here on a failure means the architect has to be shut down.
            # Set up and deploy the server
            live_run.architect.prepare()
            task_url = live_run.architect.deploy()

            # TODO(#102) maybe the cleanup (destruction of the server configuration?) should only
            # happen after everything has already been reviewed, this way it's possible to
            # retrieve the exact build directory to review a task for real
            live_run.architect.cleanup()

            # Register the task with the provider
            live_run.provider.setup_resources_for_task_run(
                task_run, run_config, shared_state, task_url)

            live_run.client_io.launch_channels()
        except (KeyboardInterrupt, Exception) as launch_error:
            logger.error(
                "Encountered error while launching run, shutting down",
                exc_info=True)
            try:
                live_run.architect.shutdown()
            except (KeyboardInterrupt, Exception) as shutdown_error:
                logger.exception(
                    f"Could not shut down architect: {shutdown_error}",
                    exc_info=True,
                )
            raise launch_error

        launcher = live_run.task_launcher
        launcher.create_assignments()
        launcher.launch_units(task_url)

        # Track the live run and record the run as not yet complete
        self._task_runs_tracked[task_run.db_id] = live_run
        task_run.update_completion_progress(status=False)

        return task_run.db_id
예제 #17
0
 def setUp(self) -> None:
     """Create a local database in a fresh temp directory and load a test task run"""
     self.data_dir = tempfile.mkdtemp()
     self.db = LocalMephistoDB(os.path.join(self.data_dir, "mephisto.db"))
     test_run_id = get_test_task_run(self.db)
     self.task_run = TaskRun.get(self.db, test_run_id)
예제 #18
0
def main():
    """
    Script to launch makeup tasks for workers that
    can't be bonused via other avenues.

    Creates a task for a worker, qualifying them directly,
    and marks as a soft_rejected HIT for the given task name.

    Interactively prompts for an existing task name, a requester name, and
    any number of (worker id, amount, reason) entries. For each entry it
    grants the worker a dedicated qualification, creates the task run,
    assignment, and unit records, launches a compensation HIT, and emails
    the worker the link.

    Returns 0 when exiting early (unknown task, unknown requester, or no
    entries given), otherwise None.
    """
    db = LocalMephistoDB()

    task_name = input(
        "Please enter a task name for bookkeeping. This task name will be tied to "
        "the additional spend granted through this script, and should be the same "
        "as the task you originally launched that you now need to compensate for:\n>> "
    )
    tasks = db.find_tasks(task_name=task_name)
    if len(tasks) == 0:
        print("No tasks found with the given name...")
        all_tasks = db.find_tasks()
        all_names = set([t.task_name for t in all_tasks])
        print(
            f"Choose an existing task of {all_names} to use this functionality."
        )
        print("Compensation hits must be tied to an existing task")
        return 0
    task = tasks[0]

    req_name = input(
        "Please enter an MTurkRequester name to use to bonus from:\n>> ")
    requesters = db.find_requesters(requester_name=req_name)
    if len(requesters) == 0:
        print("Could not find a requester by that name...")
        return 0
    requester = requesters[0]
    client = requester._get_client(requester._requester_name)

    print(
        "You can now enter a worker id, amount, and reason for as many compensation tasks "
        "as you want to launch for this.")
    compensation_hits = []
    # amount/reason persist across iterations so that a blank answer
    # reuses the previous entry's value
    amount = None
    reason = None
    while True:
        worker_id = input(
            "Enter a worker id to compensate. Leave blank to move on to launching: \n>> "
        ).strip()
        if len(worker_id) == 0:
            break
        prev_amount = "" if amount is None else f" (leave blank for ${amount})"
        next_amount = input(
            f"Enter the amount in dollars to pay out in this compensation task{prev_amount}:\n>> $"
        )
        amount = float(next_amount) if len(
            next_amount.strip()) != 0 else amount
        assert amount is not None, "Amount can not be left blank"
        # The hint previously lacked its closing parenthesis
        prev_reason = "" if reason is None else f" (leave blank for '{reason}')"
        next_reason = input(
            f"Provide reason for launching this compensation task. This will be sent to the worker{prev_reason}:\n>> "
        )
        reason = next_reason if len(next_reason.strip()) != 0 else reason
        assert reason is not None, "Reason can not be left blank"
        compensation_hits.append({
            "worker_id": worker_id,
            "amount": amount,
            "reason": reason,
        })
    if len(compensation_hits) == 0:
        print("No compensation details provided, exiting")
        return 0

    print(f"You entered the following tasks:\n{compensation_hits}")
    input("Input anything to confirm and continue...")

    # Iterate through and launch tasks
    for comp_dict in compensation_hits:
        # Create the MTurk qualification for this specific worker
        worker_id = comp_dict["worker_id"]
        qual_name = f"compensation-for-{worker_id}-on-{task_name}"
        print(f"Creating qualification for {worker_id}: {qual_name}....")
        qualification = make_qualification_dict(qual_name, QUAL_EXISTS, None)
        qual_map = requester.datastore.get_qualification_mapping(qual_name)
        if qual_map is None:
            qualification[
                "QualificationTypeId"] = requester._create_new_mturk_qualification(
                    qual_name)
        else:
            qualification["QualificationTypeId"] = qual_map[
                "mturk_qualification_id"]
        give_worker_qualification(client, worker_id,
                                  qualification["QualificationTypeId"])

        # Create the task run for this HIT
        print("Creating task run and data model components for this HIT")
        config = build_task_config(comp_dict, requester)
        init_params = OmegaConf.to_yaml(OmegaConf.structured(config))
        new_run_id = db.new_task_run(
            task.db_id,
            requester.db_id,
            json.dumps(init_params),
            requester.provider_type,
            "mock",
            requester.is_sandbox(),
        )
        task_run = TaskRun.get(db, new_run_id)

        # Create an assignment, unit, agent, and mark as assigned
        # Assignment creation
        task_args = task_run.get_task_args()
        assignment_id = db.new_assignment(
            task_run.task_id,
            task_run.db_id,
            task_run.requester_id,
            task_run.task_type,
            task_run.provider_type,
            task_run.sandbox,
        )
        data = InitializationData({}, [{}])
        assignment = Assignment.get(db, assignment_id)
        assignment.write_assignment_data(data)

        # Unit creation
        unit_id = db.new_unit(
            task_run.task_id,
            task_run.db_id,
            task_run.requester_id,
            assignment_id,
            COMPENSATION_UNIT_INDEX,
            task_args.task_reward,
            task_run.provider_type,
            task_run.task_type,
            task_run.sandbox,
        )
        compensation_unit = Unit.get(db, unit_id)
        print(f"Created {task_run}, {assignment}, and {compensation_unit}...")

        # Set up HIT type
        hit_type_id = create_hit_type(
            client,
            task_run.get_task_args(),
            [qualification],
            auto_approve_delay=30,
            skip_locale_qual=True,
        )

        # Create the task on MTurk, email the worker
        print("Creating and deploying task on MTurk")
        duration = 60 * 60 * 24
        hit_link, hit_id, _ = create_compensation_hit_with_hit_type(
            client, comp_dict["reason"], hit_type_id)
        requester.datastore.new_hit(hit_id, hit_link, duration, task_run.db_id)

        print("Sending email to worker...")
        # Use this entry's own reason: the `reason` variable still holds
        # whatever was typed for the LAST worker in the prompt loop above.
        result = email_worker(
            client,
            worker_id,
            "Compensation HIT Launched",
            ("Hello Worker,\n We've launched a compensation hit for a task that you've worked on "
             f"for us in the past. The reason supplied for this task was: {comp_dict['reason']}. This task is "
             f"only doable by you, and should reward ${comp_dict['amount']}. Thanks for being a valued "
             "contributor to our tasks, and for allowing us to try and resolve the issue.\n\n"
             f"Your task can be accessed at the following link: {hit_link}."),
        )

        if not result[0]:
            print(
                f"Email send failed, for reason {result[1]}\n"
                f"Please send {hit_link} to {worker_id} yourself if they reached out about this issue."
            )

        # Mark the agent as soft_rejected, such that we've "paid" it
        compensation_unit.set_db_status(AssignmentState.SOFT_REJECTED)