コード例 #1
0
    def test_worker(self) -> None:
        """Test creation and querying of workers.

        Creates a single worker, reads it back both as a raw row and through
        the ``Worker`` wrapper, then checks that ``find_workers`` returns it
        when unfiltered or filtered by its name, and returns nothing for a
        name that was never registered.
        """
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Check creation and retrieval of a worker
        worker_name = "test_worker"
        provider_type = PROVIDER_TYPE
        worker_id = db.new_worker(worker_name, provider_type)
        self.assertIsNotNone(worker_id)
        # assertIsInstance reports the offending value and its type on failure,
        # whereas assertTrue(isinstance(...)) only says "False is not true".
        self.assertIsInstance(worker_id, str)
        worker_row = db.get_worker(worker_id)
        self.assertEqual(worker_row["worker_name"], worker_name)

        worker = Worker(db, worker_id)
        self.assertEqual(worker.worker_name, worker_name)

        # The unfiltered query and the query filtered by the known name must
        # both return exactly the worker created above.
        for filters in ({}, {"worker_name": worker_name}):
            with self.subTest(filters=filters):
                workers = db.find_workers(**filters)
                self.assertEqual(len(workers), 1)
                self.assertIsInstance(workers[0], Worker)
                self.assertEqual(workers[0].db_id, worker_id)
                self.assertEqual(workers[0].worker_name, worker_name)

        # A name that was never registered matches nothing
        workers = db.find_workers(worker_name="fake_name")
        self.assertEqual(len(workers), 0)
コード例 #2
0
 def get_worker(self) -> Worker:
     """
     Return the worker that is using this agent for a task.

     The Worker is built from ``self.db`` and ``self.worker_id`` on first use
     and kept in ``self._worker`` so later calls reuse the same instance.
     """
     if self._worker is not None:
         return self._worker
     self._worker = Worker(self.db, self.worker_id)
     return self._worker
コード例 #3
0
def timing_charts(run_id: int) -> None:
    """Print and plot timing and quiz-result statistics for one run.

    For every unit returned by ``retrieve_units(run_id)`` this collects the
    worker name, the timing data accumulated by ``hit_timing``, the free-text
    feedback, the "bug" (window crash) flag and the answers to questions 1-4.
    It then prints the job start/end time and the per-question pass rates,
    and draws the HIT-length, pass-rate and per-HIT "# correct" histograms.

    Args:
        run_id: Identifier handed to ``retrieve_units`` to select the units.
    """
    completed_units = retrieve_units(run_id)
    db = LocalMephistoDB()
    data_browser = DataBrowser(db=db)
    # Worker names are gathered here but nothing below consumes them.
    workers = {"total": []}
    unit_timing = {"total": [], "end": []}
    question_results = {1: [], 2: [], 3: [], 4: []}
    pass_rates = {}
    starttime = math.inf
    endtime = -math.inf
    feedback = []
    num_correct_hist = []
    bug_count = 0
    for unit in completed_units:
        data = data_browser.get_data_from_unit(unit)
        worker = Worker(db, data["worker_id"]).worker_name
        workers["total"].append(worker)
        starttime, endtime, unit_timing = hit_timing(data["data"], starttime, endtime, unit_timing)

        outputs = data["data"]["outputs"]
        feedback.append(outputs["feedback"])
        if outputs["bug"] == "true":
            bug_count += 1
        num_correct = 0
        for q, answers in question_results.items():
            answer = outputs["q" + str(q) + "Answer"]
            answers.append(answer)
            if answer == "true":
                num_correct += 1
        num_correct_hist.append(num_correct)

    if not num_correct_hist:
        # No unit was processed, so the start/end sentinels are still
        # +/-infinity, which datetime.fromtimestamp() cannot convert, and
        # there is nothing to plot.
        print(f"No completed units found for run {run_id}; nothing to chart.")
        return

    print(f"Job start time: {datetime.fromtimestamp(starttime)}")
    print(f"Job end time: {datetime.fromtimestamp(endtime)}")

    plot_hist_sorted(
        unit_timing["total"], cutoff=1200, target_val=600, xlabel="", ylabel="Total HIT Time (sec)"
    )
    calc_percentiles(unit_timing["total"], "HIT Length")

    for q, answers in question_results.items():
        results_dict = Counter(answers)
        answered = results_dict["true"] + results_dict["false"]
        # Only "true"/"false" answers count towards the denominator; report
        # 0% instead of dividing by zero when a question has neither.
        rate = (results_dict["true"] / answered) * 100 if answered else 0.0
        pass_rates[q] = rate
        print(f"Question #{q} pass rate: {rate:.1f}%")
    plot_hist(pass_rates, xlabel="Question #", ylabel="Pass Rate %")
    print(
        f"Number of workers who didn't get any right: {num_correct_hist.count(0)}"
    )

    # Map HIT index -> number of correct answers for that HIT
    vals_dict = dict(enumerate(num_correct_hist))
    plot_hist(vals_dict, xlabel="HIT #", ylabel="# Correct", ymax=4)

    print(f"Number of workers who experienced a window crash: {bug_count}")
    print(feedback)
コード例 #4
0
def format_for_printing_data(data):
    """Render one unit's data as a human-readable block of text.

    Custom tasks can define methods for how to display their data in a
    relevant way; this one prints worker/unit metadata, the task's subdomain
    and whichever of the known output fields are present.

    Relies on the module-level ``db`` to look up the worker's name.

    Args:
        data: Mapping with ``worker_id``, ``unit_id``, ``status`` and a
            ``data`` entry holding ``times``, ``inputs`` and ``outputs``.

    Returns:
        The formatted text, starting with a dashed separator line.
    """
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    duration = contents["times"]["task_end"] - contents["times"]["task_start"]
    metadata_string = (
        f"Worker: {worker_name}\nUnit: {data['unit_id']}\n"
        f"Duration: {int(duration)}\nStatus: {data['status']}\n"
    )

    inputs = contents["inputs"]
    inputs_string = f"Domain: {inputs['subdomain']}\n"

    outputs = contents["outputs"]
    # (label, key) pairs in display order. Labels are emitted verbatim, so the
    # existing "Feeback" spelling is kept to leave the output unchanged.
    optional_fields = (
        ("Usability Rating", "usability-rating"),
        ("Self Performance Rating", "self-rating"),
        ("Instructions Read Time (sec)", "instructionsReadTime"),
        ("Pre Interaction Time (sec)", "preInteractTime"),
        ("Interaction Time (sec)", "interactTime"),
        ("Clicks (timestamp)", "clickedElements"),
        ("OS & Browser Info", "userAgent"),
        ("User Feeback", "feedback"),
    )
    output_lines = []
    for label, key in optional_fields:
        try:
            value = outputs[key]
        except (KeyError, TypeError):
            # Best effort: skip a field that is absent (or an outputs value
            # that is not subscriptable by key) without swallowing unrelated
            # errors such as KeyboardInterrupt, as a bare except would.
            continue
        output_lines.append(f"{label}: {value}\n")
    output_string = "".join(output_lines)
    # Attached files are not reported by this formatter.
    return f"-------------------\n{metadata_string}{inputs_string}{output_string}"
コード例 #5
0
    def test_worker(self) -> None:
        """Check that a provider worker round-trips through the db and can be blocked."""
        db: MephistoDB = self.db
        requester = self.get_test_requester()
        provider_worker_cls = self.CrowdProviderClass.WorkerClass
        created = provider_worker_cls.new(db, self.get_test_worker_name())

        # Loading by db id through the generic Worker must yield the same name
        reloaded = Worker(db, created.db_id)
        self.assertEqual(
            created.worker_name,
            reloaded.worker_name,
            "Worker gotten from db not same as first init",
        )

        # Blocking and then unblocking must both be reflected by is_blocked
        created.block_worker("Test reason", requester=requester)
        self.assertTrue(created.is_blocked(requester))
        created.unblock_worker("Test reason", requester=requester)
        self.assertFalse(created.is_blocked(requester))
コード例 #6
0
 def find_workers(
     self, worker_name: Optional[str] = None, provider_type: Optional[str] = None
 ) -> List[Worker]:
     """
     Look up workers, optionally filtered by name and/or provider type.

     A filter left as None is ignored by the query, so calling this with no
     arguments returns every worker in the table.
     """
     with self.table_access_condition:
         cursor = self._get_connection().cursor()
         cursor.execute(
             """
             SELECT * from workers
             WHERE (?1 IS NULL OR worker_name = ?1)
             AND (?2 IS NULL OR provider_type = ?2)
             """,
             (worker_name, provider_type),
         )
         # Wrap each fetched row, handing the row over alongside its id
         found: List[Worker] = []
         for record in cursor.fetchall():
             found.append(Worker(self, str(record["worker_id"]), row=record))
         return found
コード例 #7
0
def format_for_printing_data(data):
    """Render one unit's data as a human-readable block of text.

    Custom tasks can define methods for how to display their data in a
    relevant way; this one prints worker/unit metadata, the task's subdomain
    and whichever of the question results and client details are present.

    Relies on the module-level ``db`` to look up the worker's name.

    Args:
        data: Mapping with ``worker_id``, ``unit_id``, ``status`` and a
            ``data`` entry holding ``times``, ``inputs`` and ``outputs``.

    Returns:
        The formatted text, starting with a dashed separator line.
    """
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    duration = contents["times"]["task_end"] - contents["times"]["task_start"]
    metadata_string = (
        f"Worker: {worker_name}\nUnit: {data['unit_id']}\n"
        f"Duration: {int(duration)}\nStatus: {data['status']}\n"
    )

    inputs = contents["inputs"]
    inputs_string = f"Domain: {inputs['subdomain']}\n"

    outputs = contents["outputs"]
    # (label, key) pairs in display order. Labels are emitted verbatim, so the
    # existing "Feeback" spelling is kept to leave the output unchanged.
    optional_fields = (
        ("Question #1 Result", "q1Answer"),
        ("Question #2 Result", "q2Answer"),
        ("Question #3 Result", "q3Answer"),
        ("Question #4 Result", "q4Answer"),
        ("OS & Browser Info", "userAgent"),
        ("User Feeback", "feedback"),
    )
    output_lines = []
    for label, key in optional_fields:
        try:
            value = outputs[key]
        except (KeyError, TypeError):
            # Best effort: skip a field that is absent (or an outputs value
            # that is not subscriptable by key) without swallowing unrelated
            # errors such as KeyboardInterrupt, as a bare except would.
            continue
        output_lines.append(f"{label}: {value}\n")
    output_string = "".join(output_lines)

    return f"-------------------\n{metadata_string}{inputs_string}{output_string}"
コード例 #8
0
    def test_create_and_find_worker(self) -> None:
        """Check that an MTurk worker can be found by db id and by MTurk id."""
        db = self.db
        mturk_id = "ABCDEFGHIJ"
        mismatch_msg = "Worker gotten from db not same as first init"

        created = MTurkWorker.new(db, mturk_id)

        # Lookup through the generic Worker class by database id
        by_db_id = Worker(db, created.db_id)
        self.assertEqual(created.worker_name, by_db_id.worker_name, mismatch_msg)

        # Lookup through the MTurk-specific id
        by_mturk_id = MTurkWorker.get_from_mturk_worker_id(db, mturk_id)
        self.assertEqual(created.worker_name, by_mturk_id.worker_name, mismatch_msg)

        # An id that was never registered yields None
        missing = MTurkWorker.get_from_mturk_worker_id(db, "FAKE_ID")
        self.assertIsNone(missing, f"Found worker {missing} from a fake id")
コード例 #9
0
def format_for_printing_data(data):
    """Render one unit's data as a human-readable block of text.

    Custom tasks can define methods for how to display their data in a
    relevant way; this one lists worker/unit metadata, the character shown to
    the worker, the submitted rating and any attached files together with the
    directory of the unit's assigned agent.
    """
    name = Worker(db, data["worker_id"]).worker_name
    payload = data["data"]
    times = payload["times"]
    elapsed = times["task_end"] - times["task_start"]
    sections = [
        "-------------------\n",
        f"Worker: {name}\nUnit: {data['unit_id']}\n",
        f"Duration: {int(elapsed)}\nStatus: {data['status']}\n",
    ]

    task_inputs = payload["inputs"]
    sections.append(
        f"Character: {task_inputs['character_name']}\n"
        f"Description: {task_inputs['character_description']}\n"
    )

    task_outputs = payload["outputs"]
    sections.append(f"   Rating: {task_outputs['rating']}\n")
    attached = task_outputs.get("files")
    if attached is None:
        sections.append("   Files: No files attached\n")
    else:
        # Files live in the data directory of the agent assigned to the unit
        agent_dir = Unit(db, data["unit_id"]).get_assigned_agent().get_data_dir()
        sections.append(f"   Files: {attached}\n")
        sections.append(f"   File directory {agent_dir}\n")
    return "".join(sections)
コード例 #10
0
    def test_worker_fails(self) -> None:
        """Check that invalid worker lookups and creations raise the expected errors."""
        assert self.db is not None, "No db initialized"
        db: MephistoDB = self.db

        # Loading an id that was never stored must fail
        with self.assertRaises(EntryDoesNotExistException):
            Worker(db, self.get_fake_id("Worker"))

        name = "test_worker"
        provider = PROVIDER_TYPE
        db.new_worker(name, provider)

        # Re-using an existing name, or using an empty name, is rejected
        rejected = (
            (name, EntryAlreadyExistsException),
            ("", MephistoDBException),
        )
        for bad_name, expected_error in rejected:
            with self.assertRaises(expected_error):
                db.new_worker(bad_name, provider)

        # Only the single successful creation above may be stored
        self.assertEqual(len(db.find_workers()), 1)
コード例 #11
0
ファイル: supervisor.py プロジェクト: vaibhavad/Mephisto
    def _register_agent(self, packet: Packet, channel_info: ChannelInfo):
        """Process an agent registration packet to register an agent.

        The outcome is reported back on ``channel_info.channel_id`` with a
        PACKET_TYPE_PROVIDER_DETAILS packet carrying the request id and:

        * the existing agent id, when the registration id is already known
          (a reconnection; the agent's channel is updated);
        * ``None``, when the worker has no valid units or is disqualified by
          the blueprint's onboarding qualification;
        * an onboarding agent id plus onboarding data, when onboarding is
          required and the worker is not yet qualified (an onboarding thread
          is started);
        * otherwise whatever ``_assign_unit_to_agent`` sends.

        Args:
            packet: Registration packet; ``packet.data`` must provide
                ``provider_data`` (with ``agent_registration_id`` and
                ``worker_id``) and ``request_id``.
            channel_info: Channel the request arrived on, with its job.
        """
        # First see if this is a reconnection
        crowd_data = packet.data["provider_data"]
        agent_registration_id = crowd_data["agent_registration_id"]
        logger.debug(
            f"Incoming request to register agent {agent_registration_id}.")
        if agent_registration_id in self.agents_by_registration_id:
            agent = self.agents_by_registration_id[agent_registration_id].agent
            # Update the source channel, in case it has changed
            self.agents[
                agent.get_agent_id()].used_channel_id = channel_info.channel_id
            self.message_queue.append(
                Packet(
                    packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                    sender_id=SYSTEM_CHANNEL_ID,
                    receiver_id=channel_info.channel_id,
                    data={
                        "request_id": packet.data["request_id"],
                        "agent_id": agent.get_agent_id(),
                    },
                ))
            logger.debug(
                f"Found existing agent_registration_id {agent_registration_id}, "
                f"reconnecting to agent {agent.get_agent_id()}.")
            return

        # Process a new agent
        task_runner = channel_info.job.task_runner
        task_run = task_runner.task_run
        worker_id = crowd_data["worker_id"]
        worker = Worker(self.db, worker_id)

        # get the list of tentatively valid units
        units = task_run.get_valid_units_for_worker(worker)
        if len(units) == 0:
            self.message_queue.append(
                Packet(
                    packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                    sender_id=SYSTEM_CHANNEL_ID,
                    receiver_id=channel_info.channel_id,
                    data={
                        "request_id": packet.data["request_id"],
                        "agent_id": None
                    },
                ))
            # This is the new-agent path, so the message must not claim an
            # existing registration was found (that text belongs to the
            # reconnection branch above).
            logger.debug(
                f"Worker {worker_id} registering as {agent_registration_id} "
                f"had no valid units.")
            return

        # If there's onboarding, see if this worker has already been disqualified
        # (the worker loaded above is reused; no second lookup is needed)
        blueprint = task_run.get_blueprint(args=task_runner.args)
        if isinstance(blueprint,
                      OnboardingRequired) and blueprint.use_onboarding:
            if worker.is_disqualified(blueprint.onboarding_qualification_name):
                self.message_queue.append(
                    Packet(
                        packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                        sender_id=SYSTEM_CHANNEL_ID,
                        receiver_id=channel_info.channel_id,
                        data={
                            "request_id": packet.data["request_id"],
                            "agent_id": None,
                        },
                    ))
                logger.debug(
                    f"Worker {worker_id} is already disqualified by onboarding "
                    f"qual {blueprint.onboarding_qualification_name}.")
                return
            elif not worker.is_qualified(
                    blueprint.onboarding_qualification_name):
                # Send a packet with onboarding information
                onboard_data = blueprint.get_onboarding_data(worker.db_id)
                onboard_agent = OnboardingAgent.new(self.db, worker, task_run)
                onboard_agent.state.set_init_state(onboard_data)
                agent_info = AgentInfo(agent=onboard_agent,
                                       used_channel_id=channel_info.channel_id)
                onboard_id = onboard_agent.get_agent_id()
                # register onboarding agent; the original packet is kept so
                # it is available under the onboarding agent's id later
                self.agents[onboard_id] = agent_info
                self.onboarding_packets[onboard_id] = packet
                self.message_queue.append(
                    Packet(
                        packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                        sender_id=SYSTEM_CHANNEL_ID,
                        receiver_id=channel_info.channel_id,
                        data={
                            "request_id": packet.data["request_id"],
                            "agent_id": onboard_id,
                            "onboard_data": onboard_data,
                        },
                    ))

                logger.debug(
                    f"Worker {worker_id} is starting onboarding thread with "
                    f"onboarding agent id {onboard_id}.")

                # Create an onboarding thread
                onboard_thread = threading.Thread(
                    target=self._launch_and_run_onboarding,
                    args=(agent_info, channel_info.job.task_runner),
                    name=f"Onboard-thread-{onboard_id}",
                )

                # Status and thread handle are set before the thread starts
                onboard_agent.update_status(AgentState.STATUS_ONBOARDING)
                agent_info.assignment_thread = onboard_thread
                onboard_thread.start()
                return

        # Not onboarding, so just register directly
        self._assign_unit_to_agent(packet, channel_info, units)
コード例 #12
0
ファイル: supervisor.py プロジェクト: vaibhavad/Mephisto
    def _assign_unit_to_agent(self, packet: Packet, channel_info: ChannelInfo,
                              units: List["Unit"]):
        """Handle creating an agent for the specific worker to register an agent

        Tries to reserve one of ``units`` (consuming the list from the front),
        then replies on ``channel_info.channel_id`` with a
        PACKET_TYPE_PROVIDER_DETAILS packet whose ``agent_id`` is the new
        agent's id, or ``None`` when no unit could be reserved.  On success
        the agent is registered in ``self.agents`` and
        ``self.agents_by_registration_id`` and the task thread is started
        (immediately for non-concurrent runners, or once no agent slot of the
        assignment is empty for concurrent ones).
        """

        crowd_data = packet.data["provider_data"]
        task_run = channel_info.job.task_runner.task_run
        crowd_provider = channel_info.job.provider
        worker_id = crowd_data["worker_id"]
        worker = Worker(self.db, worker_id)

        logger.debug(f"Worker {worker_id} is being assigned one of "
                     f"{len(units)} units.")

        # Try units in order until one reservation yields a non-None result.
        # Note this pops from the caller's list.
        # (presumably reserve_unit returns None when the unit is no longer
        # available -- TODO confirm against TaskRun.reserve_unit)
        reserved_unit = None
        while len(units) > 0 and reserved_unit is None:
            unit = units.pop(0)
            reserved_unit = task_run.reserve_unit(unit)
        if reserved_unit is None:
            # Nothing could be reserved: answer the request with no agent
            self.message_queue.append(
                Packet(
                    packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                    sender_id=SYSTEM_CHANNEL_ID,
                    receiver_id=channel_info.channel_id,
                    data={
                        "request_id": packet.data["request_id"],
                        "agent_id": None
                    },
                ))
        else:
            # `unit` is the last unit popped, i.e. the one just reserved
            agent = crowd_provider.AgentClass.new_from_provider_data(
                self.db, worker, unit, crowd_data)
            logger.debug(f"Created agent {agent}, {agent.db_id}.")
            self.message_queue.append(
                Packet(
                    packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                    sender_id=SYSTEM_CHANNEL_ID,
                    receiver_id=channel_info.channel_id,
                    data={
                        "request_id": packet.data["request_id"],
                        "agent_id": agent.get_agent_id(),
                    },
                ))
            # Track the agent both by agent id and by registration id
            agent_info = AgentInfo(agent=agent,
                                   used_channel_id=channel_info.channel_id)
            self.agents[agent.get_agent_id()] = agent_info
            self.agents_by_registration_id[
                crowd_data["agent_registration_id"]] = agent_info

            # Launch individual tasks
            if not channel_info.job.task_runner.is_concurrent:
                unit_thread = threading.Thread(
                    target=self._launch_and_run_unit,
                    args=(unit, agent_info, channel_info.job.task_runner),
                    name=f"Unit-thread-{unit.db_id}",
                )
                agent_info.assignment_thread = unit_thread
                unit_thread.start()
            else:
                # See if the concurrent unit is ready to launch
                assignment = unit.get_assignment()
                agents = assignment.get_agents()
                if None in agents:
                    agent.update_status(AgentState.STATUS_WAITING)
                    return  # need to wait for all agents to be here to launch

                # Launch the backend for this assignment
                # NOTE(review): this looks agents up by `db_id`, while they
                # were stored above under `get_agent_id()` -- confirm the two
                # are the same value.
                agent_infos = [
                    self.agents[a.db_id] for a in agents if a is not None
                ]

                assign_thread = threading.Thread(
                    target=self._launch_and_run_assignment,
                    args=(assignment, agent_infos,
                          channel_info.job.task_runner),
                    name=f"Assignment-thread-{assignment.db_id}",
                )

                # The loop variable rebinds the outer `agent_info`; nothing
                # reads it after this loop.
                for agent_info in agent_infos:
                    agent_info.agent.update_status(AgentState.STATUS_IN_TASK)
                    agent_info.assignment_thread = assign_thread

                assign_thread.start()
コード例 #13
0
def issue_bonuses(task_name: str) -> None:
    """Issue interaction-quality bonuses for the completed units of a task.

    Downloads the shared ``bonus_records.csv`` from the ``droidlet-hitl`` S3
    bucket and skips every unit whose ``(task_name, unit_id)`` pair is already
    listed there.  For each remaining completed unit, every recorded click
    that carries ``interactionScores`` is paid a bonus of 0.30 times its
    "stoplight" score.  New records are appended to the CSV, which is uploaded
    back to S3, and the local copy is removed.

    Relies on the module-level ``s3``, ``data_browser`` and ``db`` objects.

    Args:
        task_name: Mephisto task name whose units should be processed.

    Returns:
        None.  The annotation used to say ``list``, but the function never
        returned a value.
    """
    logging.info(f"Initializing bonus script for Mephisto task_name: {task_name}")

    # Download the shared list of issued bonuses and pull out unique reference tuples to check against
    logging.info("Downloading interaction bonus records from S3...")
    with open("bonus_records.csv", "wb") as f:
        s3.download_fileobj("droidlet-hitl", "bonus_records.csv", f)

    logging.info("Building list of already issued bonuses...")
    # The combination of task_name and unit_id is essentially unique; a set
    # keeps the per-unit membership test below constant-time.
    previously_issued_units = set()
    with open("bonus_records.csv", newline="") as csvfile:
        for row in csv.reader(csvfile):
            # csv.reader yields an empty list for blank lines; skip rows that
            # cannot hold both columns instead of raising IndexError.
            if len(row) >= 2:
                previously_issued_units.add((row[0], row[1]))

    # Get completed units for the task_name
    logging.info("Retrieving units from Mephisto based on task_name...")
    units = data_browser.get_units_for_task_name(task_name)
    completed_units = [unit for unit in units if unit.db_status == "completed"]

    logging.info(f"Completed units for job {task_name} retrieved")

    # Retrieve bonus info from DB and issue
    new_bonus_records = []
    bonus_results = []
    total_bonus = 0
    units_skipped = 0
    for unit in completed_units:
        data = data_browser.get_data_from_unit(unit)
        unit_id = data["unit_id"]
        if (task_name, unit_id) in previously_issued_units:
            units_skipped += 1
            continue

        worker = Worker(db, data["worker_id"])
        outputs = data["data"]["outputs"]
        clean_click_string = outputs["clickedElements"].replace("'", "")
        clicks = json.loads(clean_click_string)
        bonus_result = False
        if clicks:
            for click in clicks:
                if "interactionScores" not in click["id"]:
                    continue
                try:
                    amount = float(
                        f'{(click["id"]["interactionScores"]["stoplight"] * 0.30):.2f}'
                    )
                    bonus_result, _ = worker.bonus_worker(
                        amount, "Virtual assistant interaction quality bonus", unit
                    )
                except Exception:
                    # Keep processing the remaining clicks/units, but leave an
                    # "ERR" record and a traceback behind for debugging.
                    logging.exception(
                        f"Exception raised on bonus issue for {worker.worker_name}, debug"
                    )
                    new_bonus_records.append(
                        (task_name, unit_id, worker.worker_name, "ERR")
                    )
                    continue
                if bonus_result:
                    # Count and record only bonuses reported as successful, so
                    # that a failed one is neither added to the total nor
                    # marked as issued (which would make a retry skip it).
                    total_bonus += amount
                    new_bonus_records.append(
                        (task_name, unit_id, worker.worker_name, amount)
                    )
            if not bonus_result:
                logging.info(
                    f"Bonus NOT successfully issued for worker {worker.worker_name}, "
                    "but no error was raised. "
                    "Make sure interaction score exists and retry."
                )
        else:
            logging.info(
                f"Recorded click data not found for {worker.worker_name}, no bonus will be issued"
            )
        bonus_results.append(bonus_result)

    logging.info(f"Num completed units: {len(completed_units)}")
    logging.info(
        f"Num bonuses skipped because bonus was issued previously for the same unit: {units_skipped}"
    )
    logging.info(f"Num new bonuses issued: {len([x for x in bonus_results if x])}")
    logging.info(f"Num bonuses FAILED: {len([x for x in bonus_results if not x])}")
    logging.info(f"Total bonus amount issued: {total_bonus}")

    if new_bonus_records:
        logging.info("There are newly issued bonuses to record")
        logging.info("Writing new bonuses to csv and uploading to S3...")
        # newline="" lets the csv module control line endings itself
        with open("bonus_records.csv", "a", newline="") as f:
            csv.writer(f).writerows(new_bonus_records)
        s3.upload_file("bonus_records.csv", "droidlet-hitl", "bonus_records.csv")

    # Reached only when everything above succeeded, so a failed upload leaves
    # the local copy (holding the new records) on disk.
    os.remove("bonus_records.csv")
    logging.info("Finished issuing bonuses!")