def test_worker(self) -> None:
    """Test creation and querying of workers"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Check creation and retrieval of a worker
    worker_name = "test_worker"
    provider_type = PROVIDER_TYPE
    worker_id = db.new_worker(worker_name, provider_type)
    self.assertIsNotNone(worker_id)
    self.assertTrue(isinstance(worker_id, str))
    worker_row = db.get_worker(worker_id)
    self.assertEqual(worker_row["worker_name"], worker_name)

    worker = Worker(db, worker_id)
    self.assertEqual(worker.worker_name, worker_name)

    # Check finding for workers
    workers = db.find_workers()
    self.assertEqual(len(workers), 1)
    self.assertTrue(isinstance(workers[0], Worker))
    self.assertEqual(workers[0].db_id, worker_id)
    self.assertEqual(workers[0].worker_name, worker_name)

    # Check finding for specific workers
    workers = db.find_workers(worker_name=worker_name)
    self.assertEqual(len(workers), 1)
    self.assertTrue(isinstance(workers[0], Worker))
    self.assertEqual(workers[0].db_id, worker_id)
    self.assertEqual(workers[0].worker_name, worker_name)

    workers = db.find_workers(worker_name="fake_name")
    self.assertEqual(len(workers), 0)
def get_worker(self) -> Worker:
    """Return the worker that is using this agent for a task"""
    if self._worker is None:
        self._worker = Worker(self.db, self.worker_id)
    return self._worker
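# Hedged usage sketch (not from the original source): Worker(db, worker_id)
# loads the backing row from the database, so caching the object as
# get_worker() above does avoids repeated lookups. Import paths assume a
# recent Mephisto release; "example_worker" and "mock" are illustrative
# values, not names from the original code.
from mephisto.abstractions.databases.local_database import LocalMephistoDB
from mephisto.data_model.worker import Worker

db = LocalMephistoDB()
worker_id = db.new_worker("example_worker", "mock")  # hypothetical name/provider
worker = Worker(db, worker_id)
print(worker.worker_name)  # -> "example_worker"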
def timing_charts(run_id: int) -> None:
    completed_units = retrieve_units(run_id)
    db = LocalMephistoDB()
    data_browser = DataBrowser(db=db)
    workers = {"total": []}
    unit_timing = {"total": [], "end": []}
    question_results = {1: [], 2: [], 3: [], 4: []}
    pass_rates = {1: [], 2: [], 3: [], 4: []}
    starttime = math.inf
    endtime = -math.inf
    feedback = []
    num_correct_hist = []
    bug_count = 0

    for unit in completed_units:
        data = data_browser.get_data_from_unit(unit)
        worker = Worker(db, data["worker_id"]).worker_name
        workers["total"].append(worker)
        starttime, endtime, unit_timing = hit_timing(
            data["data"], starttime, endtime, unit_timing
        )
        outputs = data["data"]["outputs"]
        feedback.append(outputs["feedback"])
        if outputs["bug"] == "true":
            bug_count += 1
        num_correct = 0
        for q in question_results.keys():
            key = "q" + str(q) + "Answer"
            question_results[q].append(outputs[key])
            if outputs[key] == "true":
                num_correct += 1
        num_correct_hist.append(num_correct)

    print(f"Job start time: {datetime.fromtimestamp(starttime)}")
    print(f"Job end time: {datetime.fromtimestamp(endtime)}")
    plot_hist_sorted(
        unit_timing["total"],
        cutoff=1200,
        target_val=600,
        xlabel="",
        ylabel="Total HIT Time (sec)",
    )
    calc_percentiles(unit_timing["total"], "HIT Length")

    for q in question_results.keys():
        results_dict = Counter(question_results[q])
        pass_rates[q] = (
            results_dict["true"] / (results_dict["true"] + results_dict["false"])
        ) * 100
        print(f"Question #{q} pass rate: {pass_rates[q]:.1f}%")
    plot_hist(pass_rates, xlabel="Question #", ylabel="Pass Rate %")

    print(
        f"Number of workers who didn't get any right: {len([x for x in num_correct_hist if x == 0])}"
    )
    keys = range(len(num_correct_hist))
    vals_dict = dict(zip(keys, num_correct_hist))
    plot_hist(vals_dict, xlabel="HIT #", ylabel="# Correct", ymax=4)
    print(f"Number of workers who experienced a window crash: {bug_count}")
    print(feedback)
def format_for_printing_data(data):
    # Custom tasks can define methods for how to display their data in a relevant way
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    duration = contents["times"]["task_end"] - contents["times"]["task_start"]
    metadata_string = (
        f"Worker: {worker_name}\nUnit: {data['unit_id']}\n"
        f"Duration: {int(duration)}\nStatus: {data['status']}\n"
    )

    inputs = contents["inputs"]
    inputs_string = f"Domain: {inputs['subdomain']}\n"

    outputs = contents["outputs"]
    output_string = ""
    # Only include the output fields this unit actually produced
    output_fields = [
        ("usability-rating", "Usability Rating"),
        ("self-rating", "Self Performance Rating"),
        ("instructionsReadTime", "Instructions Read Time (sec)"),
        ("preInteractTime", "Pre Interaction Time (sec)"),
        ("interactTime", "Interaction Time (sec)"),
        ("clickedElements", "Clicks (timestamp)"),
        ("userAgent", "OS & Browser Info"),
        ("feedback", "User Feedback"),
    ]
    for key, label in output_fields:
        if key in outputs:
            output_string += f"{label}: {outputs[key]}\n"

    # found_files = outputs.get("files")
    # if found_files is not None:
    #     file_dir = Unit(db, data["unit_id"]).get_assigned_agent().get_data_dir()
    #     output_string += f"   Files: {found_files}\n"
    #     output_string += f"   File directory {file_dir}\n"
    # else:
    #     output_string += f"   Files: No files attached\n"

    return f"-------------------\n{metadata_string}{inputs_string}{output_string}"
def test_worker(self) -> None:
    """Ensure we can query and use a worker"""
    db: MephistoDB = self.db
    requester = self.get_test_requester()
    WorkerClass = self.CrowdProviderClass.WorkerClass
    test_worker = WorkerClass.new(db, self.get_test_worker_name())
    test_worker_2 = Worker(db, test_worker.db_id)
    self.assertEqual(
        test_worker.worker_name,
        test_worker_2.worker_name,
        "Worker gotten from db not same as first init",
    )

    # Ensure blocking is doable
    test_worker.block_worker("Test reason", requester=requester)
    self.assertTrue(test_worker.is_blocked(requester))
    test_worker.unblock_worker("Test reason", requester=requester)
    self.assertFalse(test_worker.is_blocked(requester))
def find_workers(
    self, worker_name: Optional[str] = None, provider_type: Optional[str] = None
) -> List[Worker]:
    """
    Try to find any worker that matches the given arguments.
    When called with no arguments, return all workers.
    """
    with self.table_access_condition:
        conn = self._get_connection()
        c = conn.cursor()
        c.execute(
            """
            SELECT * from workers
            WHERE (?1 IS NULL OR worker_name = ?1)
            AND (?2 IS NULL OR provider_type = ?2)
            """,
            (worker_name, provider_type),
        )
        rows = c.fetchall()
        return [Worker(self, str(r["worker_id"]), row=r) for r in rows]
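# Hedged usage sketch (assumption: a local Mephisto setup, matching the other
# snippets here; the LocalMephistoDB import path reflects recent Mephisto
# releases). find_workers() with no arguments returns every worker; passing
# worker_name and/or provider_type narrows the query.
from mephisto.abstractions.databases.local_database import LocalMephistoDB

db = LocalMephistoDB()
all_workers = db.find_workers()
mock_workers = db.find_workers(provider_type="mock")  # provider type is illustrative
named_workers = db.find_workers(worker_name="test_worker")
for w in named_workers:
    print(w.db_id, w.worker_name)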
def format_for_printing_data(data):
    # Custom tasks can define methods for how to display their data in a relevant way
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    duration = contents["times"]["task_end"] - contents["times"]["task_start"]
    metadata_string = (
        f"Worker: {worker_name}\nUnit: {data['unit_id']}\n"
        f"Duration: {int(duration)}\nStatus: {data['status']}\n"
    )

    inputs = contents["inputs"]
    inputs_string = f"Domain: {inputs['subdomain']}\n"

    outputs = contents["outputs"]
    output_string = ""
    # Only include the output fields this unit actually produced
    output_fields = [
        ("q1Answer", "Question #1 Result"),
        ("q2Answer", "Question #2 Result"),
        ("q3Answer", "Question #3 Result"),
        ("q4Answer", "Question #4 Result"),
        ("userAgent", "OS & Browser Info"),
        ("feedback", "User Feedback"),
    ]
    for key, label in output_fields:
        if key in outputs:
            output_string += f"{label}: {outputs[key]}\n"

    return f"-------------------\n{metadata_string}{inputs_string}{output_string}"
def test_create_and_find_worker(self) -> None:
    """Ensure we can find a worker by MTurk id"""
    db = self.db
    TEST_MTURK_WORKER_ID = "ABCDEFGHIJ"

    test_worker = MTurkWorker.new(db, TEST_MTURK_WORKER_ID)
    test_worker_2 = Worker(db, test_worker.db_id)
    self.assertEqual(
        test_worker.worker_name,
        test_worker_2.worker_name,
        "Worker gotten from db not same as first init",
    )

    test_worker_3 = MTurkWorker.get_from_mturk_worker_id(db, TEST_MTURK_WORKER_ID)
    self.assertEqual(
        test_worker.worker_name,
        test_worker_3.worker_name,
        "Worker gotten from db not same as first init",
    )

    failed_worker = MTurkWorker.get_from_mturk_worker_id(db, "FAKE_ID")
    self.assertIsNone(failed_worker, f"Found worker {failed_worker} from a fake id")
def format_for_printing_data(data):
    # Custom tasks can define methods for how to display their data in a relevant way
    worker_name = Worker(db, data["worker_id"]).worker_name
    contents = data["data"]
    duration = contents["times"]["task_end"] - contents["times"]["task_start"]
    metadata_string = (
        f"Worker: {worker_name}\nUnit: {data['unit_id']}\n"
        f"Duration: {int(duration)}\nStatus: {data['status']}\n"
    )

    inputs = contents["inputs"]
    inputs_string = (
        f"Character: {inputs['character_name']}\n"
        f"Description: {inputs['character_description']}\n"
    )

    outputs = contents["outputs"]
    output_string = f"   Rating: {outputs['rating']}\n"
    found_files = outputs.get("files")
    if found_files is not None:
        file_dir = Unit(db, data["unit_id"]).get_assigned_agent().get_data_dir()
        output_string += f"   Files: {found_files}\n"
        output_string += f"   File directory {file_dir}\n"
    else:
        output_string += "   Files: No files attached\n"

    return f"-------------------\n{metadata_string}{inputs_string}{output_string}"
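# Hedged usage sketch (assumptions: a LocalMephistoDB instance named `db` is in
# scope as in the snippet above, the DataBrowser import path matches recent
# Mephisto releases, and "my-task-name" is a hypothetical task name). Mirrors
# the DataBrowser calls used elsewhere in these snippets to feed each
# completed unit's data through format_for_printing_data().
from mephisto.tools.data_browser import DataBrowser

data_browser = DataBrowser(db=db)
units = data_browser.get_units_for_task_name("my-task-name")
for unit in units:
    if unit.db_status == "completed":
        data = data_browser.get_data_from_unit(unit)
        print(format_for_printing_data(data))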
def test_worker_fails(self) -> None:
    """Ensure workers fail to be created or loaded under failure conditions"""
    assert self.db is not None, "No db initialized"
    db: MephistoDB = self.db

    # Can't get a non-existent entry
    with self.assertRaises(EntryDoesNotExistException):
        worker = Worker(db, self.get_fake_id("Worker"))

    worker_name = "test_worker"
    provider_type = PROVIDER_TYPE
    worker_id = db.new_worker(worker_name, provider_type)

    # Can't create the same worker again
    with self.assertRaises(EntryAlreadyExistsException):
        worker_id = db.new_worker(worker_name, provider_type)

    # Can't use an empty name
    with self.assertRaises(MephistoDBException):
        worker_id = db.new_worker("", provider_type)

    # Ensure no additional workers were created by the failed calls
    workers = db.find_workers()
    self.assertEqual(len(workers), 1)
def _register_agent(self, packet: Packet, channel_info: ChannelInfo):
    """Process an agent registration packet to register an agent"""
    # First see if this is a reconnection
    crowd_data = packet.data["provider_data"]
    agent_registration_id = crowd_data["agent_registration_id"]
    logger.debug(f"Incoming request to register agent {agent_registration_id}.")
    if agent_registration_id in self.agents_by_registration_id:
        agent = self.agents_by_registration_id[agent_registration_id].agent
        # Update the source channel, in case it has changed
        self.agents[agent.get_agent_id()].used_channel_id = channel_info.channel_id
        self.message_queue.append(
            Packet(
                packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                sender_id=SYSTEM_CHANNEL_ID,
                receiver_id=channel_info.channel_id,
                data={
                    "request_id": packet.data["request_id"],
                    "agent_id": agent.get_agent_id(),
                },
            )
        )
        logger.debug(
            f"Found existing agent_registration_id {agent_registration_id}, "
            f"reconnecting to agent {agent.get_agent_id()}."
        )
        return

    # Process a new agent
    task_runner = channel_info.job.task_runner
    task_run = task_runner.task_run
    worker_id = crowd_data["worker_id"]
    worker = Worker(self.db, worker_id)

    # Get the list of tentatively valid units
    units = task_run.get_valid_units_for_worker(worker)
    if len(units) == 0:
        self.message_queue.append(
            Packet(
                packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                sender_id=SYSTEM_CHANNEL_ID,
                receiver_id=channel_info.channel_id,
                data={"request_id": packet.data["request_id"], "agent_id": None},
            )
        )
        logger.debug(
            f"Found existing agent_registration_id {agent_registration_id}, "
            f"had no valid units."
        )
        return

    # If there's onboarding, see if this worker has already been disqualified
    blueprint = task_run.get_blueprint(args=task_runner.args)
    if isinstance(blueprint, OnboardingRequired) and blueprint.use_onboarding:
        if worker.is_disqualified(blueprint.onboarding_qualification_name):
            self.message_queue.append(
                Packet(
                    packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                    sender_id=SYSTEM_CHANNEL_ID,
                    receiver_id=channel_info.channel_id,
                    data={
                        "request_id": packet.data["request_id"],
                        "agent_id": None,
                    },
                )
            )
            logger.debug(
                f"Worker {worker_id} is already disqualified by onboarding "
                f"qual {blueprint.onboarding_qualification_name}."
            )
            return
        elif not worker.is_qualified(blueprint.onboarding_qualification_name):
            # Send a packet with onboarding information
            onboard_data = blueprint.get_onboarding_data(worker.db_id)
            onboard_agent = OnboardingAgent.new(self.db, worker, task_run)
            onboard_agent.state.set_init_state(onboard_data)
            agent_info = AgentInfo(
                agent=onboard_agent, used_channel_id=channel_info.channel_id
            )
            onboard_id = onboard_agent.get_agent_id()
            # Register the onboarding agent
            self.agents[onboard_id] = agent_info
            self.onboarding_packets[onboard_id] = packet
            self.message_queue.append(
                Packet(
                    packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                    sender_id=SYSTEM_CHANNEL_ID,
                    receiver_id=channel_info.channel_id,
                    data={
                        "request_id": packet.data["request_id"],
                        "agent_id": onboard_id,
                        "onboard_data": onboard_data,
                    },
                )
            )
            logger.debug(
                f"Worker {worker_id} is starting onboarding thread with "
                f"onboarding agent id {onboard_id}."
            )
            # Create an onboarding thread
            onboard_thread = threading.Thread(
                target=self._launch_and_run_onboarding,
                args=(agent_info, channel_info.job.task_runner),
                name=f"Onboard-thread-{onboard_id}",
            )
            onboard_agent.update_status(AgentState.STATUS_ONBOARDING)
            agent_info.assignment_thread = onboard_thread
            onboard_thread.start()
            return

    # Not onboarding, so just register directly
    self._assign_unit_to_agent(packet, channel_info, units)
def _assign_unit_to_agent(
    self, packet: Packet, channel_info: ChannelInfo, units: List["Unit"]
):
    """Handle creating an agent for the specific worker to register an agent"""
    crowd_data = packet.data["provider_data"]
    task_run = channel_info.job.task_runner.task_run
    crowd_provider = channel_info.job.provider
    worker_id = crowd_data["worker_id"]
    worker = Worker(self.db, worker_id)

    logger.debug(f"Worker {worker_id} is being assigned one of {len(units)} units.")

    reserved_unit = None
    while len(units) > 0 and reserved_unit is None:
        unit = units.pop(0)
        reserved_unit = task_run.reserve_unit(unit)
    if reserved_unit is None:
        self.message_queue.append(
            Packet(
                packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                sender_id=SYSTEM_CHANNEL_ID,
                receiver_id=channel_info.channel_id,
                data={"request_id": packet.data["request_id"], "agent_id": None},
            )
        )
    else:
        agent = crowd_provider.AgentClass.new_from_provider_data(
            self.db, worker, unit, crowd_data
        )
        logger.debug(f"Created agent {agent}, {agent.db_id}.")
        self.message_queue.append(
            Packet(
                packet_type=PACKET_TYPE_PROVIDER_DETAILS,
                sender_id=SYSTEM_CHANNEL_ID,
                receiver_id=channel_info.channel_id,
                data={
                    "request_id": packet.data["request_id"],
                    "agent_id": agent.get_agent_id(),
                },
            )
        )
        agent_info = AgentInfo(agent=agent, used_channel_id=channel_info.channel_id)
        self.agents[agent.get_agent_id()] = agent_info
        self.agents_by_registration_id[
            crowd_data["agent_registration_id"]
        ] = agent_info

        # Launch individual tasks
        if not channel_info.job.task_runner.is_concurrent:
            unit_thread = threading.Thread(
                target=self._launch_and_run_unit,
                args=(unit, agent_info, channel_info.job.task_runner),
                name=f"Unit-thread-{unit.db_id}",
            )
            agent_info.assignment_thread = unit_thread
            unit_thread.start()
        else:
            # See if the concurrent unit is ready to launch
            assignment = unit.get_assignment()
            agents = assignment.get_agents()
            if None in agents:
                agent.update_status(AgentState.STATUS_WAITING)
                return  # need to wait for all agents to be here to launch

            # Launch the backend for this assignment
            agent_infos = [self.agents[a.db_id] for a in agents if a is not None]
            assign_thread = threading.Thread(
                target=self._launch_and_run_assignment,
                args=(assignment, agent_infos, channel_info.job.task_runner),
                name=f"Assignment-thread-{assignment.db_id}",
            )
            for agent_info in agent_infos:
                agent_info.agent.update_status(AgentState.STATUS_IN_TASK)
                agent_info.assignment_thread = assign_thread
            assign_thread.start()
def issue_bonuses(task_name: str) -> list:
    logging.info(f"Initializing bonus script for Mephisto task_name: {task_name}")

    # Download the shared list of issued bonuses and pull out unique reference
    # tuples to check against
    logging.info("Downloading interaction bonus records from S3...")
    with open("bonus_records.csv", "wb") as f:
        s3.download_fileobj("droidlet-hitl", "bonus_records.csv", f)
    logging.info("Building list of already issued bonuses...")
    previously_issued_units = []
    with open("bonus_records.csv", newline="") as csvfile:
        reader = csv.reader(csvfile)
        for row in reader:
            # The combination of task_name and unit_id is essentially unique
            previously_issued_units.append((row[0], row[1]))

    # Get completed units for the task_name
    logging.info("Retrieving units from Mephisto based on task_name...")
    units = data_browser.get_units_for_task_name(task_name)
    completed_units = []
    for unit in units:
        if unit.db_status == "completed":
            completed_units.append(unit)
    logging.info(f"Completed units for job {task_name} retrieved")

    # Retrieve bonus info from the DB and issue any new bonuses
    new_bonus_records = []
    bonus_results = []
    total_bonus = 0
    units_skipped = 0
    for unit in completed_units:
        data = data_browser.get_data_from_unit(unit)
        unit_id = data["unit_id"]
        if (task_name, unit_id) not in previously_issued_units:
            worker = Worker(db, data["worker_id"])
            outputs = data["data"]["outputs"]
            clean_click_string = outputs["clickedElements"].replace("'", "")
            clicks = json.loads(clean_click_string)
            bonus_result = False
            if clicks:
                for click in clicks:
                    if "interactionScores" in click["id"]:
                        try:
                            amount = float(
                                f'{(click["id"]["interactionScores"]["stoplight"] * 0.30):.2f}'
                            )
                            bonus_result, _ = worker.bonus_worker(
                                amount, "Virtual assistant interaction quality bonus", unit
                            )
                            total_bonus += amount
                            new_bonus_records.append(
                                (task_name, unit_id, worker.worker_name, amount)
                            )
                        except Exception:
                            logging.error(
                                f"Exception raised on bonus issue for {worker.worker_name}, debug"
                            )
                            new_bonus_records.append(
                                (task_name, unit_id, worker.worker_name, "ERR")
                            )
                if not bonus_result:
                    logging.info(
                        f"Bonus NOT successfully issued for worker {worker.worker_name}, "
                        f"but no error was raised. Make sure the interaction score exists and retry."
                    )
            else:
                logging.info(
                    f"Recorded click data not found for {worker.worker_name}, no bonus will be issued"
                )
            bonus_results.append(bonus_result)
        else:
            units_skipped += 1

    logging.info(f"Num completed units: {len(completed_units)}")
    logging.info(
        f"Num bonuses skipped because a bonus was issued previously for the same unit: {units_skipped}"
    )
    logging.info(f"Num new bonuses issued: {len([x for x in bonus_results if x])}")
    logging.info(f"Num bonuses FAILED: {len([x for x in bonus_results if not x])}")
    logging.info(f"Total bonus amount issued: {total_bonus}")

    if new_bonus_records:
        logging.info("There are newly issued bonuses to record")
        logging.info("Writing new bonuses to csv and uploading to S3...")
        with open("bonus_records.csv", "a") as f:
            writer = csv.writer(f)
            for record in new_bonus_records:
                writer.writerow(record)
        s3.upload_file("bonus_records.csv", "droidlet-hitl", "bonus_records.csv")

    os.remove("bonus_records.csv")
    logging.info("Finished issuing bonuses!")
    return