def reject_work(self, assignment_id, reason):
    """Reject the work submitted for an assignment.

    Obtains an MTurk client for the sandbox/live setting stored in
    ``self.is_sandbox`` and calls ``reject_assignment`` on it.

    Args:
        assignment_id: id of the assignment to reject.
        reason: text passed to MTurk as the ``RequesterFeedback``.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    rejection = {
        'AssignmentId': assignment_id,
        'RequesterFeedback': reason,
    }
    mturk_client.reject_assignment(**rejection)
def pay_bonus(self, worker_id, bonus_amount, assignment_id, reason,
              unique_request_token):
    """Pay a bonus to a worker after checking the account balance.

    Args:
        worker_id: id of the worker receiving the bonus.
        bonus_amount: bonus amount; sent to MTurk as ``str(bonus_amount)``.
        assignment_id: assignment the bonus is attached to.
        reason: text sent to MTurk as the bonus ``Reason``.
        unique_request_token: token forwarded as ``UniqueRequestToken``.

    Returns:
        True once the bonus request has been sent, False when the balance
        check reports insufficient funds (nothing is sent in that case).
    """
    bonus_cost = calculate_mturk_cost(
        payment_opt={'type': 'bonus', 'amount': bonus_amount}
    )
    can_afford = check_mturk_balance(
        balance_needed=bonus_cost, is_sandbox=self.is_sandbox
    )
    if can_afford:
        mturk_client = get_mturk_client(self.is_sandbox)
        # unique_request_token may be useful for handling future network
        # errors
        mturk_client.send_bonus(
            WorkerId=worker_id,
            BonusAmount=str(bonus_amount),
            AssignmentId=assignment_id,
            Reason=reason,
            UniqueRequestToken=unique_request_token,
        )
        paid_message = 'Paid ${} bonus to WorkerId: {}'.format(
            bonus_amount, worker_id
        )
        print_and_log(paid_message)
        return True

    print_and_log(
        'Cannot pay bonus. Reason: Insufficient funds in your MTurk account.'
    )
    return False
def pay_bonus(self, worker_id, bonus_amount, assignment_id, reason,
              unique_request_token):
    """Pay a bonus to a worker after checking the account balance.

    Args:
        worker_id: id of the worker receiving the bonus.
        bonus_amount: bonus amount; sent to MTurk as ``str(bonus_amount)``.
        assignment_id: assignment the bonus is attached to.
        reason: text sent to MTurk as the bonus ``Reason``.
        unique_request_token: token forwarded as ``UniqueRequestToken``.

    Returns:
        True once the bonus request has been sent, False when the balance
        check reports insufficient funds (a warning is logged and nothing
        is sent in that case).
    """
    bonus_cost = mturk_utils.calculate_mturk_cost(
        payment_opt={'type': 'bonus', 'amount': bonus_amount}
    )
    can_afford = mturk_utils.check_mturk_balance(
        balance_needed=bonus_cost, is_sandbox=self.is_sandbox
    )
    if can_afford:
        mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
        # unique_request_token may be useful for handling future network
        # errors
        mturk_client.send_bonus(
            WorkerId=worker_id,
            BonusAmount=str(bonus_amount),
            AssignmentId=assignment_id,
            Reason=reason,
            UniqueRequestToken=unique_request_token,
        )
        paid_message = 'Paid ${} bonus to WorkerId: {}'.format(
            bonus_amount, worker_id
        )
        shared_utils.print_and_log(logging.INFO, paid_message)
        return True

    shared_utils.print_and_log(
        logging.WARN,
        'Cannot pay bonus. Reason: Insufficient funds in your MTurk account.',
        should_print=True,
    )
    return False
def get_agent_work_status(self, assignment_id):
    """Look up the current MTurk status of an assignment.

    When the reported status equals ``MTurkAgent.ASSIGNMENT_DONE`` and an
    agent is found for the assignment, that agent's ``hit_is_complete``
    flag is set.

    Args:
        assignment_id: id of the assignment to query.

    Returns:
        The ``AssignmentStatus`` reported by MTurk, or
        ``MTurkAgent.ASSIGNMENT_NOT_DONE`` whenever the lookup raises a
        ``ClientError`` (unexpected ones are logged as warnings first).
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    try:
        assignment = mturk_client.get_assignment(
            AssignmentId=assignment_id
        )['Assignment']
        status = assignment['AssignmentStatus']
        agent = self._get_agent(
            self.assignment_to_worker_id[assignment_id], assignment_id
        )
        if status == MTurkAgent.ASSIGNMENT_DONE and agent is not None:
            agent.hit_is_complete = True
        return status
    except ClientError as e:
        # If the assignment isn't done, asking for the assignment will fail
        expected_fragment = ('This operation can be called with a status '
                             'of: Reviewable,Approved,Rejected')
        error_text = e.response['Error']['Message']
        if expected_fragment not in error_text:
            shared_utils.print_and_log(
                logging.WARN,
                'Unanticipated error in `get_agent_work_status`: '
                + error_text,
                should_print=True,
            )
        # Assume not done, also when the status check seems to be faulty.
        return MTurkAgent.ASSIGNMENT_NOT_DONE
def expire_hit(self, hit_id):
    """Expire the given HIT on the MTurk side.

    Only works if the hit is in the "pending" state.

    Args:
        hit_id: id of the HIT to expire.
    """
    # Moving the expiration to a time in the past expires the HIT instantly.
    already_elapsed = datetime(2015, 1, 1)
    mturk_client = get_mturk_client(self.is_sandbox)
    mturk_client.update_expiration_for_hit(
        HITId=hit_id,
        ExpireAt=already_elapsed,
    )
def main():
    """Interactively pay bonuses to workers, one at a time.

    This script should be used to compensate workers that have not received
    proper payment for the completion of tasks due to issues on our end.
    It's important to make sure you keep a requester reputation up.

    Command-line flags:
        --sandbox: connect to the MTurk sandbox instead of the live server.
        --hit-id: prompt for a HIT id and look the assignment id up from it
            instead of prompting for the assignment id directly.

    The prompt loop ends when an empty worker id is entered.
    """
    parser = argparse.ArgumentParser(description='Bonus workers directly')
    parser.add_argument(
        '--sandbox',
        dest='sandbox',
        default=False,
        action='store_true',
        help='Test bonus in sandbox',
    )
    parser.add_argument(
        '--hit-id',
        dest='use_hit_id',
        default=False,
        action='store_true',
        help='Use HIT id instead of assignment id',
    )
    opt = parser.parse_args()
    sandbox = opt.sandbox

    if sandbox:
        print('About to connect to the SANDBOX server. These bonuses will not '
              'actually be paid out.')
    else:
        print('About to connect to the LIVE server. These bonuses will be '
              'deducted from your account balance.')

    mturk_utils.setup_aws_credentials()
    client = mturk_utils.get_mturk_client(sandbox)

    while True:
        worker_id = input("Enter worker id: ")
        if not worker_id:
            break
        bonus_amount = input("Enter bonus amount: ")

        if opt.use_hit_id:
            hit_id = input("Enter HIT id: ")
            listed = client.list_assignments_for_hit(HITId=hit_id)
            assignments = listed['Assignments']
            if not assignments:
                # Nothing to attach the bonus to; ask again instead of
                # crashing with an IndexError.
                print('No assignments found for HIT {}; nothing to '
                      'bonus.'.format(hit_id))
                continue
            # Prefer the assignment submitted by this worker; fall back to
            # the first listed assignment when none matches.
            own = [a for a in assignments if a.get('WorkerId') == worker_id]
            assignment_id = (own or assignments)[0]['AssignmentId']
        else:
            assignment_id = input("Enter assignment id: ")

        reason = input("Enter reason: ")
        # Last chance to abort (CTRL-C) before the bonus is sent.
        input("Press enter to bonus {} to worker {} for reason '{}' on "
              "assignment {}.".format(bonus_amount, worker_id, reason,
                                      assignment_id))
        resp = client.send_bonus(
            WorkerId=worker_id,
            BonusAmount=str(bonus_amount),
            AssignmentId=assignment_id,
            Reason=reason,
        )
        print(resp)
def email_worker(self, worker_id, subject, message_text):
    """Send a message to one worker through the MTurk client.

    Args:
        worker_id: id of the worker to notify.
        subject: subject passed to ``notify_workers``.
        message_text: message body passed to ``notify_workers``.

    Returns:
        ``{'success': True}`` when MTurk reports no failure statuses,
        otherwise ``{'failure': <message of the first failure status>}``.
    """
    mturk_client = get_mturk_client(self.is_sandbox)
    result = mturk_client.notify_workers(
        Subject=subject,
        MessageText=message_text,
        WorkerIds=[worker_id],
    )
    failures = result['NotifyWorkersFailureStatuses']
    if len(failures) == 0:
        return {'success': True}
    first_failure = failures[0]
    return {'failure': first_failure['NotifyWorkersFailureMessage']}
def get_agent_work_status(self, assignment_id):
    """Look up the current MTurk status of an assignment.

    Args:
        assignment_id: id of the assignment to query.

    Returns:
        The ``AssignmentStatus`` reported by MTurk;
        ``MTurkAgent.ASSIGNMENT_NOT_DONE`` when the lookup fails with the
        expected "not done yet" ``ClientError``; ``None`` for any other
        ``ClientError``.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    try:
        assignment = mturk_client.get_assignment(
            AssignmentId=assignment_id
        )['Assignment']
        return assignment['AssignmentStatus']
    except ClientError as e:
        # If the assignment isn't done, asking for the assignment will fail
        expected_fragment = ('This operation can be called with a status '
                             'of: Reviewable,Approved,Rejected')
        error_text = e.response['Error']['Message']
        if expected_fragment in error_text:
            return MTurkAgent.ASSIGNMENT_NOT_DONE
        # Any other client error is swallowed and reported as None.
        return None
def get_agent_work_status(self, assignment_id):
    """Look up the current MTurk status of an assignment.

    Args:
        assignment_id: id of the assignment to query.

    Returns:
        The ``AssignmentStatus`` reported by MTurk;
        ``MTurkAgent.ASSIGNMENT_NOT_DONE`` when the lookup fails with the
        expected "not done yet" ``ClientError``; ``None`` for any other
        ``ClientError``.
    """
    mturk_client = get_mturk_client(self.is_sandbox)
    try:
        assignment = mturk_client.get_assignment(
            AssignmentId=assignment_id
        )['Assignment']
        return assignment['AssignmentStatus']
    except ClientError as e:
        # If the assignment isn't done, asking for the assignment will fail
        expected_fragment = ('This operation can be called with a status '
                             'of: Reviewable,Approved,Rejected')
        error_text = e.response['Error']['Message']
        if expected_fragment in error_text:
            return MTurkAgent.ASSIGNMENT_NOT_DONE
        # Any other client error is swallowed and reported as None.
        return None
def main():
    """Interactively pay bonuses to workers, one at a time.

    This script should be used to compensate workers that have not received
    proper payment for the completion of tasks due to issues on our end.
    It's important to make sure you keep a requester reputation up.

    Command-line flags:
        --sandbox: connect to the MTurk sandbox instead of the live server.
        --hit-id: prompt for a HIT id and look the assignment id up from it
            instead of prompting for the assignment id directly.

    The prompt loop ends when an empty worker id is entered.
    """
    parser = argparse.ArgumentParser(description='Bonus workers directly')
    parser.add_argument('--sandbox', dest='sandbox', default=False,
                        action='store_true', help='Test bonus in sandbox')
    parser.add_argument('--hit-id', dest='use_hit_id', default=False,
                        action='store_true',
                        help='Use HIT id instead of assignment id')
    opt = parser.parse_args()
    sandbox = opt.sandbox

    if sandbox:
        print(
            'About to connect to the SANDBOX server. These bonuses will not '
            'actually be paid out.'
        )
    else:
        print(
            'About to connect to the LIVE server. These bonuses will be '
            'deducted from your account balance.'
        )

    mturk_utils.setup_aws_credentials()
    client = mturk_utils.get_mturk_client(sandbox)

    while True:
        worker_id = input("Enter worker id: ")
        if not worker_id:
            break
        bonus_amount = input("Enter bonus amount: ")

        if opt.use_hit_id:
            hit_id = input("Enter HIT id: ")
            listed = client.list_assignments_for_hit(HITId=hit_id)
            assignments = listed['Assignments']
            if not assignments:
                # Nothing to attach the bonus to; ask again instead of
                # crashing with an IndexError.
                print(
                    'No assignments found for HIT {}; nothing to '
                    'bonus.'.format(hit_id)
                )
                continue
            # Prefer the assignment submitted by this worker; fall back to
            # the first listed assignment when none matches.
            own = [a for a in assignments if a.get('WorkerId') == worker_id]
            assignment_id = (own or assignments)[0]['AssignmentId']
        else:
            assignment_id = input("Enter assignment id: ")

        reason = input("Enter reason: ")
        # Last chance to abort (CTRL-C) before the bonus is sent.
        input(
            "Press enter to bonus {} to worker {} for reason '{}' on "
            "assignment {}.".format(bonus_amount, worker_id, reason,
                                    assignment_id)
        )
        resp = client.send_bonus(
            WorkerId=worker_id,
            BonusAmount=str(bonus_amount),
            AssignmentId=assignment_id,
            Reason=reason
        )
        print(resp)
def email_worker(self, worker_id, subject, message_text):
    """Send a message to one worker through the MTurk client.

    Args:
        worker_id: id of the worker to notify.
        subject: subject passed to ``notify_workers``.
        message_text: message body passed to ``notify_workers``.

    Returns:
        ``{'success': True}`` when MTurk reports no failure statuses,
        otherwise ``{'failure': <message of the first failure status>}``.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    result = mturk_client.notify_workers(
        Subject=subject,
        MessageText=message_text,
        WorkerIds=[worker_id],
    )
    failures = result['NotifyWorkersFailureStatuses']
    if len(failures) == 0:
        return {'success': True}
    first_failure = failures[0]
    return {'failure': first_failure['NotifyWorkersFailureMessage']}
def __init__(self, opt, mturk_agents):
    """Set up the world state for a Drawer/Teller pair of agents.

    Creates a sandbox MTurk client, records the agents by their
    ``demo_role``, resets the dialog bookkeeping, calls
    ``selectImageForTask`` and finally builds ``unique_task_id`` from the
    two agents' worker and assignment ids.

    Args:
        opt: options object; not read by this constructor.
        mturk_agents: iterable of agents exposing ``assignment_id``,
            ``worker_id`` and ``demo_role``.
    """
    mturk_utils.setup_aws_credentials()
    # The client is always created against the sandbox here.
    use_sandbox = True
    self.client = mturk_utils.get_mturk_client(use_sandbox)
    self.bonus_amount = 1.0
    self.mturk_agents = mturk_agents

    for participant in mturk_agents:
        # Overwritten on every iteration, so the last agent's id is kept.
        self.assignment_id = participant.assignment_id
        print(participant.assignment_id)
        role = participant.demo_role
        if role == 'Drawer':
            self.drawer = participant
        elif role == 'Teller':
            self.teller = participant

    self.episodeDone = False
    self.turn_idx = 0
    self.dialog = []
    self.should_pay_bonus = False
    self.selectImageForTask()

    worker_part = self.drawer.worker_id + self.teller.worker_id
    self.unique_task_id = (
        worker_part + self.teller.assignment_id + self.drawer.assignment_id
    )
def get_assignment(self, assignment_id):
    """Fetch an assignment from MTurk by its id.

    Only works if the assignment is in a completed state.

    Args:
        assignment_id: id of the assignment to fetch.

    Returns:
        The response of the client's ``get_assignment`` call.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    assignment_response = mturk_client.get_assignment(
        AssignmentId=assignment_id
    )
    return assignment_response
def main(opt):
    """Run an interactive console for reviewing, approving and bonusing work.

    Args:
        opt: dict of options. Reads ``opt['no_sandbox']`` (selects the live
            or sandbox paths and client) and ``opt['run_ids']`` (a
            comma-separated string of run IDs, or None to use every entry
            of the runs directory). Sets ``opt['is_sandbox'] = False`` when
            running live.

    Console commands:
        p                          print the known runs
        inspect RUN WORKER         show one worker's answers and approve
        get-asgn | ga HIT WORKER   show the assignment for a HIT
        inspect-asgn ASGN          not implemented
        inspect-hit HIT            not implemented
        approve RUN                approve all acceptable HITs of a run
        approve-asgn ASGN          approve one assignment
        award-from-file FILE       pay bonuses listed in a CSV file
        d | debug                  drop into ipdb
        exit or an empty line      quit
    """
    setup_aws_credentials()
    if opt['no_sandbox']:
        db_file, all_runs_dir = PATHS['live']
        opt['is_sandbox'] = False
    else:
        db_file, all_runs_dir = PATHS['sandbox']
    assert os.path.exists(db_file), f"DB file {db_file} doesn't exist!"
    assert os.path.isdir(
        all_runs_dir), f"run directory {all_runs_dir} doesn't exist!"

    db = MTurkDataHandler(file_name=db_file)
    mturk_manager = MTurkManager(opt, [])
    client = mturk_utils.get_mturk_client(not opt['no_sandbox'])

    # Get run IDs
    if opt['run_ids'] is None:
        run_ids = list(os.listdir(all_runs_dir))
    else:
        run_ids = opt['run_ids'].split(',')

    # Index the saved worker responses by run and by worker. This is done
    # for explicitly requested run IDs too, so the inspect commands always
    # have these maps available.
    run2worker = defaultdict(dict)
    worker2run = defaultdict(dict)
    for run_id in run_ids:
        run_dir = os.path.join(all_runs_dir, run_id)
        if not os.path.isdir(run_dir):
            print(f"Run directory {run_dir} not found; skipping.")
            continue
        for hit in os.listdir(run_dir):
            # t_*/workers/{WORKER_ID}.json
            workers_dir = os.path.join(run_dir, hit, "workers")
            resps = os.listdir(workers_dir)
            assert len(resps) == 1, "More than one response found!"
            worker_id = resps[0].split('.')[0]
            with open(os.path.join(workers_dir, resps[0])) as worker_file:
                worker_data = json.load(worker_file)
            run2worker[run_id][worker_id] = worker_data
            worker2run[worker_id][run_id] = worker_data

    def get_all_hits():
        """Return every HIT listed by the client, following pagination."""
        resp = client.list_hits()
        all_hits = list(resp['HITs'])
        while resp.get('NextToken'):
            resp = client.list_hits(NextToken=resp['NextToken'])
            all_hits += resp['HITs']
            time.sleep(0.5)
        return all_hits

    def get_run_id_data(run_ids):
        """Print each run's HIT count and start time, oldest run first."""
        print(f"Found following run IDs: ")
        n_hits = 0
        run_data = [(run_id, db.get_run_data(run_id)) for run_id in run_ids]
        run_data.sort(key=lambda x: x[1]['launch_time'])
        for run_id, run_datum in run_data:
            start_time = datetime.fromtimestamp(run_datum['launch_time'])
            hits = db.get_pairings_for_run(run_id)
            n_hits += len(hits)
            print(f"{run_id} {len(hits)} HITS, started {start_time}")
        print(f"Total {n_hits} HITS over {len(run_ids)} runs")

    def approve_run_hits(run_id):
        """Approve, after confirmation, every acceptable HIT of a run."""
        to_approve = []
        for hit in db.get_pairings_for_run(run_id):
            if hit['conversation_id'] is None:
                continue
            try:
                full_data = db.get_full_conversation_data(
                    run_id, hit['conversation_id'], False)
            except FileNotFoundError:
                continue
            datum = next(iter(full_data['worker_data'].values()))
            if datum['response']['text'] in BAD_RESPONSES:
                continue
            to_approve.append(datum['assignment_id'])
            print(f"To approve: {datum['assignment_id']}")

        print(f"Run ID {run_id}: to approve {len(to_approve)} HITs")
        conf = input("Confirm? (y/n): ")
        if conf != "y":
            print("\tCancelled approvals")
            return

        n_approved = 0
        didnt_approve = []
        for asgn_id in to_approve:
            try:
                mturk_manager.approve_work(asgn_id)
            except Exception:
                didnt_approve.append(asgn_id)
                print(f"Failed to approve: {asgn_id}")
            else:
                n_approved += 1
                print(f"Approved {asgn_id}")
        print(f"\tApproved {n_approved} HITs")
        if didnt_approve:
            print(
                f"\tFailed to approve assignments {','.join(didnt_approve)}"
            )

    def approve_assignment(asgn_id):
        """Approve one assignment after confirmation, overriding rejection."""
        conf = input(f"Confirm approving assignment {asgn_id}? (y/n): ")
        if conf != "y":
            print("\tCancelled approvals.")
            return
        try:
            mturk_manager.approve_work(asgn_id, override_rejection=True)
            print(f"\tSuccessfully approved!")
        except Exception:
            print(f"\tFailed to approve.")

    def award_from_file(bonus_file, msg):
        """Pay the bonuses listed in a CSV file.

        Each non-blank line must read
        ``worker_id,assignment_id,request_token,bonus_amount``.
        """
        with open(bonus_file, encoding="utf-8") as bonus_fh:
            rows = [ln.strip().split(',') for ln in bonus_fh if ln.strip()]

        awards = []
        malformed = []
        for row in rows:
            try:
                worker_id, asgn_id, request_tok, bonus_amt = row
                awards.append(
                    (worker_id, asgn_id, request_tok, float(bonus_amt)))
            except ValueError:
                # Wrong number of fields or a non-numeric amount.
                malformed.append(','.join(row))
        if malformed:
            print("Skipping malformed rows:")
            for bad_row in malformed:
                print(f"\t{bad_row}")

        total_bonus = sum(award[-1] for award in awards)
        conf = input(
            f"Confirm awarding total bonus ${total_bonus} to {len(awards)} workers? "
        )
        if conf != "y":
            print("\tCancelled bonus.")
            return

        n_awarded = 0
        amt_awarded = 0.0
        didnt_award = []
        for worker_id, asgn_id, request_tok, bonus_amt in tqdm(awards):
            try:
                paid = mturk_manager.pay_bonus(
                    worker_id=worker_id,
                    bonus_amount=bonus_amt,
                    assignment_id=asgn_id,
                    reason=msg,
                    unique_request_token=request_tok)
            except Exception:
                paid = False
            # pay_bonus reports insufficient funds by returning False
            # rather than raising; count that as a failure too.
            if paid is False:
                didnt_award.append(
                    (worker_id, asgn_id, request_tok, bonus_amt))
            else:
                n_awarded += 1
                amt_awarded += bonus_amt

        print(f"Awarded {amt_awarded} to {n_awarded} workers.")
        if didnt_award:
            # Printed in the input format so the lines can be retried.
            print("Failed on:")
            for worker_id, asgn_id, request_tok, bonus_amt in didnt_award:
                print(f"{worker_id},{asgn_id},{request_tok},{bonus_amt}")

    def award_bonus(worker_id, bonus_amt, asgn_id, msg, request_tok):
        """Pay a single bonus after confirmation."""
        conf = input(f"Confirm awarding ${bonus_amt} to {worker_id}?")
        if conf != "y":
            print("\tCancelled bonus.")
            return
        try:
            paid = mturk_manager.pay_bonus(
                worker_id=worker_id,
                bonus_amount=bonus_amt,
                assignment_id=asgn_id,
                reason=msg,
                unique_request_token=request_tok)
        except Exception:
            paid = False
        if paid is False:
            print(f"\tFailed to approve.")
        else:
            print(f"\tSuccessfully approved!")

    def inspect_assignment(asgn_id):
        """Placeholder: inspecting by assignment ID is not implemented."""
        raise NotImplementedError

    def inspect_hit(hit_id):
        """Placeholder: inspecting by HIT ID is not implemented."""
        raise NotImplementedError

    def inspect_run_worker_pair(run_id, worker_id):
        """Show a worker's answers for a run and optionally approve them."""
        # .get() avoids inserting empty entries into the defaultdicts.
        worker_data = run2worker.get(run_id, {}).get(worker_id)
        if worker_data is None:
            print(f"No data found for run {run_id} and worker {worker_id}.")
            return
        asgn_id = worker_data['assignment_id']

        ctx = worker_data['task_data'][0]['conversations'][0]['dialog'][0][
            'text']
        qsts = []
        answers = []
        for task_datum in worker_data['task_data']:
            qst_d = task_datum['conversations'][1]
            qsts.append(qst_d['dialog'][0]['text'])
            # None when the question has no recorded answer.
            answers.append(qst_d.get('answer'))

        try:
            task_responses = worker_data['response']['task_data']
            choices = [
                CHOICE2ANS[r['speakerChoice']] for r in task_responses
            ]
            reasons = [r['textReason'] for r in task_responses]
        except KeyError:
            print("Key error!")
            print("task_data not in worker response!")
            ipdb.set_trace()
            # Nothing sensible to display without the responses.
            return

        try:
            pair = db.get_worker_assignment_pairing(worker_id, asgn_id)
            hit_time = pair['task_end'] - pair['task_start']
        except Exception:
            ipdb.set_trace()
            hit_time = None

        print(f"\nAssignment ID: {worker_data['assignment_id']}")
        print(f"CONTEXT: {ctx}\n")
        for qst, ans, choice, reason in zip(qsts, answers, choices, reasons):
            print(f"QUESTION: {qst}")
            print(f"ANSWER: {ans}")
            print(f"CHOICE: {choice}")
            print(f"REASON: {reason}")
            print()
        print(f"HIT time: {hit_time}")

        resp = input("Accept (y/n) ? ")
        if resp == "y":
            mturk_manager.approve_work(asgn_id, override_rejection=True)
            print("\tApproved!")

    def inspect_hit_worker_pair(hit_id, worker_id):
        """Print the assignment ID and submit date for a HIT."""
        resp = client.list_assignments_for_hit(HITId=hit_id)
        all_asgns = list(resp['Assignments'])
        while resp.get('NextToken'):
            resp = client.list_assignments_for_hit(
                HITId=hit_id, NextToken=resp['NextToken'])
            all_asgns.extend(resp['Assignments'])
            time.sleep(0.5)

        if not all_asgns:
            print(f"No assignments found for HIT {hit_id}.")
            return
        if len(all_asgns) > 1:
            print(f"Expected one assignment for HIT {hit_id} but found "
                  f"{len(all_asgns)}; showing the first.")

        asgn_ids = [a['AssignmentId'] for a in all_asgns]
        # Runs in which this worker's saved data refers to one of the
        # assignments, so the result can be fed to `inspect`.
        matching_runs = [
            run_id
            for run_id, run_d in worker2run.get(worker_id, {}).items()
            if run_d['assignment_id'] in asgn_ids
        ]

        print(f"Assignment ID: {asgn_ids[0]}")
        print(f"Submit date: {all_asgns[0]['SubmitTime'].strftime('%m/%d')}")
        if matching_runs:
            print(f"Run ID(s): {','.join(matching_runs)}")

    # main loop
    while True:
        print("Enter 'p' to print runs")
        cmd = input("Enter command: ")
        if len(cmd) == 0 or cmd.strip() == "exit":
            break
        cmd_parts = cmd.split()
        if not cmd_parts:
            # Whitespace only: just prompt again.
            continue
        name, args = cmd_parts[0], cmd_parts[1:]

        try:
            if name == "p":
                get_run_id_data(run_ids)
            elif name == "inspect":
                if len(args) != 2:
                    print("Insufficient arguments!")
                    continue
                inspect_run_worker_pair(args[0], args[1])
            elif name in ["get-asgn", 'ga']:
                if len(args) != 2:
                    print("Insufficient arguments! Please provide "
                          "worker_id and ...")
                    continue
                inspect_hit_worker_pair(args[0], args[1])
            elif name == "inspect-asgn":
                if not args:
                    print("No assignment ID provided.")
                    continue
                inspect_assignment(args[0])
            elif name == "inspect-hit":
                if not args:
                    print("No HIT ID provided.")
                    continue
                inspect_hit(args[0])
            elif name == "approve":
                if not args:
                    print("No run ID provided.")
                    continue
                run_id = args[0]
                if run_id in run_ids:
                    approve_run_hits(run_id)
                else:
                    print(f"Run ID {run_id} not found!")
            elif name == "approve-asgn":
                if not args:
                    print("No assignment ID provided.")
                    continue
                approve_assignment(args[0])
            elif name == "award-from-file":
                if not args:
                    print("No file provided.")
                    continue
                if not os.path.exists(args[0]):
                    print(f"File {args[0]} not found!")
                    continue
                award_from_file(args[0], BONUS_MSG)
            elif name in ["d", "debug"]:
                ipdb.set_trace()
            else:
                print(f"Command `{cmd}` not understood.")
        except NotImplementedError:
            print(f"Command `{name}` is not implemented yet.")
def main():
    """List live HITs by task group and expire the groups you choose.

    This script should be used after some error occurs that leaves HITs
    live while the ParlAI MTurk server is down. It searches through live
    HITs and lists them by task ID, letting you close down HITs that do
    not link to any server and are thus irrecoverable.

    Command-line flags:
        --sandbox: query the sandbox instead of the live server.
        --ignore-assigned: skip HITs that may already be completed or
            assigned.
        --approve: approve submitted assignments of a HIT before expiring.
        --verbose: print each action taken on an individual HIT.
    """
    parser = argparse.ArgumentParser(description='Delete HITs by expiring')
    parser.add_argument('--sandbox', dest='sandbox', default=False,
                        action='store_true', help='Delete HITs from sandbox')
    parser.add_argument(
        '--ignore-assigned',
        dest='ignore',
        default=False,
        action='store_true',
        help='Ignore HITs that may already be completed or assigned',
    )
    parser.add_argument(
        '--approve',
        dest='approve',
        default=False,
        action='store_true',
        help='Approve HITs that have been completed on deletion',
    )
    parser.add_argument(
        '--verbose',
        dest='verbose',
        default=False,
        action='store_true',
        help='List actions to individual HITs',
    )
    opt = parser.parse_args()
    sandbox = opt.sandbox
    ignore = opt.ignore
    approve = opt.approve
    verbose = opt.verbose

    task_group_ids = []
    group_to_hit = {}  # group id -> {HIT id: HIT status}
    processed = 0
    found = 0
    spinner_vals = ['-', '\\', '|', '/']

    if sandbox:
        print(
            'About to query the SANDBOX server, these HITs will be active HITs'
            ' from within the MTurk requester sandbox')
    else:
        print('About to query the LIVE server, these HITs will be active HITs '
              'potentially being worked on by real Turkers right now')

    print('Getting HITs from amazon MTurk server, please wait...\n'
          'or use CTRL-C to skip to expiring some of what is found.\n')
    mturk_utils.setup_aws_credentials()
    client = mturk_utils.get_mturk_client(sandbox)
    response = client.list_hits(MaxResults=100)
    try:
        while True:
            processed += response['NumResults']
            for hit in response['HITs']:
                if ignore:
                    if hit['NumberOfAssignmentsAvailable'] == 0:
                        # Ignore hits with no assignable assignments
                        continue
                    if hit['HITStatus'] not in ('Assignable', 'Unassignable'):
                        # Ignore completed hits
                        continue
                question = hit['Question']
                try:
                    if 'ExternalURL' in question:
                        url = question.split('ExternalURL')[1]
                        group_id = url.split('task_group_id=')[1]
                        group_id = group_id.split('&')[0]
                        group_id = group_id.split('<')[0]
                        if group_id not in task_group_ids:
                            sys.stdout.write(
                                '\rFound group {} '
                                ' '
                                '\n'.format(group_id))
                            group_to_hit[group_id] = {}
                            task_group_ids.append(group_id)
                        group_to_hit[group_id][hit['HITId']] = hit['HITStatus']
                        found += 1
                except IndexError:
                    pass  # This wasn't the right HIT
            # One progress update per page of results.
            sys.stdout.write('\r{} HITs processed, {} active hits'
                             ' found amongst {} tasks. {} '.format(
                                 processed, found, len(task_group_ids),
                                 spinner_vals[(processed // 100) % 4]))
            if 'NextToken' not in response:
                break
            response = client.list_hits(NextToken=response['NextToken'],
                                        MaxResults=100)
    except BaseException as e:
        # Deliberately broad: CTRL-C (KeyboardInterrupt) is the documented
        # way to stop listing and move on with what was found.
        print(e)

    if not approve:
        print('\n\nTask group id - Active HITs')
        for group_id in task_group_ids:
            print('{} - {}'.format(group_id, len(group_to_hit[group_id])))
    else:
        print('\n\nTask group id - Active HITs - Reviewable')
        for group_id in task_group_ids:
            statuses = group_to_hit[group_id].values()
            print('{} - {} - {}'.format(
                group_id,
                len(group_to_hit[group_id]),
                len([s for s in statuses if s == 'Reviewable']),
            ))

    print(
        'To clear a task, please enter the task group id of the task that you '
        'want to expire the HITs for. To exit, enter nothing')

    while True:
        task_group_id = input("Enter task group id: ")
        if len(task_group_id) == 0:
            break
        if task_group_id not in task_group_ids:
            print('Sorry, the id you entered was not found, try again')
            continue

        num_hits = input(
            'Confirm by entering the number of HITs that will be deleted: '
        )
        if str(len(group_to_hit[task_group_id])) != num_hits:
            print(
                'You entered {} but there are {} HITs to expire, please '
                "try again to confirm you're ending the right task".format(
                    num_hits, len(group_to_hit[task_group_id])))
            continue

        hits_expired = 0
        for hit_id, status in group_to_hit[task_group_id].items():
            if approve:
                response = client.list_assignments_for_hit(HITId=hit_id)
                if response['NumResults'] == 0:
                    if verbose:
                        print('No results for hit {} with status {}\n'
                              ''.format(hit_id, status))
                else:
                    for assignment in response['Assignments']:
                        assignment_id = assignment['AssignmentId']
                        state = assignment.get('AssignmentStatus')
                        # Only submitted work is approved; approving an
                        # already approved or rejected assignment would
                        # fail and abort the expiry loop midway.
                        if state not in (None, 'Submitted'):
                            if verbose:
                                print('Skipped assignment {} with status '
                                      '{}'.format(assignment_id, state))
                            continue
                        client.approve_assignment(
                            AssignmentId=assignment_id)
                        if verbose:
                            print('Approved assignment {}'.format(
                                assignment_id))
            mturk_utils.expire_hit(sandbox, hit_id)
            if verbose:
                print('Expired hit {}'.format(hit_id))
            hits_expired += 1
            sys.stdout.write('\rExpired hits {}'.format(hits_expired))
        print('\nAll hits for group {} have been expired.'.format(
            task_group_id))
def main():
    """List live HITs by task group and expire the groups you choose.

    This script should be used after some error occurs that leaves HITs
    live while the ParlAI MTurk server is down. It searches through live
    HITs and lists them by task ID, letting you close down HITs that do
    not link to any server and are thus irrecoverable.

    Command-line flags:
        --sandbox: query the sandbox instead of the live server.
        --ignore-assigned: skip HITs that may already be completed or
            assigned.
    """
    parser = argparse.ArgumentParser(description='Delete HITs by expiring')
    parser.add_argument('--sandbox', dest='sandbox', default=False,
                        action='store_true', help='Delete HITs from sandbox')
    parser.add_argument(
        '--ignore-assigned',
        dest='ignore',
        default=False,
        action='store_true',
        help='Ignore HITs that may already be completed or assigned',
    )
    opt = parser.parse_args()
    sandbox = opt.sandbox
    ignore = opt.ignore

    task_group_ids = []
    group_to_hit = {}  # group id -> list of HIT ids
    processed = 0
    found = 0
    spinner_vals = ['-', '\\', '|', '/']

    if sandbox:
        print(
            'About to query the SANDBOX server, these HITs will be active HITs'
            ' from within the MTurk requester sandbox'
        )
    else:
        print(
            'About to query the LIVE server, these HITs will be active HITs '
            'potentially being worked on by real Turkers right now'
        )

    print('Getting HITs from amazon MTurk server, please wait...\n'
          'or use CTRL-C to skip to expiring some of what is found.\n')
    mturk_utils.setup_aws_credentials()
    client = mturk_utils.get_mturk_client(sandbox)
    response = client.list_hits(MaxResults=100)
    try:
        while True:
            processed += response['NumResults']
            for hit in response['HITs']:
                if ignore:
                    if hit['NumberOfAssignmentsAvailable'] == 0:
                        # Ignore hits with no assignable assignments
                        continue
                    if hit['HITStatus'] not in ('Assignable', 'Unassignable'):
                        # Ignore completed hits
                        continue
                question = hit['Question']
                if 'ExternalURL' not in question:
                    continue
                url = question.split('ExternalURL')[1]
                if 'task_group_id=' not in url:
                    # Not one of our HITs. Indexing the split result here
                    # would raise IndexError and end the listing early.
                    continue
                group_id = url.split('task_group_id=')[1]
                group_id = group_id.split('&')[0]
                group_id = group_id.split('<')[0]
                if group_id not in task_group_ids:
                    group_to_hit[group_id] = []
                    task_group_ids.append(group_id)
                group_to_hit[group_id].append(hit['HITId'])
                found += 1
            # One progress update per page of results.
            sys.stdout.write(
                '\r{} HITs processed, {} active hits'
                ' found amongst {} tasks. {} '
                .format(
                    processed,
                    found,
                    len(task_group_ids),
                    spinner_vals[(processed // 100) % 4]
                )
            )
            if 'NextToken' not in response:
                break
            response = client.list_hits(
                NextToken=response['NextToken'],
                MaxResults=100
            )
    except KeyboardInterrupt:
        # CTRL-C is the documented way to stop listing and move on.
        pass
    except Exception as e:
        # Keep going with what was found, but say why listing stopped.
        print('\nStopped listing HITs early: {}'.format(e))

    print('\n\nTask group id - Active HITs')
    for group_id in task_group_ids:
        print('{} - {}'.format(group_id, len(group_to_hit[group_id])))

    print(
        'To clear a task, please enter the task group id of the task that you '
        'want to expire the HITs for. To exit, enter nothing'
    )

    while True:
        task_group_id = input("Enter task group id: ")
        if len(task_group_id) == 0:
            break
        if task_group_id not in task_group_ids:
            print('Sorry, the id you entered was not found, try again')
            continue

        num_hits = input(
            'Confirm by entering the number of HITs that will be deleted: '
        )
        if str(len(group_to_hit[task_group_id])) != num_hits:
            print(
                'You entered {} but there are {} HITs to expire, please '
                "try again to confirm you're ending the right task".format(
                    num_hits, len(group_to_hit[task_group_id])
                )
            )
            continue

        hits_expired = 0
        for hit_id in group_to_hit[task_group_id]:
            mturk_utils.expire_hit(sandbox, hit_id)
            hits_expired += 1
            sys.stdout.write('\rExpired hits {}'.format(hits_expired))
        print('\nAll hits for group {} have been expired.'.format(
            task_group_id
        ))
def main():
    """List live HITs by task group and expire the groups you choose.

    This script should be used after some error occurs that leaves HITs
    live while the ParlAI MTurk server is down. It searches through live
    HITs and lists them by task ID, letting you close down HITs that do
    not link to any server and are thus irrecoverable.

    Command-line flags:
        --sandbox: query the sandbox instead of the live server.
    """
    parser = argparse.ArgumentParser(description='Delete HITs by expiring')
    parser.add_argument('--sandbox', dest='sandbox', default=False,
                        action='store_true', help='Delete HITs from sandbox')
    opt = parser.parse_args()
    sandbox = opt.sandbox

    task_group_ids = []
    group_to_hit = {}  # group id -> list of HIT ids
    processed = 0
    found = 0
    spinner_vals = ['-', '\\', '|', '/']

    if sandbox:
        print(
            'About to query the SANDBOX server, these HITs will be active HITs'
            ' from within the MTurk requester sandbox'
        )
    else:
        print(
            'About to query the LIVE server, these HITs will be active HITs '
            'potentially being worked on by real Turkers right now'
        )

    print('Getting HITs from amazon MTurk server, please wait...\n')
    mturk_utils.setup_aws_credentials()
    client = mturk_utils.get_mturk_client(sandbox)
    response = client.list_hits(MaxResults=100)
    while True:
        processed += response['NumResults']
        for hit in response['HITs']:
            if hit['NumberOfAssignmentsAvailable'] == 0:
                # Ignore hits with no assignable assignments
                continue
            if hit['HITStatus'] not in ('Assignable', 'Unassignable'):
                # Ignore completed hits
                continue
            question = hit['Question']
            if 'ExternalURL' not in question:
                continue
            url = question.split('ExternalURL')[1]
            if 'task_group_id=' not in url:
                # Not one of our HITs. Indexing the split result here would
                # raise IndexError and crash the script.
                continue
            group_id = url.split('task_group_id=')[1]
            group_id = group_id.split('&')[0]
            group_id = group_id.split('<')[0]
            if group_id not in task_group_ids:
                group_to_hit[group_id] = []
                task_group_ids.append(group_id)
            group_to_hit[group_id].append(hit['HITId'])
            found += 1
        # One progress update per page of results.
        sys.stdout.write(
            '\r{} HITs processed, {} active hits found amongst {} tasks. {} '
            .format(
                processed,
                found,
                len(task_group_ids),
                spinner_vals[(processed // 100) % 4]
            )
        )
        if 'NextToken' not in response:
            break
        response = client.list_hits(
            NextToken=response['NextToken'],
            MaxResults=100
        )

    print('\n\nTask group id - Active HITs')
    for group_id in task_group_ids:
        print('{} - {}'.format(group_id, len(group_to_hit[group_id])))

    print(
        'To clear a task, please enter the task group id of the task that you '
        'want to expire the HITs for. To exit, enter nothing'
    )

    while True:
        task_group_id = input("Enter task group id: ")
        if len(task_group_id) == 0:
            break
        if task_group_id not in task_group_ids:
            print('Sorry, the id you entered was not found, try again')
            continue

        num_hits = input(
            'Confirm by entering the number of HITs that will be deleted: '
        )
        if str(len(group_to_hit[task_group_id])) != num_hits:
            print(
                'You entered {} but there are {} HITs to expire, please '
                'try again to confirm you are ending the right task'.format(
                    num_hits, len(group_to_hit[task_group_id])
                )
            )
            continue

        hits_expired = 0
        for hit_id in group_to_hit[task_group_id]:
            mturk_utils.expire_hit(sandbox, hit_id)
            hits_expired += 1
            sys.stdout.write('\rExpired hits {}'.format(hits_expired))
        print('\nAll hits for group {} have been expired.'.format(
            task_group_id
        ))
def approve_work(self, assignment_id):
    """Approve the work submitted for an assignment.

    Args:
        assignment_id: id of the assignment to approve.
    """
    mturk_client = get_mturk_client(self.is_sandbox)
    approval = {'AssignmentId': assignment_id}
    mturk_client.approve_assignment(**approval)
def block_worker(self, worker_id, reason):
    """Create an MTurk worker block for the given worker.

    Args:
        worker_id: id of the worker to block.
        reason: text passed along as the block ``Reason``.
    """
    mturk_client = get_mturk_client(self.is_sandbox)
    block_request = {'WorkerId': worker_id, 'Reason': reason}
    mturk_client.create_worker_block(**block_request)
def get_assignment(self, assignment_id):
    """Fetch an assignment from MTurk by its id.

    Only works if the assignment is in a completed state.

    Args:
        assignment_id: id of the assignment to fetch.

    Returns:
        The response of the client's ``get_assignment`` call.
    """
    mturk_client = get_mturk_client(self.is_sandbox)
    assignment_response = mturk_client.get_assignment(
        AssignmentId=assignment_id
    )
    return assignment_response
def get_hit(self, hit_id):
    """Fetch a HIT from MTurk by its id.

    Args:
        hit_id: id of the HIT to fetch.

    Returns:
        The response of the client's ``get_hit`` call.
    """
    mturk_client = get_mturk_client(self.is_sandbox)
    hit_response = mturk_client.get_hit(HITId=hit_id)
    return hit_response
def block_worker(self, worker_id, reason):
    """Create an MTurk worker block for the given worker.

    Args:
        worker_id: id of the worker to block.
        reason: text passed along as the block ``Reason``.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    block_request = {'WorkerId': worker_id, 'Reason': reason}
    mturk_client.create_worker_block(**block_request)
def get_hit(self, hit_id):
    """Fetch a HIT from MTurk by its id.

    Args:
        hit_id: id of the HIT to fetch.

    Returns:
        The response of the client's ``get_hit`` call.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    hit_response = mturk_client.get_hit(HITId=hit_id)
    return hit_response
def approve_work(self, assignment_id):
    """Approve the work submitted for an assignment.

    Args:
        assignment_id: id of the assignment to approve.
    """
    mturk_client = mturk_utils.get_mturk_client(self.is_sandbox)
    approval = {'AssignmentId': assignment_id}
    mturk_client.approve_assignment(**approval)