def retrieve_job(job_id: str) -> None:
    """Log the answers submitted by workers for one HIT.

    The assignments of the HIT are fetched with
    ``list_assignments_for_hit``; each assignment's answer XML is parsed
    with ``xmltodict`` and every question identifier is logged together
    with its free-text answer. If the HIT has no assignment yet, a single
    "No results ready yet" line is logged instead.

    Args:
        job_id: Identifier of the HIT, passed to the API as ``HITId``.
    """
    mturk = connect_mturk()
    results = mturk.list_assignments_for_hit(HITId=job_id)
    if results['NumResults'] <= 0:
        logging.info("No results ready yet")
        return

    for assignment in results['Assignments']:
        xml_doc = xmltodict.parse(assignment['Answer'])
        logging.info("Worker's answer was:")
        answers = xml_doc['QuestionFormAnswers']['Answer']
        # The parsed document holds a list when the HIT layout has several
        # input fields and a single mapping when it has only one; wrap the
        # latter so both layouts share the same logging code.
        if not isinstance(answers, list):
            answers = [answers]
        for answer_field in answers:
            logging.info("For input field: %s",
                         answer_field['QuestionIdentifier'])
            # An answer left empty is parsed as None; %s formatting logs
            # it instead of raising TypeError on str + None.
            logging.info("Submitted answer: %s", answer_field['FreeText'])
def submit(allow_duplicate: bool = False,
           name: Optional[List[str]] = None,
           all_tasks: bool = False,
           from_csv: Optional[str] = None):
    """Create jobs for the tasks listed in the configuration.

    Args:
        allow_duplicate: When true, a job is created for every sample.
            When false, samples for which ``is_job_in_records`` reports an
            existing job are skipped.
        name: Names of the tasks to submit. ``None`` or an empty sequence
            means "no explicit selection".
        all_tasks: Submit every configured task when no name is given.
        from_csv: Optional CSV path; when set, samples come from
            ``csv_generator`` instead of the task's registered generator.

    Raises:
        ValueError: If neither ``name`` nor ``all_tasks`` selects a task.
    """
    # Truthiness covers both the None default and an empty tuple/list.
    if not name and not all_tasks:
        raise ValueError("No task to submit")

    client = aws.connect_mturk()
    config = get_config()
    tasks = config['tasks']
    if name:
        tasks = [task for task in tasks if task['name'] in name]

    for task in tasks:
        logging.info(f"Submitting task {task['name']}")
        if from_csv is None:
            generator_name = retrieve_generator(task['name'])
            generator = generator_name(client)
        else:
            generator = csv_generator(client, from_csv)
        for sample in generator:
            # Duplicates are created only on explicit request; otherwise
            # the job records decide whether the sample is new.
            if allow_duplicate or not is_job_in_records(task['name'],
                                                        sample):
                create_job(client, task, sample)
def delete(all_hits: bool = False, hit_id: Optional[List[str]] = None):
    """Delete HITs.

    Exactly one of three actions is taken, in this order of precedence:
    delete every HIT (``all_hits``), delete the HITs listed in ``hit_id``,
    or fall back to deleting the recorded HITs.

    Args:
        all_hits: Delete all HITs via ``aws.delete_all_hits``.
        hit_id: Identifiers of the HITs to delete one by one. ``None`` or
            an empty sequence means that no identifier was given.
    """
    client = aws.connect_mturk()
    if all_hits:
        aws.delete_all_hits(client)
    elif hit_id:
        # Truthiness (rather than ``is not None``) keeps an empty sequence
        # from swallowing the fallback branch below.
        for hit in hit_id:
            aws.delete_hit(client, hit)
    else:
        aws.delete_recorded_hits(client)
def progress(all_hits: bool = False,
             all_recorded: bool = False,
             hit_id: Optional[List[str]] = None):
    """Report progress for HITs.

    Precedence: ``all_hits`` first, then ``all_recorded``, then the
    explicit ``hit_id`` selection.

    Args:
        all_hits: Report on every HIT via ``aws.progress_all_hits``.
        all_recorded: Report on recorded HITs via
            ``aws.progress_recorded_hits``.
        hit_id: Identifiers of specific HITs, forwarded to
            ``aws.progress_hits``. ``None`` or an empty sequence means
            that no identifier was given.

    Raises:
        ValueError: If no selection at all is provided.
    """
    # Validate before connecting so a bad call fails fast; truthiness
    # covers both the None default and an empty tuple/list.
    if not (all_hits or all_recorded or hit_id):
        raise ValueError("No job to report progress for")

    client = aws.connect_mturk()
    if all_hits:
        aws.progress_all_hits(client)
    elif all_recorded:
        aws.progress_recorded_hits(client)
    else:
        aws.progress_hits(client, hit_id)
def bonus(from_csv: Optional[str] = None,
          amount: float = 0.,
          message: str = "",
          output: str = ""):
    """Send a bonus for every assignment listed in a CSV file.

    Args:
        from_csv: Path of a CSV file with a header row containing at least
            the columns ``assignment_id`` and ``worker_id``. Each distinct
            assignment is rewarded once.
        amount: Bonus amount forwarded to ``aws.send_bonus``.
        message: Message forwarded to ``aws.send_bonus``.
        output: Optional path of a CSV log. Assignment ids found in its
            first column are skipped, and one row
            ``assignment, worker, amount, message`` is appended for each
            bonus sent. A missing file is treated as an empty log.

    Raises:
        ValueError: If ``from_csv`` is not given.
    """
    if from_csv is None:
        raise ValueError("No job to reward")

    # Assignment ids that already appear in the output log.
    done = set()
    if output != "":
        try:
            with open(output, 'r', newline='') as fid:
                done = {row[0]
                        for row in csv.reader(fid, delimiter=',')
                        if row}
        except FileNotFoundError:
            # First run: the log is only created once a bonus is sent.
            pass

    # assignment_id -> worker_id; dict keys deduplicate while keeping the
    # order in which assignments first appear in the file.
    pending = {}
    with open(from_csv, 'r', newline='') as fid:
        reader = csv.reader(fid, delimiter=',')
        # Parse the header with the csv reader so the last column name
        # carries no trailing newline and quoting is honoured.
        header = next(reader, [])
        for row in reader:
            if not row:
                continue  # blank line
            row_dict = dict(zip(header, row))
            assignment_id = row_dict['assignment_id']
            if assignment_id in done or assignment_id in pending:
                continue
            pending[assignment_id] = row_dict['worker_id']

    if not pending:
        # Nothing left to reward; no need to open a connection.
        return

    client = aws.connect_mturk()
    for assignment, worker in pending.items():
        aws.send_bonus(client, assignment, worker, amount, message)
        if output != "":
            # Log right after each bonus so that a failure midway does not
            # lose the record of bonuses already sent.
            with open(output, 'a', newline='') as fid:
                writer = csv.writer(fid, delimiter=',')
                writer.writerow([assignment, worker, amount, message])
response = client.create_hit_type( Title=task['title'], Description=task['description'], Keywords=task['keywords'], Reward=str(task['reward']), AssignmentDurationInSeconds=task['assignment_duration'], AutoApprovalDelayInSeconds=task['auto_approval_delay']) return response if __name__ == "__main__": config = get_config() tasks = config['tasks'] jobs = [] client = connect_mturk() for task in tasks: logging.info(f"Submitting task {task['name']}") generator = retrieve_generator(task['name'])(client) for sample in generator: job = create_job(client, task, sample) jobs.append(job) logging.info("All tasks were successfully submitted.") logging.info(f"Information is recorded in {config['job_filename']}.")