def main():
    """Generate the accept, reject and bonus files for one MTurk HIT directory.

    Command line: ``hits_dir`` -- directory that holds ``hit_params.sh`` and
    the tab-separated ``<hit_name>.results`` file.

    Files written into ``hits_dir``:
      * ``<hit_name>.accept`` -- one line per row with a non-empty
        ``assignmentid``; rows without one are counted and reported on stderr
        as not yet completed.
      * ``<hit_name>.reject`` -- header line only.
      * ``<hit_name>-grant-bonus.sh`` -- only when ``OBJECT_BONUS_USD > 1``;
        one ``grantBonus.sh`` call per worker, sized by the number of boxes
        ``parse_boxes`` finds in that worker's ``Answer.results`` fields.
    """
    from result_to_annolist import parse_boxes

    parser = argparse.ArgumentParser()
    parser.add_argument("hits_dir")
    args = parser.parse_args()

    hit_name = get_hit_name(args.hits_dir)
    object_bonus_usd = float(
        bash_var_to_py(args.hits_dir + "/hit_params.sh", "OBJECT_BONUS_USD"))
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print("\nOBJECT_BONUS_USD: {}\n\n".format(object_bonus_usd))

    results_filename = args.hits_dir + '/' + hit_name + '.results'
    accept_filename = args.hits_dir + '/' + hit_name + '.accept'
    reject_filename = args.hits_dir + '/' + hit_name + '.reject'
    bonus_filename = args.hits_dir + '/' + hit_name + '-grant-bonus.sh'
    bonus_log_filename = hit_name + '-grant-bonus.log'

    # Parse the results once; both the accept list and the bonus script are
    # derived from the same rows.
    with open(results_filename, 'r') as results_file:
        reader = csv.reader(results_file, delimiter='\t')
        header = next(reader)
        rows = list(reader)
    columns = {name: index for index, name in enumerate(header)}

    def field(row, name):
        # Column lookup stays lazy: a missing column only fails when used.
        return row[columns[name]]

    not_completed = 0
    with open(accept_filename, 'w') as accept_file:
        accept_file.write(
            'assignmentIdToApprove\tassignmentIdToApproveComment\n')
        for row in rows:
            assignmentid = field(row, 'assignmentid')
            if assignmentid:
                accept_file.write(
                    '%s\t"thank you for your work"\n' % assignmentid)
            else:
                not_completed += 1
    if not_completed > 0:
        sys.stderr.write(
            'WARNING: %s assignments not yet completed\n' % not_completed)
    print('created %s' % accept_filename)

    with open(reject_filename, 'w') as reject_file:
        reject_file.write(
            'assignmentIdToReject\tassignmentIdToRejectComment\n')
    print('created %s' % reject_filename)

    # NOTE(review): OBJECT_BONUS_USD is only used as an on/off gate here and
    # the per-person rate below is hard-coded -- confirm both are intended.
    if object_bonus_usd > 1:
        bonus_per_person = 0.0125
        # Each shell command must end in a newline so that the log `echo`
        # is not parsed as extra arguments of grantBonus.sh.
        bonus_template = (
            '"$MTURK_CMD_HOME"/bin/grantBonus.sh --workerid %(workerid)s'
            ' --amount %(amount)0.2f --assignment %(assignmentid)s'
            ' --reason "labeled people: %(num_people)d,'
            ' bonus per person: %(bonus_per_person)0.4f"\n'
            'echo "%(workerid)s,%(assignmentid)s" >> %(bonus_log_filename)s\n'
        )

        def worker_of(row):
            return field(row, 'workerid')

        with open(bonus_filename, 'w') as bonus_file:
            # groupby only merges adjacent rows, hence the sort on the same key.
            for workerid, worker_rows in groupby(
                    sorted(rows, key=worker_of), key=worker_of):
                num_people = 0
                assignmentid = ''
                for row in worker_rows:
                    # The bonus is attached to the worker's last assignment.
                    assignmentid = field(row, 'assignmentid')
                    num_people += len(
                        parse_boxes(field(row, 'Answer.results')))
                if assignmentid and workerid and num_people > 0:
                    bonus_file.write(bonus_template % {
                        'assignmentid': assignmentid,
                        'amount': num_people * bonus_per_person,
                        'workerid': workerid,
                        'num_people': num_people,
                        'bonus_per_person': bonus_per_person,
                        'bonus_log_filename': bonus_log_filename,
                    })
        print('created %s' % bonus_filename)
def main():
    """Generate the accept, reject and bonus files for one MTurk HIT directory.

    Command line: ``hits_dir`` -- directory that holds ``hit_params.sh`` and
    the tab-separated ``<hit_name>.results`` file.

    Files written into ``hits_dir``:
      * ``<hit_name>.accept`` -- one line per row with a non-empty
        ``assignmentid``; rows without one are counted and reported on stderr
        as not yet completed.
      * ``<hit_name>.reject`` -- header line only.
      * ``<hit_name>-grant-bonus.sh`` -- only when ``OBJECT_BONUS_USD > 1``;
        one ``grantBonus.sh`` call per worker, sized by the number of boxes
        ``parse_boxes`` finds in that worker's ``Answer.results`` fields.
    """
    from result_to_annolist import parse_boxes

    parser = argparse.ArgumentParser()
    parser.add_argument("hits_dir")
    args = parser.parse_args()

    hit_name = get_hit_name(args.hits_dir)
    object_bonus_usd = float(
        bash_var_to_py(args.hits_dir + "/hit_params.sh", "OBJECT_BONUS_USD"))
    # Single-argument print(...) behaves the same as the print statement on
    # Python 2 and is also valid on Python 3.
    print("\nOBJECT_BONUS_USD: {}\n\n".format(object_bonus_usd))

    results_filename = args.hits_dir + '/' + hit_name + '.results'
    accept_filename = args.hits_dir + '/' + hit_name + '.accept'
    reject_filename = args.hits_dir + '/' + hit_name + '.reject'
    bonus_filename = args.hits_dir + '/' + hit_name + '-grant-bonus.sh'
    bonus_log_filename = hit_name + '-grant-bonus.log'

    # Parse the results once; both the accept list and the bonus script are
    # derived from the same rows.
    with open(results_filename, 'r') as results_file:
        reader = csv.reader(results_file, delimiter='\t')
        header = next(reader)
        rows = list(reader)
    columns = {name: index for index, name in enumerate(header)}

    def field(row, name):
        # Column lookup stays lazy: a missing column only fails when used.
        return row[columns[name]]

    not_completed = 0
    with open(accept_filename, 'w') as accept_file:
        accept_file.write(
            'assignmentIdToApprove\tassignmentIdToApproveComment\n')
        for row in rows:
            assignmentid = field(row, 'assignmentid')
            if assignmentid:
                accept_file.write(
                    '%s\t"thank you for your work"\n' % assignmentid)
            else:
                not_completed += 1
    if not_completed > 0:
        sys.stderr.write(
            'WARNING: %s assignments not yet completed\n' % not_completed)
    print('created %s' % accept_filename)

    with open(reject_filename, 'w') as reject_file:
        reject_file.write(
            'assignmentIdToReject\tassignmentIdToRejectComment\n')
    print('created %s' % reject_filename)

    # NOTE(review): OBJECT_BONUS_USD is only used as an on/off gate here and
    # the per-person rate below is hard-coded -- confirm both are intended.
    if object_bonus_usd > 1:
        bonus_per_person = 0.0125
        # Each shell command must end in a newline so that the log `echo`
        # is not parsed as extra arguments of grantBonus.sh.
        bonus_template = (
            '"$MTURK_CMD_HOME"/bin/grantBonus.sh --workerid %(workerid)s'
            ' --amount %(amount)0.2f --assignment %(assignmentid)s'
            ' --reason "labeled people: %(num_people)d,'
            ' bonus per person: %(bonus_per_person)0.4f"\n'
            'echo "%(workerid)s,%(assignmentid)s" >> %(bonus_log_filename)s\n'
        )

        def worker_of(row):
            return field(row, 'workerid')

        with open(bonus_filename, 'w') as bonus_file:
            # groupby only merges adjacent rows, hence the sort on the same key.
            for workerid, worker_rows in groupby(
                    sorted(rows, key=worker_of), key=worker_of):
                num_people = 0
                assignmentid = ''
                for row in worker_rows:
                    # The bonus is attached to the worker's last assignment.
                    assignmentid = field(row, 'assignmentid')
                    num_people += len(
                        parse_boxes(field(row, 'Answer.results')))
                if assignmentid and workerid and num_people > 0:
                    bonus_file.write(bonus_template % {
                        'assignmentid': assignmentid,
                        'amount': num_people * bonus_per_person,
                        'workerid': workerid,
                        'num_people': num_people,
                        'bonus_per_person': bonus_per_person,
                        'bonus_log_filename': bonus_log_filename,
                    })
        print('created %s' % bonus_filename)
def main():
    """Convert a HIT's MTurk results file into a ``.pal`` annotation list.

    Command line:
      * ``hits_dir`` -- directory holding ``hit_params.sh``, the tab-separated
        ``<hit_name>.results`` file and, optionally, ``bad_workers.txt``
        (one worker id per line; those workers' rows are left out).
      * ``--print_empty`` -- print every row whose ``Answer.results`` is empty.
      * ``--save_worker_results`` -- additionally save one ``.pal`` file per
        worker under ``results_by_worker_<hit_name>``.

    Only rows with ``hitstatus == 'Reviewable'`` are converted. The hit type
    is the first comma-separated token of ``Answer.results`` and must be the
    same for every converted row.

    Raises:
        ValueError: if the converted rows carry different hit types.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("hits_dir", help="hit directory")
    parser.add_argument("--print_empty", action="store_true",
                        help="print info on empty hits")
    parser.add_argument("--save_worker_results", action="store_true",
                        help="save results for each individual worker")
    args = parser.parse_args()

    url_prefix = bash_var_to_py(args.hits_dir + "/hit_params.sh", "S3_HOST_DIR")
    args.output_ext = ".pal"

    if url_prefix[-1] != os.sep:
        url_prefix += os.sep

    hit_name = get_hit_name(args.hits_dir)
    pal_name = args.hits_dir + '/' + hit_name + args.output_ext
    results_filename = args.hits_dir + '/' + hit_name + '.results'
    results_by_worker_dir = '%s/results_by_worker_%s' % (args.hits_dir, hit_name)
    subprocess.call(['mkdir', '-p', results_by_worker_dir])

    # A missing bad_workers.txt simply means nobody is excluded.
    try:
        with open(args.hits_dir + '/bad_workers.txt', 'r') as f:
            bad_workerids = set(line.strip() for line in f)
    except IOError:
        bad_workerids = set()

    with open(results_filename, 'r') as results_file:
        results_list = list(csv.reader(results_file, delimiter='\t'))
    columns = {name: index for index, name in enumerate(results_list[0])}
    rows = results_list[1:]

    def field(row, name):
        # Column lookup stays lazy: a missing column only fails when used.
        return row[columns[name]]

    annotation_list = AnnotationLib.AnnoList()
    hit_type = ""

    for row in rows:
        if (field(row, 'hitstatus') != 'Reviewable'
                or field(row, 'workerid') in bad_workerids):
            continue

        cur_hit_type = field(row, 'Answer.results').split(',')[0]
        if not hit_type:
            hit_type = cur_hit_type
        elif hit_type != cur_hit_type:
            # Explicit raise instead of assert: the check must survive -O.
            raise ValueError(
                "mixed hit types in %s: %r and %r"
                % (results_filename, hit_type, cur_hit_type))

        annotation_list.append(annotation_from_result(row, columns, url_prefix))

    # MA: some hit types require special post-processing
    if hit_type == "label_act":
        annotation_list = merge_labelact_annolist(annotation_list)

        annotation_list.add_attribute("gender", int)
        annotation_list.add_attribute("ptype", int)
        annotation_list.add_attribute("act", int)

        annotation_list.add_attribute_val("gender", "male", ATTR_VAL_GENDER_MALE)
        annotation_list.add_attribute_val("gender", "female", ATTR_VAL_GENDER_FEMALE)

        annotation_list.add_attribute_val("ptype", "sales", ATTR_VAL_PTYPE_SALES)
        annotation_list.add_attribute_val("ptype", "customer", ATTR_VAL_PTYPE_CUST)

        annotation_list.add_attribute_val("act", "interact_with_customer", ATTR_VAL_ACT_SALES_INT)
        annotation_list.add_attribute_val("act", "clean", ATTR_VAL_ACT_SALES_CLEAN)
        annotation_list.add_attribute_val("act", "other_sales", ATTR_VAL_ACT_SALES_OTHER)

        annotation_list.add_attribute_val("act", "queue", ATTR_VAL_ACT_CUST_QUEUE)
        annotation_list.add_attribute_val("act", "interact_with_sales", ATTR_VAL_ACT_CUST_INT)
        annotation_list.add_attribute_val("act", "browse", ATTR_VAL_ACT_CUST_BROWSE)
        annotation_list.add_attribute_val("act", "other_customer", ATTR_VAL_ACT_CUST_OTHER)

    AnnotationLib.save(pal_name, annotation_list)

    if args.save_worker_results:
        # Group rows by worker in a single pass instead of rescanning every
        # row once per worker.
        rows_by_worker = collections.defaultdict(list)
        for row in rows:
            rows_by_worker[field(row, 'workerid')].append(row)

        for workerid, worker_rows in rows_by_worker.items():
            if workerid == '':
                # Rows without a worker id are only counted, never exported.
                print('%s missing entries' % len(worker_rows))
                continue

            worker_annotations = AnnotationLib.AnnoList(
                [annotation_from_result(row, columns, url_prefix)
                 for row in worker_rows
                 if field(row, 'hitstatus') == 'Reviewable'])

            output_filename = '%s/%s.pal' % (results_by_worker_dir, workerid)
            print("worker: {}, number of annotations: {}".format(
                workerid, len(worker_annotations)))
            AnnotationLib.save(output_filename, worker_annotations)

    empty_rows = [row for row in rows if field(row, 'Answer.results') == ""]

    if args.print_empty:
        for row in empty_rows:
            # Two spaces: matches what `print "empty hit: ", res` emitted.
            print("empty hit:  %s" % (row,))

    # show statistics on empty results (happens due to unknown bug in javascript tool)
    empty_res_workers = [field(row, 'workerid') for row in empty_rows]

    print("empty output by workerid: ")
    for workerid, worker_empty_count in collections.Counter(empty_res_workers).items():
        print("%s %s" % (workerid, worker_empty_count))

    print("number of empty results:  %s" % len(empty_rows))