import argparse
import csv
import sys
from itertools import groupby

# get_hit_name() and bash_var_to_py() are helper functions defined elsewhere
# in this module.


def main():
    from result_to_annolist import parse_boxes

    parser = argparse.ArgumentParser()
    parser.add_argument("hits_dir")

    args = parser.parse_args()

    hit_name = get_hit_name(args.hits_dir)
    # The per-object bonus (in USD) is read from the HIT's parameter file.
    object_bonus_usd = float(
        bash_var_to_py(args.hits_dir + "/hit_params.sh", "OBJECT_BONUS_USD"))

    print "\nOBJECT_BONUS_USD: {}\n\n".format(object_bonus_usd)

    results_filename = args.hits_dir + '/' + hit_name + '.results'
    accept_filename = args.hits_dir + '/' + hit_name + '.accept'
    reject_filename = args.hits_dir + '/' + hit_name + '.reject'
    bonus_filename = args.hits_dir + '/' + hit_name + '-grant-bonus.sh'
    bonus_log_filename = hit_name + '-grant-bonus.log'
    with open(results_filename, 'r') as results_file:
        results_list = csv.reader(results_file, delimiter='\t')
        # Map column name -> index using the TSV header row.
        columns = {name: i for i, name in enumerate(next(results_list))}

        with open(accept_filename, 'w') as accept_file:
            accept_file.write('assignmentIdToApprove\tassignmentIdToApproveComment\n')
            counter = 0
            for x in results_list:
                assignmentid = x[columns['assignmentid']]
                if assignmentid:
                    accept_file.write('%s\t"thank you for your work"\n' % assignmentid)
                else:
                    counter += 1
            if counter > 0:
                sys.stderr.write('WARNING: %s assignments not yet completed\n' % counter)
            print('created %s' % accept_filename)
        # The reject file is created with only the header row, i.e. no
        # assignments are rejected by default.
        with open(reject_filename, 'w') as reject_file:
            reject_file.write('assignmentIdToReject\tassignmentIdToRejectComment\n')
            print('created %s' % reject_filename)

    with open(results_filename, 'r') as results_file:
        results_list = csv.reader(results_file, delimiter='\t')
        columns = {name: i for i, name in enumerate(next(results_list))}

        if object_bonus_usd > 1: 
            with open(bonus_filename, 'w') as bonus_file:
                wid = lambda x: x[columns['workerid']]
                for k, v in groupby(sorted(results_list, key=wid), key=wid):
                    num_people = 0
                    for x in v:
                        assignmentid = x[columns['assignmentid']]
                        workerid = x[columns['workerid']]
                        #num_people = int(x[columns['numcomplete']])
                        num_people += len(parse_boxes(x[columns['Answer.results']]))
                    if assignmentid and workerid and num_people > 0:
                        # Note: the per-person bonus is hard-coded here rather
                        # than taken from the OBJECT_BONUS_USD value read above.
                        bonus_per_person = 0.0125
                        bonus_params = {
                            'assignmentid': assignmentid,
                            'amount': num_people * bonus_per_person,
                            'workerid': workerid,
                            'num_people': num_people,
                            'bonus_per_person': bonus_per_person,
                            'bonus_log_filename': bonus_log_filename,
                        }
                        bonus_file.write('''"$MTURK_CMD_HOME"/bin/grantBonus.sh --workerid %(workerid)s --amount %(amount)0.2f --assignment %(assignmentid)s --reason "labeled people: %(num_people)d, bonus per person: %(bonus_per_person)0.4f"
    echo "%(workerid)s,%(assignmentid)s" >> %(bonus_log_filename)s
    ''' % bonus_params)

                print('created %s' % bonus_filename)
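
The bash_var_to_py() helper used above (and again in the example below) is defined elsewhere in the module and is not shown on this page. As a rough illustration, assuming hit_params.sh contains simple NAME=value shell assignments, a minimal stand-in might look like the following (the parsing details are assumptions, not the original implementation):

import re

def bash_var_to_py(script_filename, var_name):
    # Illustrative sketch only: return the value assigned to var_name in a
    # shell script made of simple NAME=value (or NAME="value") lines.
    pattern = re.compile(r'^\s*%s=(.*)$' % re.escape(var_name))
    with open(script_filename, 'r') as f:
        for line in f:
            match = pattern.match(line)
            if match:
                return match.group(1).strip().strip('"\'')
    raise KeyError('%s not found in %s' % (var_name, script_filename))

With that stand-in, float(bash_var_to_py('hits/my_hit/hit_params.sh', 'OBJECT_BONUS_USD')) returns the bonus amount as a float, matching how the code above calls it; the path here is a made-up example.
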
Example #3

import argparse
import collections
import csv
import os
import subprocess

import AnnotationLib

# get_hit_name(), bash_var_to_py(), annotation_from_result(),
# merge_labelact_annolist() and the ATTR_VAL_* constants are defined
# elsewhere in the surrounding project.


def main():
    parser = argparse.ArgumentParser()

    parser.add_argument("hits_dir", help="hit directory")
    parser.add_argument("--print_empty", action="store_true", help="print info on empty hits");
    parser.add_argument("--save_worker_results", action="store_true", help="save results for each individual worker");

    #parser.add_argument("output_ext", default=".al", help="output format .idl/.al/.pal")
    #parser.add_argument("--url_prefix", help="path on S3 that should be removed when converting to annolist")   

    args = parser.parse_args()

    #url_prefix = bash_var_to_py("./data_utils_init.sh", "S3_HOST_DIR")
    url_prefix = bash_var_to_py(args.hits_dir + "/hit_params.sh", "S3_HOST_DIR")

    args.output_ext = ".pal"

    if url_prefix[-1] != os.sep:
        url_prefix += os.sep

    hit_name = get_hit_name(args.hits_dir)
    #pal_name = args.hits_dir + '/' + hit_name + '.pal'
    pal_name = args.hits_dir + '/' + hit_name + args.output_ext

    results_filename = args.hits_dir + '/' + hit_name + '.results'
    results_by_worker_dir = '%s/results_by_worker_%s' % (args.hits_dir, hit_name)
    subprocess.call(['mkdir', '-p', results_by_worker_dir])
    # Optionally exclude results from workers listed in bad_workers.txt
    # (one worker id per line).
    try:
        with open(args.hits_dir + '/bad_workers.txt', 'r') as f:
            bad_workerids = set(x.strip() for x in f.readlines())
    except IOError:
        bad_workerids = set()

    with open(results_filename, 'r') as results_file:
        results_list = list(csv.reader(results_file, delimiter='\t'))
        # Map column name -> index using the TSV header row.
        columns = {name: i for i, name in enumerate(results_list[0])}

        annotation_list = AnnotationLib.AnnoList()
        hit_type = ""

        for each in results_list[1:]:
            if (each[columns['hitstatus']] == 'Reviewable'
                    and each[columns['workerid']] not in bad_workerids):
                # All rows in one results file are expected to share the same
                # hit type (the first field of Answer.results).
                cur_hit_type = each[columns['Answer.results']].split(',')[0]
                if not hit_type:
                    hit_type = cur_hit_type
                else:
                    assert hit_type == cur_hit_type

                a = annotation_from_result(each, columns, url_prefix)
                annotation_list.append(a)

        # MA: some hit types require special post-processing
        if hit_type == "label_act":
            annotation_list = merge_labelact_annolist(annotation_list)
            annotation_list.add_attribute("gender", int)
            annotation_list.add_attribute("ptype", int)
            annotation_list.add_attribute("act", int)

            annotation_list.add_attribute_val("gender", "male", ATTR_VAL_GENDER_MALE)
            annotation_list.add_attribute_val("gender", "female", ATTR_VAL_GENDER_FEMALE)

            annotation_list.add_attribute_val("ptype", "sales", ATTR_VAL_PTYPE_SALES)
            annotation_list.add_attribute_val("ptype", "customer", ATTR_VAL_PTYPE_CUST)

            annotation_list.add_attribute_val("act", "interact_with_customer", ATTR_VAL_ACT_SALES_INT)
            annotation_list.add_attribute_val("act", "clean", ATTR_VAL_ACT_SALES_CLEAN)
            annotation_list.add_attribute_val("act", "other_sales", ATTR_VAL_ACT_SALES_OTHER)

            annotation_list.add_attribute_val("act", "queue", ATTR_VAL_ACT_CUST_QUEUE)
            annotation_list.add_attribute_val("act", "interact_with_sales", ATTR_VAL_ACT_CUST_INT)
            annotation_list.add_attribute_val("act", "browse", ATTR_VAL_ACT_CUST_BROWSE)
            annotation_list.add_attribute_val("act", "other_customer", ATTR_VAL_ACT_CUST_OTHER)


        AnnotationLib.save(pal_name, annotation_list)

        if args.save_worker_results:
            # Additionally save one annotation file per worker (useful for
            # spot-checking individual workers).
            for workerid in set(x[columns['workerid']] for x in results_list[1:]):
                if workerid == '':
                    print '%s missing entries' % sum(1 for each in results_list[1:]
                                                     if each[columns['workerid']] == '')
                    continue

                annotation_list = AnnotationLib.AnnoList(
                    [annotation_from_result(each, columns, url_prefix)
                     for each in results_list[1:]
                     if each[columns['hitstatus']] == 'Reviewable'
                     and each[columns['workerid']] == workerid])

                output_filename = '%s/%s.pal' % (results_by_worker_dir, workerid)
                # print "saving ", output_filename

                print "worker: {}, number of annotations: {}".format(workerid, len(annotation_list))
                AnnotationLib.save(output_filename, annotation_list)

        if args.print_empty:
            for res in results_list[1:]:
                if res[columns['Answer.results']] == "":
                    print "empty hit: ", res

        # Show statistics on empty results (these occur due to an unknown bug
        # in the JavaScript annotation tool).
        empty_res_workers = [each[columns['workerid']] for each in results_list[1:]
                             if each[columns['Answer.results']] == ""]

        print "empty output by workerid: "
        for workerid, worker_empty_count in collections.Counter(empty_res_workers).items():
            print workerid, worker_empty_count

        num_empty = len(empty_res_workers)
        print "number of empty results: ", num_empty