Example #1
def external_task_prepare_context(request, experiment_id, hit, override):
    """ Fetch hit, experiment, assignment, worker, etc.  Returns either a
    dictionary on success, or a response (or exception) if there is some error.
    """

    # obtain HIT
    if hit is None:
        if 'hitId' not in request.GET:
            if request.user.is_staff:
                return html_error_response(
                    request, 'HIT ID missing from GET parameters')
            else:
                raise Http404

        hit_id = request.GET['hitId']
        try:
            hit = MtHit.objects \
                .select_related(
                    'hit_type__experiment',
                    'hit_type__experiment_settings',
                    'hit_type__requirements') \
                .get(id=hit_id)
        except MtHit.DoesNotExist:
            # if this HIT cannot be found, tell Amazon about it
            if (override is None and not request.user.is_staff
                    and 'assignmentId' in request.GET
                    and 'workerId' in request.GET
                    and 'turkSubmitTo' in request.GET):
                expire_hit_task.delay(hit_id)
            raise Http404

    # obtain experiment
    experiment = hit.hit_type.experiment
    if experiment.id != int(experiment_id):
        if request.user.is_staff:
            return html_error_response(
                request, 'Experiment ID (%s) does not match HIT (%s)' %
                (experiment_id, experiment.id))
        else:
            raise Http404

    # obtain worker and assignment
    worker = get_or_create_mturk_worker_from_request(request)
    assignment_dirty = False
    if worker and 'assignmentId' in request.GET:
        assignment, _ = MtAssignment.objects.get_or_create(
            id=request.GET['assignmentId'],
            defaults={
                'hit': hit,
                'worker': worker
            })
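        # if the assignment row already exists (e.g. from an earlier visit
        # that was returned or abandoned), repair any stale HIT/worker link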
        if assignment.hit != hit or assignment.worker != worker:
            assignment.hit = hit
            assignment.worker = worker
            assignment_dirty = True
    else:
        assignment = None
        worker = None

    # obtain worker info specific to the experiment and worker
    if experiment and worker:
        experiment_worker, _ = ExperimentWorker.objects.get_or_create(
            experiment=experiment, worker=worker)
    else:
        experiment_worker = None

    # don't let blocked workers perform our tasks
    if (worker and worker.blocked) or (experiment_worker
                                       and experiment_worker.blocked):
        message = "Your submissions are too low quality.  Please stop doing our tasks."
        if experiment_worker and experiment_worker.blocked_reason:
            message += "<br/><br/>" + experiment_worker.blocked_reason
        elif worker and worker.blocked_reason:
            message += "<br/><br/>" + worker.blocked_reason
        return html_error_response(request, message)

    # fetch contents
    hit_contents = fetch_hit_contents(hit)
    if override and 'publishable' in request.GET:
        hit_contents = [x for x in hit_contents if x and x.publishable()]
    # also test hit.num_contents since it is only set after the last content
    # is added
    if not hit.num_contents or not hit_contents:
        return html_error_response(
            request, "Somehow there are no items in this HIT.")

    # fetch test (sentinel) contents
    if experiment_worker:
        if assignment.num_test_contents is None:
            n = experiment.test_contents_per_assignment
            if n > 0:
                # select new test contents from the set of possible contents
                # (that the user has not already answered)
                test_content_wrappers = experiment.test_contents.all() \
                    .exclude(responses__experiment_worker=experiment_worker) \
                    .order_by('-priority')[:n]

                # register chosen items with assignment
                assignment.test_contents.add(*test_content_wrappers)
            else:
                test_content_wrappers = []

            assignment.num_test_contents = len(test_content_wrappers)
            assignment_dirty = True
        elif assignment.num_test_contents > 0:
            # re-fetch existing contents
            test_content_wrappers = assignment.test_contents.all()
        else:
            test_content_wrappers = []

        # fetch objects from inside the wrappers
        if test_content_wrappers:
            test_contents = fetch_content_tuples([
                (x.content_type_id, x.object_id) for x in test_content_wrappers
            ])
        else:
            test_contents = []
    else:
        test_contents = []
        test_content_wrappers = []

    # shuffle together (some tasks may sort contents again in javascript)
    contents = hit_contents + test_contents
    random.shuffle(contents)

    # prepare context data
    context = {
        'hit': hit,
        'assignment': assignment,
        'worker': worker,
        'experiment': experiment,
        'experiment_id': experiment_id,
        'experiment_worker': experiment_worker,
        'slug': experiment.slug,
        'hit_contents': hit_contents,
        'test_content_wrappers': test_content_wrappers,
        'test_contents': test_contents,
        'contents': contents,
        'num_contents': len(contents),
        'num_contents_predicted': (
            len(hit_contents) + experiment.test_contents_per_assignment),
        'override': override,
    }
    if len(contents) == 1:
        context['content'] = contents[0]

    if experiment.version >= 2:
        # old experiments (version 1) don't use this
        context['contents_json'] = json.dumps(
            [c.get_entry_dict() for c in contents])

    # list of ids as json
    context['content_id_json'] = json.dumps([{'id': c.id} for c in contents])

    # requirements
    for req in hit.hit_type.requirements.values('name', 'value'):
        context[req['name']] = req['value']

    if assignment_dirty:
        assignment.save()

    return context
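
For context, here is a minimal sketch of how a Django view might consume this
helper; the view name and template path are assumptions, not part of the
original code. The key point is that the return value is either a context
dict or an already-built error response:

from django.shortcuts import render

def external_task(request, experiment_id, hit=None, override=None):
    context = external_task_prepare_context(
        request, experiment_id, hit, override)
    if not isinstance(context, dict):
        # the helper returned an error HttpResponse instead of a context dict
        return context
    return render(request, 'mturk/external_task.html', context)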
Example #2
def mturk_submit_impl(**kwargs):
    """ Record a submitted MTurk assignment: create the output objects,
    update scheduling counters, and mark the assignment complete.  Expects
    'results', 'time_ms', 'time_active_ms', 'time_load_ms', 'complete', and
    either instances or IDs for the user, MTurk HIT, MTurk assignment, and
    experiment.
    """

    # fetch objects if passed by ID
    if 'user_id' in kwargs:
        kwargs['user'] = UserProfile.objects.get(user_id=kwargs['user_id'])
    if 'mturk_hit_id' in kwargs:
        kwargs['mturk_hit'] = MtHit.objects.get(id=kwargs['mturk_hit_id'])
    if 'mturk_assignment_id' in kwargs:
        kwargs['mturk_assignment'] = MtAssignment.objects.get(id=kwargs['mturk_assignment_id'])
    if 'experiment_id' in kwargs:
        kwargs['experiment'] = Experiment.objects.get(id=kwargs['experiment_id'])

    # fetch experiment settings
    hit_type = kwargs['mturk_hit'].hit_type
    exp_settings = hit_type.experiment_settings
    if not exp_settings:
        # if the settings are somehow missing, update all records with the
        # newest experiment settings
        exp_settings = kwargs['experiment'].new_hit_settings
        MtHitType.objects.filter(id=hit_type.id) \
            .update(experiment_settings=exp_settings)

    # fetch hit contents
    if 'hit_contents' not in kwargs:
        kwargs['hit_contents'] = fetch_hit_contents(kwargs['mturk_hit'])
    hit_contents = kwargs['hit_contents']

    # new_objects_dict: {(content_type_id, content_id): [created items]}
    # (if [created items] is empty, the entry may be omitted)
    if hit_contents:
        new_objects_dict = exp_settings.out_content_model() \
            .mturk_submit(**kwargs)
    else:
        print "WARNING: no hit_contents in %s" % kwargs['mturk_hit'].id
        new_objects_dict = {}

    # sanity check
    if not all(isinstance(k, tuple) for k in new_objects_dict):
        raise ValueError(
            "Invalid new_objects_dict: %s" % repr(new_objects_dict))

    # flatten all items into one list
    new_objects_list = []
    for obj_list in new_objects_dict.values():
        new_objects_list += obj_list

    # attach objects to assignment; get_or_create keeps this idempotent if
    # the task is retried
    for obj in new_objects_list:
        MtSubmittedContent.objects.get_or_create(
            assignment=kwargs['mturk_assignment'],
            object_id=obj.id,
            content_type=ContentType.objects.get_for_model(obj),
        )

    for content in hit_contents:
        # content_tuple: (content type id, object id)
        content_tuple = get_content_tuple(content)
        if content_tuple not in new_objects_dict:
            # print '%s: no new objects generated' % repr(content_tuple)
            continue

        delta_completed = len(new_objects_dict[content_tuple])
        delta_scheduled = exp_settings.out_count_ratio

        # update completion counters; F() expressions make the update atomic
        # at the database level, so concurrent submissions don't clobber
        # each other
        PendingContent.objects \
            .filter(
                experiment=kwargs['experiment'],
                content_type=ContentType.objects.get_for_id(content_tuple[0]),
                object_id=content_tuple[1],
            ).update(
                num_outputs_completed=F(
                    'num_outputs_completed') + delta_completed,
                num_outputs_scheduled=F(
                    'num_outputs_scheduled') - delta_scheduled,
            )

    # consider all affected objects for new experiments
    pending_objects = list(set(hit_contents + new_objects_list))
    add_pending_objects_task.delay(
        [get_content_tuple(c) for c in pending_objects])

    # mark experiment as dirty
    Experiment.objects.filter(id=kwargs['experiment'].id) \
        .update(cubam_dirty=True)

    # here, "complete" means that the user actually submitted (and is not a
    # "partial submission", i.e. a background auto-submit performed by the
    # experiment script)
    if not kwargs['complete']:
        return

    # sync with mturk 30 minutes from now (it can take a while to update the
    # status; 1 minute is not enough)
    sync_hit_task.apply_async(
        args=[kwargs['mturk_hit'].id],
        countdown=30 * 60)

    # mark as done
    MtAssignment.objects.filter(id=kwargs['mturk_assignment'].id) \
        .update(submission_complete=True)
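
Since mturk_submit_impl resolves 'user_id', 'mturk_hit_id',
'mturk_assignment_id', and 'experiment_id' into model instances itself, it
lends itself to being called from an asynchronous task that serializes only
IDs. A hypothetical Celery wrapper (the task name and the invocation below
are illustrative assumptions, not part of the original code):

from celery import shared_task

@shared_task
def mturk_submit_task(**kwargs):
    # passing IDs keeps the serialized task payload small;
    # mturk_submit_impl re-fetches the objects itself
    mturk_submit_impl(**kwargs)

# hypothetical invocation after a worker submits an assignment:
# mturk_submit_task.delay(
#     user_id=user.id,
#     mturk_hit_id=hit.id,
#     mturk_assignment_id=assignment.id,
#     experiment_id=experiment.id,
#     results=results,
#     time_ms=time_ms,
#     time_active_ms=time_active_ms,
#     time_load_ms=time_load_ms,
#     complete=True,
# )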