Example #1
0
def process_irr_label(data, label):
    """Resolve an IRR datum once enough users have labeled or skipped it.

    The datum's DataLabel rows and its IRRLog rows with a null label (skips)
    are counted. If the total is below ``project.num_users_irr`` nothing is
    done. Otherwise every DataLabel row is copied into IRRLog and removed, and:

    * if all labels are identical and there are no skips, ``irr_ind`` is
      cleared on the datum, a single DataLabel attributed to the project
      creator is created with ``label``, and the datum leaves DataQueue;
    * otherwise the datum's DataQueue rows are pointed at the admin queue.

    Finally the datum is removed from the irr queue's Redis set and, on
    disagreement, added to the admin queue's Redis set.
    """
    user_labels = DataLabel.objects.filter(data=data)
    skips = IRRLog.objects.filter(label__isnull=True, data=data)
    project = data.project
    training_set = project.get_current_training_set()
    admin_queue = Queue.objects.get(project=project, type="admin")

    # not enough labels/skips yet: leave the datum alone
    response_total = user_labels.count() + skips.count()
    if response_total < project.num_users_irr:
        return

    # snapshot every current label as an IRR history record
    history_records = []
    for existing in user_labels:
        history_records.append(
            IRRLog(
                data=data,
                profile=existing.profile,
                label=existing.label,
                timestamp=existing.timestamp,
            )
        )

    with transaction.atomic():
        IRRLog.objects.bulk_create(history_records)

        # remember the label values before the rows are deleted
        label_values = list(user_labels.values_list("label", flat=True))
        DataLabel.objects.filter(data=data).delete()

        # agreement means one distinct label and nobody skipped
        agree = len(set(label_values)) == 1 and skips.count() == 0
        if agree:
            # no longer irr, so it may be used in the training set
            Data.objects.filter(pk=data.pk).update(irr_ind=False)
            # record the single agreed label under the project creator
            DataLabel.objects.create(
                data=data,
                profile=project.creator,
                label=label,
                training_set=training_set,
                time_to_label=None,
                timestamp=timezone.now(),
            )
            DataQueue.objects.filter(data=data).delete()
        else:
            # disagreement: hand the datum over to the admin queue
            DataQueue.objects.filter(data=data).update(queue=admin_queue)

    # mirror the queue changes in redis
    irr_queue = Queue.objects.get(project=project, type="irr")
    settings.REDIS.srem(
        redis_serialize_set(irr_queue), redis_serialize_data(data)
    )
    if not agree:
        settings.REDIS.sadd(
            redis_serialize_set(admin_queue), redis_serialize_data(data)
        )
Example #2
0
def restore_data(request, data_pk):
    """Move a datum out of the RecycleBin and into the admin queue.

    Args:
        request: The POST request
        data_pk: Primary key of the data
    Returns:
        {} on success, or {"error": ...} when the requesting profile's
        permission level on the project is not above 1.
    """
    data = Data.objects.get(pk=data_pk)
    profile = request.user.profile

    # Only admins may restore data
    if project_extras.proj_permission_level(data.project, profile) <= 1:
        return Response({"error": "Invalid credentials. Must be an admin."})

    # put the datum back into the project's admin queue
    admin_queue = Queue.objects.get(project=data.project, type="admin")
    DataQueue.objects.create(data=data, queue=admin_queue)

    # keep redis in step with the database
    settings.REDIS.sadd(
        redis_serialize_set(admin_queue), redis_serialize_data(data)
    )

    # finally drop the recycle bin entry
    RecycleBin.objects.get(data=data).delete()

    return Response({})
Example #3
0
def unassign_datum(datum, profile):
    """Delete the AssignedData row linking ``profile`` to ``datum``.

    The datum is then pushed back (``lpush``) onto the Redis key of the queue
    the assignment belonged to. Raises if no single matching assignment exists.
    """
    assignment = AssignedData.objects.get(profile=profile, data=datum)
    source_queue = assignment.queue
    assignment.delete()

    # hand the datum back to the queue it was taken from
    settings.REDIS.lpush(
        redis_serialize_queue(source_queue), redis_serialize_data(datum)
    )
Example #4
0
def move_skipped_to_admin_queue(datum, profile, project):
    """Hand a skipped datum over to the project's admin queue.

    Deletes the profile's AssignedData row for the datum, re-points the
    matching DataQueue rows from the old queue to the admin queue, and then
    moves the datum between the two queues' Redis sets.
    """
    admin_queue = Queue.objects.get(project=project, type="admin")

    with transaction.atomic():
        # drop the assignment, remembering which queue it came from
        assignment = AssignedData.objects.get(data=datum, profile=profile)
        old_queue = assignment.queue
        assignment.delete()

        # re-point the queue entry at the admin queue
        queue_entries = DataQueue.objects.filter(data=datum, queue=old_queue)
        queue_entries.update(queue=admin_queue)

    # mirror the move in redis: out of the old set, into the admin set
    settings.REDIS.srem(
        redis_serialize_set(old_queue), redis_serialize_data(datum)
    )
    settings.REDIS.sadd(
        redis_serialize_set(admin_queue), redis_serialize_data(datum)
    )
Example #5
0
def label_admin_label(request, data_pk):
    """Label a single datum from the admin annotation page.

    A DataLabel is created for the requesting profile with no time-to-label,
    the datum is removed from the project's admin queue (database and Redis),
    and its ``irr_ind`` flag is cleared if it was set.

    Args:
        request: The POST request; ``request.data["labelID"]`` is the primary
            key of the label to apply
        data_pk: Primary key of the data
    Returns:
        {}
    """
    datum = Data.objects.get(pk=data_pk)
    project = datum.project
    chosen_label = Label.objects.get(pk=request.data["labelID"])
    profile = request.user.profile
    training_set = project.get_current_training_set()

    with transaction.atomic():
        admin_queue = project.queue_set.get(type="admin")

        DataLabel.objects.create(
            data=datum,
            label=chosen_label,
            profile=profile,
            training_set=training_set,
            time_to_label=None,
            timestamp=timezone.now(),
        )

        # the datum has been handled, so take it out of the admin queue
        DataQueue.objects.filter(data=datum, queue=admin_queue).delete()

        # update redis
        settings.REDIS.srem(
            redis_serialize_set(admin_queue), redis_serialize_data(datum)
        )

        # an admin label is final, so the datum stops being irr
        if datum.irr_ind:
            Data.objects.filter(pk=datum.pk).update(irr_ind=False)

    # NOTE: this checks if the model needs to be triggered, but not if the
    # queues need to be refilled. This is because for something to be in the
    # admin queue, annotate or skip would have already checked for an empty queue
    check_and_trigger_model(datum)
    return Response({})
Example #6
0
def modify_label_to_skip(request, data_pk):
    """Turn a previously submitted label on a datum into a skip.

    The DataLabel rows matching the datum and the old label are deleted. For
    IRR data a skip entry (null label) is written to IRRLog unless this profile
    already has an IRRLog entry for the datum; non-IRR data is placed in the
    project's admin queue (database and Redis). The change is always recorded
    in LabelChangeLog with ``new_label="skip"``.

    Args:
        request: The POST request; ``request.data["oldLabelID"]`` is the
            primary key of the label being replaced
        data_pk: Primary key of the data
    Returns:
        {}
    """
    data = Data.objects.get(pk=data_pk)
    profile = request.user.profile
    response = {}
    project = data.project
    old_label = Label.objects.get(pk=request.data["oldLabelID"])
    queue = Queue.objects.get(project=project, type="admin")

    with transaction.atomic():
        # NOTE(review): this filter is not restricted to ``profile``, so rows
        # from other users with the same label are removed too -- confirm
        # that this is intended.
        DataLabel.objects.filter(data=data, label=old_label).delete()
        if data.irr_ind:
            # if it was irr, add it to the log (once per profile); exists()
            # asks the database for presence instead of loading every row
            already_logged = IRRLog.objects.filter(
                data=data, profile=profile
            ).exists()
            if not already_logged:
                IRRLog.objects.create(data=data,
                                      profile=profile,
                                      label=None,
                                      timestamp=timezone.now())
        else:
            # if it's not irr, add it to the admin queue immediately
            DataQueue.objects.create(data=data, queue=queue)

            # update redis
            settings.REDIS.sadd(redis_serialize_set(queue),
                                redis_serialize_data(data))
        LabelChangeLog.objects.create(
            project=project,
            data=data,
            profile=profile,
            old_label=old_label.name,
            new_label="skip",
            change_timestamp=timezone.now(),
        )

    return Response(response)
Example #7
0
def label_data(label, datum, profile, time):
    """Record a label for a datum and remove the profile's assignment to it.

    For non-IRR data the datum is also removed from DataQueue and from the
    associated Redis set. For IRR data the label is either appended to the IRR
    history (when the datum was already processed) or passed on to
    ``process_irr_label``. Raises if the profile has no assignment for the
    datum.
    """
    project = datum.project
    training_set = project.get_current_training_set()
    is_irr = datum.irr_ind

    with transaction.atomic():
        DataLabel.objects.create(
            data=datum,
            label=label,
            profile=profile,
            training_set=training_set,
            time_to_label=time,
            timestamp=timezone.now(),
        )
        # data/profile is unique on AssignedData, so at most one row matches
        assignment = AssignedData.objects.get(data=datum, profile=profile)
        assigned_queue = assignment.queue
        assignment.delete()

        if is_irr:
            logged_count = IRRLog.objects.filter(data=datum).count()
            if logged_count < project.num_users_irr:
                # still collecting labels: let the irr logic decide
                process_irr_label(datum, label)
            else:
                # the history already holds the needed number of entries, so
                # the datum was processed before; only log this extra label
                IRRLog.objects.create(
                    data=datum,
                    profile=profile,
                    label=label,
                    timestamp=timezone.now(),
                )
                DataLabel.objects.get(data=datum, profile=profile).delete()
        else:
            DataQueue.objects.filter(data=datum, queue=assigned_queue).delete()

    # redis is only touched here for non-irr data
    if not is_irr:
        settings.REDIS.srem(
            redis_serialize_set(assigned_queue), redis_serialize_data(datum)
        )
Example #8
0
def discard_data(request, data_pk):
    """Move a datum to the RecycleBin. This removes it from the admin dataqueue. This is
    used only in the skew table by the admin.

    Args:
        request: The POST request
        data_pk: Primary key of the data
    Returns:
        {} on success, or {"error": ...} when the requesting profile's
        permission level on the project is not above 1.
    """
    data = Data.objects.get(pk=data_pk)
    profile = request.user.profile
    project = data.project
    response = {}

    # Make sure coder is an admin
    if project_extras.proj_permission_level(project, profile) > 1:
        # remove it from the admin queue (raises if it is not queued there)
        queue = Queue.objects.get(project=project, type="admin")
        DataQueue.objects.get(data=data, queue=queue).delete()

        # update redis
        settings.REDIS.srem(redis_serialize_set(queue),
                            redis_serialize_data(data))

        # remove any IRR log data and clear the irr flag; one delete is
        # enough, nothing below creates new IRRLog rows for this datum
        IRRLog.objects.filter(data=data).delete()
        Data.objects.filter(pk=data_pk).update(irr_ind=False)
        RecycleBin.objects.create(data=data, timestamp=timezone.now())
    else:
        response["error"] = "Invalid credentials. Must be an admin."

    return Response(response)
def test_redis_serialize_data(test_project_data):
    """The redis key for a datum is ``data:`` followed by its primary key."""
    first_datum = test_project_data.data_set.first()
    serialized = redis_serialize_data(first_datum)
    expected_key = "data:" + str(first_datum.pk)

    assert serialized == expected_key