Beispiel #1
0
def select_and_label(dataset: Dataset,
                     model: Model,
                     topk: int,
                     choice_fn: Callable) -> tuple:
    """
    Selects data points from dataset according to criterion and updates the model.

    Each round draws a sample from the model, asks ``choice_fn`` which classes
    to label next, pops one observation from the queue of each selected class
    and feeds it to ``model.update``. Every ``LOG_FREQ`` labeled observations
    the current ``model.mpe()`` and ``model.confusion_matrix()`` are logged.

    Parameters
    ==========
    dataset : Dataset
        Dataset of predictions and observations. It is shuffled in place
        before its per-class queues are built.
    model : Model
        Bayesian assessment model. Updated in place.
    topk : int
        Number of classes to label per round. Must be at least 1. When fewer
        than ``topk`` of the proposed classes still have queued observations,
        a single class is labeled in that round instead.
    choice_fn : Callable
        Function used to identify the next class to be labeled. It is called
        with the output of ``model.sample()`` and must return an ordered
        sequence of keys into the dataset's queues.

    Returns
    =======
    mpe : np.ndarray
        Array with ``len(dataset) // LOG_FREQ`` rows and
        ``dataset.num_classes`` columns holding the logged ``model.mpe()``
        values.
    confusion_log : np.ndarray
        Array with ``len(dataset) // LOG_FREQ`` entries of shape
        ``(dataset.num_classes, dataset.num_classes)`` holding the logged
        ``model.confusion_matrix()`` values.

    Raises
    ======
    ValueError
        If ``topk`` is smaller than 1 (the labeling loop could never advance).
    """
    if topk < 1:
        raise ValueError(f"topk must be at least 1, got {topk}")

    # Shuffle the dataset and enqueue queries
    dataset.shuffle()
    queues = dataset.enqueue()

    n_samples = len(dataset)
    n_logs = n_samples // LOG_FREQ

    # Initialize outputs
    mpe = np.zeros((n_logs, dataset.num_classes))
    confusion_log = np.zeros((n_logs, dataset.num_classes, dataset.num_classes))

    # Run experiment
    i = 0
    while i < n_samples:
        sample = model.sample()
        choices = choice_fn(sample)

        # Only classes that still have unlabeled observations can be queried.
        candidates = [choice for choice in choices if len(queues[choice]) > 0]
        if not candidates:
            # Nothing left to label among the proposed classes. Stop instead of
            # failing on candidates[0]; rows not reached yet stay zero.
            # NOTE(review): assumes choice_fn ranks every class, so this only
            # happens once all queues are drained -- confirm against callers.
            break

        # Fall back to a single label when there are not enough candidates.
        # A per-round local is used so the caller's topk is never overwritten.
        n_label = topk if len(candidates) >= topk else 1

        for choice in candidates[:n_label]:
            observation = queues[choice].pop()
            model.update(choice, observation)

            i += 1
            if not i % LOG_FREQ:
                index = i // LOG_FREQ - 1
                mpe[index] = model.mpe()
                confusion_log[index] = model.confusion_matrix()

    # In case we're one short: make the last logged row reflect the final model
    # state. Guarded because the logs have no rows when n_samples < LOG_FREQ,
    # and applied to both logs so their final rows describe the same state.
    if n_logs:
        mpe[-1] = model.mpe()
        confusion_log[-1] = model.confusion_matrix()

    return mpe, confusion_log