def assess_online_predictor(predictor,
                            dataset,
                            evaluation_function,
                            test_epochs,
                            minibatch_size,
                            test_on='training+test',
                            accumulator=None,
                            report_test_scores=True,
                            test_batch_size=None,
                            test_callback=None):
    """
    DEPRECATED: use assess_prediction_functions_on_generator in train_and_test.py

    Train an online predictor and return the LearningCurveData.

    :param predictor:  An IPredictor object
    :param dataset: A DataSet object
    :param evaluation_function: A function of the form: score=fcn(actual_values, target_values)
    :param test_epochs: List of epochs to test at.  Eg. [0.5, 1, 2, 4]
    :param minibatch_size: Number of samples per minibatch, or:
        'full' to do full-batch.
        'stretch': to stretch the size of each batch so that we make just one call to "train" between each test.  Use
            this, for instance, if your predictor trains on one sample at a time in sequence anyway.
    :param test_on: Which sets to test on: 'training', 'test', or 'training+test'.
    :param accumulator: Optionally, a stateful function of the form accumulated_output = accum_fcn(this_output),
        used to accumulate predictions over successive tests, or 'avg' to keep a running average.  None to just
        use the predictor's current output.
    :param report_test_scores: Print out the test scores as they're computed (T/F)
    :param test_batch_size: When the test set is too large to process in one step, use this to break it up
        into chunks.
    :param test_callback: A callback which takes the predictor, and is called every time a test
        is done.  This can be useful for plotting/debugging the state.
    :return: LearningCurveData containing the score on the test sets
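
    Example (a sketch only - MyOnlinePredictor and my_dataset are hypothetical stand-ins for an IPredictor
    implementation and a DataSet; neither is defined in this module):

        record = assess_online_predictor(
            predictor=MyOnlinePredictor(),
            dataset=my_dataset,
            evaluation_function='mse',
            test_epochs=[0, 0.5, 1, 2],
            minibatch_size=20,
            )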

    """
    # TODO: Remove this function, as it is deprecated

    record = LearningCurveData()

    testing_sets = dataset_to_testing_sets(dataset, test_on)
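    # testing_sets maps each set name to an (input, target) pair (see the k, (x, y) unpacking in do_test).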
    if accumulator is None:
        prediction_functions = {k: predictor.predict for k in testing_sets}
    else:
        accum_constructor = {'avg': RunningAverage}[accumulator] \
            if isinstance(accumulator, str) else accumulator
        accumulators = {k: accum_constructor() for k in testing_sets}
        prediction_functions = {
            k: lambda inp, kp=k: accumulators[kp](predictor.predict(inp))
            for k in testing_sets
        }
        # Beware the in-loop lambda - the kp=k default argument binds k at definition time, so we're ok here.

    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    def do_test(current_epoch):
        scores = [(k,
                   evaluation_function(
                       process_in_batches(prediction_functions[k], x,
                                          test_batch_size), y))
                  for k, (x, y) in testing_sets.iteritems()]
        if report_test_scores:
            print 'Scores at Epoch %s: %s, after %.2fs' % (
                current_epoch, ', '.join(
                    '%s: %.3f' % (set_name, score)
                    for set_name, score in scores), time.time() - start_time)
        record.add(current_epoch, scores)
        if test_callback is not None:
            record.add(current_epoch, ('callback', test_callback(predictor)))

    start_time = time.time()
    if minibatch_size == 'stretch':
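        # 'stretch' mode: turn each test epoch into a sample-count checkpoint, and make exactly one call
        # to predictor.train on all the training samples between consecutive checkpoints.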
        test_samples = (np.array(test_epochs) *
                        dataset.training_set.n_samples).astype(int)
        i = 0
        if test_samples[0] == 0:
            # If the first checkpoint is at zero samples, test the untrained predictor first.
            do_test(test_epochs[i])
            i += 1
        for indices in checkpoint_minibatch_index_generator(
                n_samples=dataset.training_set.n_samples,
                checkpoints=test_samples,
                slice_when_possible=True):
            predictor.train(dataset.training_set.input[indices],
                            dataset.training_set.target[indices])
            do_test(test_epochs[i])
            i += 1
    else:
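        # Fixed minibatch size: iterate over minibatches indefinitely, testing whenever CheckPointCounter
        # reports that a test epoch has been passed, and stopping once the last test epoch is done.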
        checker = CheckPointCounter(test_epochs)
        last_n_samples_seen = 0
        for (n_samples_seen, input_minibatch, target_minibatch) in dataset.training_set.minibatch_iterator(
                minibatch_size=minibatch_size, epochs=float('inf'), single_channel=True):
            current_epoch = float(last_n_samples_seen) / dataset.training_set.n_samples
            last_n_samples_seen = n_samples_seen
            time_for_a_test, done = checker.check(current_epoch)
            if time_for_a_test:
                do_test(current_epoch)
            if done:
                break
            predictor.train(input_minibatch, target_minibatch)

    return record


def compare_predictors(dataset,
                       online_predictors={},
                       offline_predictors={},
                       minibatch_size='full',
                       evaluation_function='mse',
                       test_epochs=sqrtspace(0, 1, 10),
                       report_test_scores=True,
                       test_on='training+test',
                       test_batch_size=None,
                       accumulators=None,
                       online_test_callbacks={}):
    """
    DEPRECATED: use train_and_test_online_predictor instead.

    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interface - i.e. they have the methods
            estimator = object.fit(data, targets) and
            prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'full' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - In which case a test is performed after every training step
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param test_on: 'training', 'test', 'training+test'
    :param test_batch_size: When the test set is too large to process in one step, use this to break it
        up into chunks.
    :param accumulators: A dict<str: accum_fcn>, where accum_fcn is a stateful-function of the form:
        accumulated_output = accum_fcn(this_output)
        Special case: accum_fcn can be 'avg' to make a running average.
    :param online_test_callbacks: A dict<str: fcn> where fcn is a callback that takes an online
        predictor as an argument.  Useful for logging/plotting/debugging progress during training.
    :return: An OrderedDict<LearningCurveData>
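
    Example (a sketch only - my_dataset and MyOnlinePredictor are hypothetical; any sklearn estimator,
    such as sklearn.linear_model.LinearRegression, can serve as an offline predictor):

        learning_curves = compare_predictors(
            dataset=my_dataset,
            offline_predictors={'linear': LinearRegression()},
            online_predictors={'my-predictor': MyOnlinePredictor()},
            minibatch_size=20,
            test_epochs=[0, 1, 2, 4],
            evaluation_function='mse',
            )
        # learning_curves maps each predictor name to its LearningCurveData.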
    """

    all_keys = online_predictors.keys() + offline_predictors.keys()
    assert len(all_keys) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(np.unique(all_keys)), \
        "You have multiple predictors using the same names. Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k]))
         for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k]))
         for k in sorted(online_predictors.keys())])

    minibatch_size = _pack_into_dict(minibatch_size,
                                     expected_keys=online_predictors.keys())
    accumulators = _pack_into_dict(accumulators,
                                   expected_keys=online_predictors.keys())
    online_test_callbacks = _pack_into_dict(
        online_test_callbacks,
        expected_keys=online_predictors.keys(),
        allow_subset=True)
    test_epochs = np.array(test_epochs)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run each predictor (offline predictors first, then the online ones)
    for predictor_name, (predictor_type,
                         predictor) in type_constructor_dict.iteritems():
        print '%s\nRunning predictor %s\n%s' % ('=' * 20, predictor_name,
                                                '-' * 20)
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset=dataset,
                evaluation_function=evaluation_function,
                test_epochs=test_epochs,
                accumulator=accumulators[predictor_name],
                minibatch_size=minibatch_size[predictor_name],
                report_test_scores=report_test_scores,
                test_on=test_on,
                test_batch_size=test_batch_size,
                test_callback=online_test_callbacks.get(predictor_name)
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print 'Done!'

    return records