def test_compare_predictors(hang_plot = False):
    # Compare an offline SVM against two online perceptrons (a fast and a slow
    # learning rate) on a synthetic cluster dataset, and check that their test
    # scores fall in the expected ranges.
    import numpy as np
    from sklearn.svm import SVC
    dataset = get_synthetic_clusters_dataset()

    w_constructor = lambda rng = np.random.RandomState(45): .1*rng.randn(dataset.input_shape[0], dataset.n_categories)
    records = compare_predictors(
        dataset = dataset,
        offline_predictors={
            'SVM': SVC()
            },
        online_predictors={
            'fast-perceptron': Perceptron(alpha = 0.1, w = w_constructor()).to_categorical(),
            'slow-perceptron': Perceptron(alpha = 0.001, w = w_constructor()).to_categorical()
            },
        minibatch_size = 10,
        test_epochs = sqrtspace(0, 10, 20),
        evaluation_function='percent_correct'
        )

    assert 99 < records['SVM'].get_scores('Test') <= 100
    assert 20 < records['slow-perceptron'].get_scores('Test')[0] < 40
    assert 95 < records['slow-perceptron'].get_scores('Test')[-1] <= 100
    assert 20 < records['fast-perceptron'].get_scores('Test')[0] < 40
    assert 98 < records['fast-perceptron'].get_scores('Test')[-1] <= 100

    plot_learning_curves(records, hang = hang_plot)
def compare_predictors(dataset, online_predictors={}, offline_predictors={}, minibatch_size = 'full',
        evaluation_function = 'mse', test_epochs = sqrtspace(0, 1, 10), report_test_scores = True,
        test_on = 'training+test', test_batch_size = None, accumulators = None, online_test_callbacks = {}):
    """
    DEPRECATED: use train_and_test_online_predictor instead.

    Compare a set of predictors by running them on a dataset, and return the learning curves for each predictor.

    :param dataset: A DataSet object
    :param online_predictors: A dict<str:IPredictor> of online predictors.  An online predictor is
        sequentially fed minibatches of data and updates its parameters with each minibatch.
    :param offline_predictors: A dict<str:object> of offline predictors.  Offline predictors obey sklearn's
        Estimator/Predictor interface - i.e. they have the methods
            estimator = object.fit(data, targets) and
            prediction = object.predict(data)
    :param minibatch_size: Size of the minibatches to use for online predictors.  Can be:
        An int, in which case it represents the minibatch size for all classifiers.
        A dict<str: int>, in which case you can set the minibatch size per-classifier.
        In place of the int, you can put 'all' if you want to train on the whole dataset in each iteration.
    :param test_epochs: Test points to use for online predictors.  Can be:
        A list of integers - in which case the classifier is tested after seeing this many samples.
        A list of floats - in which case the classifier is tested after seeing this many epochs.
        'always' - in which case a test is performed after every training step.
        The final test point determines the end of training.
    :param evaluation_function: Function used to evaluate output of predictors
    :param report_test_scores: Boolean indicating whether you'd like to report results online.
    :param test_on: 'training', 'test', 'training+test'
    :param test_batch_size: When the test set is too large to process in one step, use this to break it
        up into chunks.
    :param accumulators: A dict<str: accum_fcn>, where accum_fcn is a stateful function of the form:
        accumulated_output = accum_fcn(this_output)
        Special case: accum_fcn can be 'avg' to make a running average.
    :param online_test_callbacks: A dict<str: fcn> where fcn is a callback that takes an online
        predictor as an argument.  Useful for logging/plotting/debugging progress during training.
    :return: An OrderedDict<LearningCurveData>
    """

    all_keys = list(online_predictors.keys()) + list(offline_predictors.keys())
    assert len(all_keys) > 0, 'You have to give at least one predictor.  Is that too much to ask?'
    assert len(all_keys) == len(np.unique(all_keys)), "You have multiple predictors using the same names. Change that."
    type_constructor_dict = OrderedDict(
        [(k, ('offline', offline_predictors[k])) for k in sorted(offline_predictors.keys())] +
        [(k, ('online', online_predictors[k])) for k in sorted(online_predictors.keys())]
        )

    minibatch_size = _pack_into_dict(minibatch_size, expected_keys=online_predictors.keys())
    accumulators = _pack_into_dict(accumulators, expected_keys=online_predictors.keys())
    online_test_callbacks = _pack_into_dict(online_test_callbacks, expected_keys=online_predictors.keys(), allow_subset=True)
    test_epochs = np.array(test_epochs)
    if isinstance(evaluation_function, str):
        evaluation_function = get_evaluation_function(evaluation_function)

    records = OrderedDict()

    # Run each predictor: offline predictors are fit once, online predictors are trained on minibatches and tested at the given epochs.
    for predictor_name, (predictor_type, predictor) in type_constructor_dict.items():
        print('%s\nRunning predictor %s\n%s' % ('='*20, predictor_name, '-'*20))
        records[predictor_name] = \
            assess_offline_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                report_test_scores = report_test_scores,
                test_on = test_on,
                test_batch_size = test_batch_size
                ) if predictor_type == 'offline' else \
            assess_online_predictor(
                predictor=predictor,
                dataset = dataset,
                evaluation_function = evaluation_function,
                test_epochs = test_epochs,
                accumulator = accumulators[predictor_name],
                minibatch_size = minibatch_size[predictor_name],
                report_test_scores = report_test_scores,
                test_on = test_on,
                test_batch_size = test_batch_size,
                test_callback=online_test_callbacks[predictor_name] if predictor_name in online_test_callbacks else None
                ) if predictor_type == 'online' else \
            bad_value(predictor_type)

    print('Done!')

    return records
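

# The sketch below is illustrative, not part of the module's API.  It shows how the
# per-predictor minibatch_size dict and the accumulators argument documented above
# might be used.  RunningAverageAccumulator and _example_compare_predictors_usage are
# hypothetical names introduced here; RunningAverageAccumulator is one possible
# stateful accum_fcn of the form described in the docstring (presumably similar to
# the 'avg' special case), and the predictor settings are arbitrary.  It assumes
# get_synthetic_clusters_dataset, Perceptron and sqrtspace are available, as in
# test_compare_predictors above.

class RunningAverageAccumulator(object):
    """A stateful callable of the form accumulated_output = accum_fcn(this_output):
    each call returns the running mean of all outputs passed in so far."""

    def __init__(self):
        self._total = None
        self._n = 0

    def __call__(self, this_output):
        self._n += 1
        self._total = this_output if self._total is None else self._total + this_output
        return self._total / float(self._n)


def _example_compare_predictors_usage():
    dataset = get_synthetic_clusters_dataset()
    make_w = lambda seed: .1*np.random.RandomState(seed).randn(dataset.input_shape[0], dataset.n_categories)
    records = compare_predictors(
        dataset = dataset,
        online_predictors = {
            'perceptron-small-batch': Perceptron(alpha = 0.01, w = make_w(0)).to_categorical(),
            'perceptron-big-batch': Perceptron(alpha = 0.01, w = make_w(0)).to_categorical(),
            },
        minibatch_size = {'perceptron-small-batch': 5, 'perceptron-big-batch': 50},  # Per-predictor minibatch sizes
        accumulators = {
            'perceptron-small-batch': RunningAverageAccumulator(),
            'perceptron-big-batch': RunningAverageAccumulator(),
            },
        test_epochs = sqrtspace(0, 5, 10),
        evaluation_function = 'percent_correct',
        )
    return records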