Example #1
import numpy as np
# -- toyproblem, Ctrl, and mean_acc come from the surrounding test suite
#    and are not shown in this snippet.

def train_run(n_examples, n_features, decisions_in, rseed=1):
    # -- need y for margin computation
    X, y = toyproblem.digits_xy(0, n_examples)
    assert len(y) == n_examples
    config = dict(
        n_examples_train=n_examples,
        n_examples_test=0,
        n_folds=5,
        feat_spec=dict(seed=rseed,
            n_features=n_features,
            scale=2.2),
        decisions=decisions_in, # gets clobbered by ctrl attachment
        save_svms=False,      # good to test False sometimes
        svm_l2_regularization=1e-3,
        svm_max_observations=1e5,
        )
    ctrl = Ctrl(None)
    BI = toyproblem.BoostableDigits()
    result = BI.evaluate(config, ctrl)
    decisions = np.asarray(result['decisions'])
    assert decisions.shape == (5, n_examples)
    print(decisions.shape)
    print('mean abs decisions', abs(decisions).mean(), end=' ')
    print('mean margins', 1 - np.minimum(decisions * y, 1).mean(), end=' ')
    tr_acc, te_acc = mean_acc(result)
    print('train_accuracy', tr_acc, end=' ')
    print('test_accuracy', te_acc, end=' ')
    print('')
    return decisions, tr_acc, te_acc
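
Note: mean_acc is a helper from the surrounding test suite and is not shown in this snippet. Judging from how Example #2 aggregates the per-fold scores inline, a plausible reconstruction (name and shape of the sketch are hypothetical) would be:

import numpy as np

def mean_acc(result):
    # Hypothetical sketch: average the per-fold accuracies recorded under
    # result['splits'], mirroring the inline aggregation in Example #2.
    tr = np.mean([rr['train_accuracy'] for rr in result['splits']])
    te = np.mean([rr['test_accuracy'] for rr in result['splits']])
    return tr, te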
Example #2
def test_boosting_margin_goes_down():
    n_examples = 1750
    X, y = toyproblem.digits_xy(0, n_examples)
    n_rounds = 8
    margins = []
    decisions = None
    for round_ii in range(n_rounds):
        config = dict(
            n_examples_train=n_examples,
            n_examples_test=0,
            n_folds=5,
            feat_spec=dict(seed=round_ii, n_features=16, scale=2.2),
            decisions=decisions,
            save_svms=False,       # good to test False sometimes
            svm_l2_regularization=1e-3,
            svm_max_observations=1e3,
            )
        ctrl = Ctrl(None)
        BI = toyproblem.BoostableDigits()
        result = BI.evaluate(config, ctrl)
        decisions = np.asarray(result['decisions'])
        assert decisions.shape == (5, n_examples)
        print('mean abs decisions', abs(decisions).mean(), end=' ')
        margins.append(1 - np.minimum(decisions * y, 1).mean())
        for key in 'train_accuracy', 'test_accuracy':
            print(key, np.mean([rr[key] for rr in result['splits']]), end=' ')
        print('')
    print(margins)
    print(list(reversed(margins)))
    print(list(sorted(margins)))
    # -- the mean margin slack should shrink every boosting round
    assert list(reversed(margins)) == list(sorted(margins))
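
The final assertion passes only when the margin slack is non-increasing across rounds: reversing the list must give the same order as sorting it ascending. An equivalent check that reports the first offending round on failure could look like this (the helper name is ours, not the test suite's):

def assert_monotone_decreasing(margins):
    # Hypothetical alternative to the reversed-vs-sorted comparison above;
    # it fails with the round index and the offending pair of values.
    for ii in range(1, len(margins)):
        assert margins[ii] <= margins[ii - 1], (ii, margins[ii - 1], margins[ii])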
Example #3
def test_boosting_for_smoke():
    n_examples = 1790
    X, y = toyproblem.digits_xy(0, n_examples)
    assert len(y) == n_examples

    n_rounds = 16
    n_features_per_round = 16

    print('Training jointly')
    _, joint_tr_acc, joint_te_acc = train_run(
            n_examples,
            n_rounds * n_features_per_round,
            None)

    print('Training one round')
    _, one_tr_acc, one_te_acc = train_run(
            n_examples,
            n_features_per_round,
            None)

    # -- train in rounds
    print('Training in rounds')
    decisions = None
    for round_ii in range(n_rounds):
        decisions, tr_acc, te_acc = train_run(
                n_examples,
                n_features_per_round,
                decisions,
                rseed=round_ii)

    # -- round-trained and jointly-trained models should both be far more
    #    accurate than a single round of training
    assert joint_tr_acc > 95
    assert joint_te_acc > 88
    assert one_tr_acc < 72
    assert one_te_acc < 72
    assert tr_acc > 90
    assert te_acc > 88
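
The thresholds above imply that accuracies are reported as percentages. For a quick side-by-side view of the three training regimes, a sketch like the following could be appended to the test body (it uses only the variables already defined above):

    for name, tr, te in [('joint', joint_tr_acc, joint_te_acc),
                         ('one round', one_tr_acc, one_te_acc),
                         ('boosted rounds', tr_acc, te_acc)]:
        print('%-14s train=%5.1f%% test=%5.1f%%' % (name, tr, te))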