コード例 #1
0
def run():
    (train_images, train_labels), (val_images, val_labels), (test_images, test_labels) \
        = load_data_subset(N_train_data, N_val_data, N_test_data)

    batch_idxs = BatchList(N_train_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = parser.N

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (N_weights, ))
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N

    npr.seed(0)
    hyperparser.set(metas, 'log_L2_reg', log_L2_reg_scale + np.ones(N_weights))

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg'))
        return loss_fun(x,
                        X=train_images[idxs],
                        T=train_labels[idxs],
                        L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg'))
        log_prior = -meta_L2_reg * np.dot(L2_reg.ravel(), L2_reg.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)

    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)

        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_loss = test_loss_fun(results['x_final'])
        weightparser = parser.new_vect(results['x_final'])
        l2parser = parser.new_vect(np.exp(hyperparser.get(metas,
                                                          'log_L2_reg')))
        output.append((learning_curve, validation_loss, test_loss,
                       weightparser[('weights', 0)], l2parser[('weights', 0)]))
        metas -= results['dMd_meta'] * meta_stepsize
        print "Meta iteration {0} Valiation loss {1} Test loss {2}"\
            .format(i, validation_loss, test_loss)
    return output
コード例 #2
0
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(
        normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    true_data_scale = np.std(val_images)

    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    npr.seed(0)
    init_fake_data = npr.randn(
        *(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :],
                                    dtype=int)
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes,
                          N_classes)  # One of each.

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (1, ))
    hyperparser.add_weights('fake_data', init_fake_data.shape)
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N
    hyperparser.set(metas, 'log_L2_reg', init_log_L2_reg)
    hyperparser.set(metas, 'fake_data', init_fake_data)

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg')[0])
        fake_data = hyperparser.get(meta_params, 'fake_data')
        return loss_fun(x,
                        X=fake_data[idxs],
                        T=fake_labels[idxs],
                        L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        fake_data = hyperparser.get(meta_params, 'fake_data')
        log_prior = -fake_data_L2_reg * np.dot(fake_data.ravel(),
                                               fake_data.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)

    output = []
    velocity = np.zeros(hyperparser.N)
    for i in range(N_meta_iter):
        print "L2 reg is ", np.exp(hyperparser.get(metas,
                                                   'log_L2_reg')[0]), "| ",

        npr.seed(0)
        v0 = npr.randn(N_weights) * velocity_scale
        x0 = npr.randn(N_weights) * np.exp(log_param_scale)

        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)

        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_err = frac_err(results['x_final'], test_images, test_labels)
        fake_data_scale = np.std(hyperparser.get(
            metas, 'fake_data')) / true_data_scale
        test_loss = test_loss_fun(results['x_final'])
        output.append(
            (learning_curve, validation_loss, test_loss, fake_data_scale,
             np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), test_err))

        # Do meta-SGD with momentum
        g = results['dMd_meta']
        velocity = meta_momentum * velocity - (1.0 - meta_momentum) * g
        metas += velocity * meta_stepsize
        print "Meta iteration {0} Validation loss {1} Test loss {2} Test err {3}"\
            .format(i, validation_loss, test_loss, test_err)
    return output, hyperparser.get(metas, 'fake_data')