Example #1
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes,
                                                 L2_reg,
                                                 return_parser=True)
    N_weights = parser.N

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    X_uniform = npr.rand(
        N_weights)  # Weights are uniform samples passed through an inverse CDF.
    bindict = {
        k: np.linspace(-1, 1, N_bins) *
        np.exp(log_param_scale)  # Different cdf per layer.
        for k, v in parser.idxs_and_shapes.iteritems()
    }
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        #X0, dX_dbins = bininvcdf(W_uniform, bins)
        X0 = np.zeros(N_weights)
        dX_dbins = {}
        for k, cur_bins in bindict.iteritems():
            cur_slice, cur_shape = parser.idxs_and_shapes[k]
            cur_xs = X_uniform[cur_slice]
            cur_X0, cur_dX_dbins = bininvcdf(cur_xs, cur_bins)
            X0[cur_slice] = cur_X0
            dX_dbins[k] = cur_dX_dbins
        results = sgd(indexed_loss_fun,
                      batch_idxs,
                      N_iters,
                      X0,
                      V0,
                      np.exp(log_alphas),
                      betas,
                      record_learning_curve=True)
        dL_dx = results['d_x']

        learning_curve = results['learning_curve']
        output.append((learning_curve, bindict))

        # Update bins with one gradient step.
        for k, bins in bindict.iteritems():
            dL_dbins = np.dot(parser.get(dL_dx, k).flatten(), dX_dbins[k])
            bins = bins - dL_dbins * bin_stepsize
            bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
            bins.sort()  # Sort in place.
            bindict[k] = bins

    return output
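These listings call bininvcdf(uniform_samples, bins) without defining it. Judging only from how its outputs are used (a sample vector X0 plus a Jacobian dX_dbins with one column per bin edge), a minimal sketch is a piecewise-linear inverse CDF such as the one below; the project's actual implementation may differ.

import numpy as np

def bininvcdf(uniform_samples, bins):
    # Hypothetical sketch: map uniform samples through a piecewise-linear
    # inverse CDF whose knots are the sorted bin edges, and also return the
    # Jacobian of the samples with respect to those edges.
    n_bins = len(bins)
    scaled = uniform_samples * (n_bins - 1)
    idxs = np.clip(np.floor(scaled).astype(int), 0, n_bins - 2)
    t = scaled - idxs                            # Position within each segment.
    x = bins[idxs] * (1 - t) + bins[idxs + 1] * t
    dx_dbins = np.zeros((len(uniform_samples), n_bins))
    dx_dbins[np.arange(len(x)), idxs] = 1 - t    # Each sample depends only on
    dx_dbins[np.arange(len(x)), idxs + 1] = t    # its two neighbouring edges.
    return x, dx_dbins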
Example #2
def run(oiter):
    # ----- Variable for this run -----
    log_alpha_0 = all_log_alpha_0[oiter]

    print "Running job {0} on {1}".format(oiter + 1, socket.gethostname())
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    alpha_0 = np.exp(log_alpha_0)
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        npr.seed(1)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas,
                      betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))

    return losses, d_losses
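d_log_loss is assumed but never defined in these listings. Its two call sites, d_log_loss(alpha_0, results['d_alphas']) here and d_log_loss(W0, results['d_x']) in Example #9, are both consistent with a chain-rule helper for a quantity parameterized on a log scale. A plausible one-liner, not necessarily the project's own definition:

import numpy as np

def d_log_loss(x, d_x):
    # Hypothetical sketch: gradient w.r.t. the log of a scale parameter.
    # If x = exp(s) * z for fixed z, then dL/ds = sum(x * dL/dx).
    return np.sum(x * d_x)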
Example #3
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)

    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w,
                        X=train_images[idxs],
                        T=train_labels[idxs],
                        L2_reg=L2_reg)

    losses = []
    d_losses = []
    for log_alpha_0 in all_log_alpha_0:
        npr.seed(0)
        V0 = npr.randn(N_weights) * velocity_scale
        alpha_0 = np.exp(log_alpha_0)
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas,
                      betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))

    return losses, d_losses
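BatchList(N_data, batch_size) is likewise assumed throughout. The listings only show that it has a length (used as iter_per_epoch) and that sgd passes its entries to indexed_loss_fun as idxs for indexing the training arrays. A minimal sketch consistent with that usage:

class BatchList(list):
    # Hypothetical sketch: a list of contiguous index slices covering N_total
    # examples in minibatches of N_batch (the last batch may be shorter).
    def __init__(self, N_total, N_batch):
        start = 0
        while start < N_total:
            self.append(slice(start, start + N_batch))
            start += N_batch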
Example #4
def run(oiter):
    # ----- Variable for this run -----
    log_alpha_0 = all_log_alpha_0[oiter]

    print "Running job {0} on {1}".format(oiter + 1, socket.gethostname())
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    alpha_0 = np.exp(log_alpha_0)
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        npr.seed(1)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))

    return losses, d_losses
Example #5
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_real_data, :]
    train_labels = train_labels[:N_real_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N

    fake_data = npr.randn(*(train_images[:N_fake_data, :].shape)) * init_fake_data_scale
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes, N_classes)  # One of each.

    def indexed_loss_fun(x, meta_params, idxs):   # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])
    def meta_loss_fun(x):                         # To be optimized in the outer loop.
        return loss_fun(x, X=train_images, T=train_labels)
    log_alphas = np.full(N_iters, log_alpha_0)
    betas      = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, fake_data)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        output.append((learning_curve, validation_loss, fake_data))
        fake_data -= results['dMd_meta'] * data_stepsize   # Update data with one gradient step.
        print "Meta iteration {0} Valiation loss {1}".format(i, validation_loss)
    return output
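Example #5 uses one_hot without defining it; Examples #14, #16, and #17 define an equivalent inline lambda, so a standalone version matching those listings is:

import numpy as np

def one_hot(x, K):
    # Row i is the K-dimensional indicator vector of the integer label x[i],
    # mirroring the inline lambda used in the later examples.
    return np.array(x[:, None] == np.arange(K)[None, :], dtype=int)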
Example #6
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas      = np.full(N_iters, beta_0)
    npr.seed(1)
    V0 = npr.randn(N_weights) * velocity_scale
    W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters,
                      W0, V0, np.exp(log_alphas), betas, record_learning_curve=True)
        learning_curve = results['learning_curve']
        d_log_alphas = np.exp(log_alphas) * results['d_alphas']
        output.append((learning_curve, log_alphas, d_log_alphas))
        log_alphas = log_alphas - meta_alpha * d_log_alphas

    return output
Example #7
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)

    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg)

    losses = []
    d_losses = []
    for log_alpha_0 in log_stepsizes:
        npr.seed(0)
        V0 = npr.randn(N_weights) * velocity_scale
        alpha_0 = np.exp(log_alpha_0)
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        W0 = npr.randn(N_weights) * np.exp(log_param_scale)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
        losses.append(results['loss_final'])
        d_losses.append(d_log_loss(alpha_0, results['d_alphas']))

    return losses, d_losses
Example #8
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(1)
    V0 = npr.randn(N_weights) * velocity_scale
    W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd(indexed_loss_fun,
                      batch_idxs,
                      N_iters,
                      W0,
                      V0,
                      np.exp(log_alphas),
                      betas,
                      record_learning_curve=True)
        learning_curve = results['learning_curve']
        d_log_alphas = np.exp(log_alphas) * results['d_alphas']
        output.append((learning_curve, log_alphas, d_log_alphas))
        log_alphas = log_alphas - meta_alpha * step_smooth(
            d_log_alphas, iter_per_epoch)

    return output
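step_smooth(d_log_alphas, iter_per_epoch) is also undefined here. From its use (it filters the per-iteration stepsize gradients before the meta update and receives the epoch length), it presumably smooths the gradient over each epoch; one illustrative guess:

import numpy as np

def step_smooth(d_log_alphas, iter_per_epoch):
    # Hypothetical sketch: average the stepsize gradients within each epoch so
    # that every iteration of an epoch receives the same meta-gradient step.
    smoothed = np.empty_like(d_log_alphas)
    for start in range(0, len(d_log_alphas), iter_per_epoch):
        block = slice(start, start + iter_per_epoch)
        smoothed[block] = np.mean(d_log_alphas[block])
    return smoothed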
Example #9
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        loss_curve = []
        d_loss_curve = []
        for log_param_scale in all_log_param_scale:
            print "log_param_scale {0}, N_iters {1}".format(log_param_scale, N_iters)
            npr.seed(1)
            W0 = npr.randn(N_weights) * np.exp(log_param_scale)
            results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
            loss_curve.append(results['loss_final'])
            d_loss_curve.append(d_log_loss(W0, results['d_x']))
        losses.append(loss_curve)
        d_losses.append(d_loss_curve)

    with open('results.pkl', 'w') as f:
        pickle.dump((losses, d_losses), f)
Example #10
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas      = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    bins = np.linspace(-1,1,N_bins) * np.exp(log_param_scale)
    W_uniform = npr.rand(N_weights)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        W0, dW_dbins = bininvcdf(W_uniform, bins)
        results = sgd(indexed_loss_fun, batch_idxs, N_iters,
                      W0, V0, np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']
        dL_dbins = np.dot(dL_dx, dW_dbins)
        learning_curve = results['learning_curve']
        output.append((learning_curve, bins))
        bins = bins - dL_dbins * bin_stepsize
        bins[[0,-1]] = bins[[0,-1]] - dL_dbins[[0,1]] * bin_stepsize
        bins.sort()  # Sort in place.

    return output
Example #11
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    V0 = npr.randn(N_weights) * velocity_scale
    losses = []
    d_losses = []
    for N_iters in all_N_iters:
        alphas = np.full(N_iters, alpha_0)
        betas = np.full(N_iters, beta_0)
        loss_curve = []
        d_loss_curve = []
        for log_param_scale in all_log_param_scale:
            print "log_param_scale {0}, N_iters {1}".format(log_param_scale, N_iters)
            npr.seed(1)
            W0 = npr.randn(N_weights) * np.exp(log_param_scale)
            results = sgd(indexed_loss_fun, batch_idxs, N_iters, W0, V0, alphas, betas)
            loss_curve.append(results['loss_final'])
            d_loss_curve.append(d_log_loss(W0, results['d_x']))
        losses.append(loss_curve)
        d_losses.append(d_loss_curve)

    with open('results.pkl', 'w') as f:
        pickle.dump((losses, d_losses), f)
Example #12
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(
        normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    truedatasize = np.std(val_images)

    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes,
                                                 L2_reg,
                                                 return_parser=True)
    N_weights = parser.N

    fake_data = npr.randn(
        *(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes,
                          N_classes)  # One of each.

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        log_prior = -fake_data_L2_reg * np.dot(meta_params.ravel(),
                                               meta_params.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, fake_data)
        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        fakedatasize = np.std(fake_data) / truedatasize
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss, fake_data,
                       fakedatasize))
        fake_data -= results[
            'dMd_meta'] * data_stepsize  # Update data with one gradient step.
        print "Meta iteration {0} Valiation loss {1} Test loss {2}"\
            .format(i, validation_loss, test_loss)
    return output
Example #13
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N
    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])
    log_alphas = np.full(N_iters, log_alpha_0)
    betas      = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)

    bindict = {k : np.linspace(-1,1,N_bins) * np.exp(log_param_scale)  # Different cdf per layer.
                   for k, v in parser.idxs_and_shapes.iteritems()}
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        #X0, dX_dbins = bininvcdf(W_uniform, bins)
        X_uniform = npr.rand(N_weights)  # Weights are uniform samples passed through an inverse CDF.
        X0 = np.zeros(N_weights)
        dX_dbins = {}
        for k, cur_bins in bindict.iteritems():
            cur_slice, cur_shape = parser.idxs_and_shapes[k]
            cur_xs = X_uniform[cur_slice]
            cur_X0, cur_dX_dbins = bininvcdf(cur_xs, cur_bins)
            X0[cur_slice] = cur_X0
            dX_dbins[k] = cur_dX_dbins
        results = sgd(indexed_loss_fun, batch_idxs, N_iters,
                      X0, V0, np.exp(log_alphas), betas, record_learning_curve=True)
        dL_dx = results['d_x']

        learning_curve = results['learning_curve']
        output.append((learning_curve, bindict))

        # Update bins with one gradient step.
        for k, bins in bindict.iteritems():
            dL_dbins = np.dot(parser.get(dL_dx, k).flatten(), dX_dbins[k])
            bins = bins - dL_dbins * bin_stepsize
            bins[[0,-1]] = bins[[0,-1]] - dL_dbins[[0,1]] * bin_stepsize
            bindict[k] = np.sort(bins)
        bindict = bindict.copy()

    return output
Example #14
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_real_data, :]
    train_labels = train_labels[:N_real_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes,
                                                 L2_reg,
                                                 return_parser=True)
    N_weights = parser.N

    #fake_data = npr.randn(*(train_images[:N_fake_data, :].shape))
    fake_data = np.zeros(train_images[:N_fake_data, :].shape)
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :],
                                    dtype=int)
    fake_labels = one_hot(np.array(range(0, 10)), 10)  # One of each label.

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])

    def meta_loss_fun(x):  # To be optimized in the outer loop.
        return loss_fun(x, X=train_images, T=train_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, fake_data)

        learning_curve = results['learning_curve']
        output.append((learning_curve, fake_data))
        fake_data -= results[
            'dMd_meta'] * data_stepsize  # Update data with one gradient step.

    return output
Example #15
def run():
    train_images, train_labels, _, _, _ = load_data()
    train_images = train_images[:N_data, :]
    train_labels = train_labels[:N_data, :]
    batch_idxs = BatchList(N_data, batch_size)
    iter_per_epoch = len(batch_idxs)
    N_weights, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg)

    def indexed_loss_fun(w, idxs):
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs])

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(2)
    V0 = npr.randn(N_weights) * velocity_scale
    #W0 = npr.randn(N_weights) * np.exp(log_param_scale)
    bins = np.linspace(-1, 1, N_bins) * np.exp(log_param_scale)
    W_uniform = npr.rand(N_weights)
    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        W0, dW_dbins = bininvcdf(W_uniform, bins)
        results = sgd(indexed_loss_fun,
                      batch_idxs,
                      N_iters,
                      W0,
                      V0,
                      np.exp(log_alphas),
                      betas,
                      record_learning_curve=True)
        dL_dx = results['d_x']
        dL_dbins = np.dot(dL_dx, dW_dbins)
        learning_curve = results['learning_curve']
        output.append((learning_curve, bins))
        bins = bins - dL_dbins * bin_stepsize
        bins[[0, -1]] = bins[[0, -1]] - dL_dbins[[0, 1]] * bin_stepsize
        bins.sort()  # Sort in place.

    return output
Example #16
def run():
    train_images, train_labels, _, _, _ = load_data(normalize=True)
    train_images = train_images[:N_real_data, :]
    train_labels = train_labels[:N_real_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes, L2_reg, return_parser=True)
    N_weights = parser.N

    # fake_data = npr.randn(*(train_images[:N_fake_data, :].shape))
    fake_data = np.zeros(train_images[:N_fake_data, :].shape)
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :], dtype=int)
    fake_labels = one_hot(np.array(range(0, 10)), 10)  # One of each label.

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        return loss_fun(x, X=meta_params[idxs], T=fake_labels[idxs])

    def meta_loss_fun(x):  # To be optimized in the outer loop.
        return loss_fun(x, X=train_images, T=train_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)
    npr.seed(0)
    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        print "Meta iteration {0}".format(i)
        results = sgd2(
            indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters, x0, v0, np.exp(log_alphas), betas, fake_data
        )

        learning_curve = results["learning_curve"]
        output.append((learning_curve, fake_data))
        fake_data -= results["dMd_meta"] * data_stepsize  # Update data with one gradient step.

    return output
Example #17
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(
        normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    true_data_scale = np.std(val_images)

    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    npr.seed(0)
    init_fake_data = npr.randn(
        *(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    one_hot = lambda x, K: np.array(x[:, None] == np.arange(K)[None, :],
                                    dtype=int)
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes,
                          N_classes)  # One of each.

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (1, ))
    hyperparser.add_weights('fake_data', init_fake_data.shape)
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N
    hyperparser.set(metas, 'log_L2_reg', init_log_L2_reg)
    hyperparser.set(metas, 'fake_data', init_fake_data)

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg')[0])
        fake_data = hyperparser.get(meta_params, 'fake_data')
        return loss_fun(x,
                        X=fake_data[idxs],
                        T=fake_labels[idxs],
                        L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        fake_data = hyperparser.get(meta_params, 'fake_data')
        log_prior = -fake_data_L2_reg * np.dot(fake_data.ravel(),
                                               fake_data.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)

    output = []
    velocity = np.zeros(hyperparser.N)
    for i in range(N_meta_iter):
        print "L2 reg is ", np.exp(hyperparser.get(metas,
                                                   'log_L2_reg')[0]), "| ",

        npr.seed(0)
        v0 = npr.randn(N_weights) * velocity_scale
        x0 = npr.randn(N_weights) * np.exp(log_param_scale)

        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)

        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_err = frac_err(results['x_final'], test_images, test_labels)
        fake_data_scale = np.std(hyperparser.get(
            metas, 'fake_data')) / true_data_scale
        test_loss = test_loss_fun(results['x_final'])
        output.append(
            (learning_curve, validation_loss, test_loss, fake_data_scale,
             np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), test_err))

        # Do meta-SGD with momentum
        g = results['dMd_meta']
        velocity = meta_momentum * velocity - (1.0 - meta_momentum) * g
        metas += velocity * meta_stepsize
        print "Meta iteration {0} Validation loss {1} Test loss {2} Test err {3}"\
            .format(i, validation_loss, test_loss, test_err)
    return output, hyperparser.get(metas, 'fake_data')
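Examples #17 and #18 pack the hyperparameters into one flat vector through WeightsParser, using only add_weights, get, set, and the total size N. A minimal sketch with that interface (the project's own parser may carry extra bookkeeping):

import numpy as np

class WeightsParser(object):
    # Hypothetical sketch: maps named parameter blocks to slices of a single
    # flat vector, so all hyperparameters can be updated with one SGD step.
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0

    def add_weights(self, name, shape):
        start = self.N
        self.N += int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(start, self.N), shape)

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

    def set(self, vect, name, val):
        idxs, _ = self.idxs_and_shapes[name]
        vect[idxs] = np.ravel(val)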
Example #18
def run():
    val_images, val_labels, test_images, test_labels, _ = load_data(normalize=True)
    val_images = val_images[:N_val_data, :]
    val_labels = val_labels[:N_val_data, :]
    true_data_scale = np.std(val_images)

    test_images = test_images[:N_test_data, :]
    test_labels = test_labels[:N_test_data, :]
    batch_idxs = BatchList(N_fake_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = len(parser.vect)

    npr.seed(0)
    init_fake_data = npr.randn(*(val_images[:N_fake_data, :].shape)) * init_fake_data_scale
    one_hot = lambda x, K : np.array(x[:,None] == np.arange(K)[None, :], dtype=int)
    fake_labels = one_hot(np.array(range(N_fake_data)) % N_classes, N_classes)  # One of each.

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (1,))
    hyperparser.add_weights('fake_data', init_fake_data.shape)
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N
    hyperparser.set(metas, 'log_L2_reg', init_log_L2_reg)
    hyperparser.set(metas, 'fake_data', init_fake_data)

    def indexed_loss_fun(x, meta_params, idxs):   # To be optimized by SGD.
        L2_reg=np.exp(hyperparser.get(meta_params, 'log_L2_reg')[0])
        fake_data=hyperparser.get(meta_params, 'fake_data')
        return loss_fun(x, X=fake_data[idxs], T=fake_labels[idxs], L2_reg=L2_reg)
    def meta_loss_fun(x, meta_params):            # To be optimized in the outer loop.
        fake_data=hyperparser.get(meta_params, 'fake_data')
        log_prior = -fake_data_L2_reg * np.dot(fake_data.ravel(), fake_data.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior
    def test_loss_fun(x):                         # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas      = np.full(N_iters, beta_0)

    output = []
    for i in range(N_meta_iter):
        print "L2 reg is ", np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), "| ",

        npr.seed(0)
        v0 = npr.randn(N_weights) * velocity_scale
        x0 = npr.randn(N_weights) * np.exp(log_param_scale)

        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)

        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_err = frac_err(results['x_final'], test_images, test_labels)
        fake_data_scale = np.std(hyperparser.get(metas, 'fake_data')) / true_data_scale
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss, fake_data_scale,
                       np.exp(hyperparser.get(metas, 'log_L2_reg')[0]), test_err))

        metas -= results['dMd_meta'] * meta_stepsize
        print "Meta iteration {0} Validation loss {1} Test loss {2} Test err {3}"\
            .format(i, validation_loss, test_loss, test_err)
    return output, hyperparser.get(metas, 'fake_data')