Example #1
import numpy as np
import numpy.random as npr
from autograd import grad  # assumption: an autodiff grad; the project may use its own

# Assumed context (not shown in this listing): the project's helpers
# (load_data_subset, make_sq_exp_kernel, augment_data, weighted_neighbors_loss,
# BatchList, batchwise_function) and module-level constants such as N_train,
# N_valid, N_test, L0, N_pix, batch_size, A_init_scale, N_meta_iters,
# meta_alpha and meta_L1.
def run():
    train_data, valid_data, test_data = load_data_subset(
        N_train, N_valid, N_test)
    kernel = make_sq_exp_kernel(L0)

    def loss_fun(transform, train_data, valid_data):
        train_data = augment_data(train_data, transform)
        return weighted_neighbors_loss(train_data, valid_data, kernel)

    # Take the gradient w.r.t. the transform before wrapping; both the loss
    # and its gradient are then evaluated batchwise.
    loss_grad = batchwise_function(grad(loss_fun))
    loss_fun = batchwise_function(loss_fun)

    batch_idxs = BatchList(N_valid, batch_size)
    A = np.eye(N_pix)
    # Record baseline losses at the identity transform, then perturb A.
    valid_losses = [loss_fun(A, train_data, valid_data)]
    test_losses = [loss_fun(A, train_data, test_data)]
    A += A_init_scale * npr.randn(N_pix, N_pix)
    for meta_iter in range(N_meta_iters):
        print "Iter {0} valid {1} test {2}".format(meta_iter, valid_losses[-1],
                                                   test_losses[-1])

        for idxs in batch_idxs:
            valid_batch = [x[idxs] for x in valid_data]
            d_A = loss_grad(A, train_data, valid_batch)
            # Gradient step on the validation loss with an L1 penalty on A.
            A -= meta_alpha * (d_A + meta_L1 * np.sign(A))
        valid_losses.append(loss_fun(A, train_data, valid_data))
        test_losses.append(loss_fun(A, train_data, test_data))

    return A, valid_losses, test_losses
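Example #1 tunes a linear pixel transform A (applied by augment_data) by gradient
descent with an L1 penalty, using a kernel-weighted nearest-neighbours validation
loss as the objective. The helpers it calls live elsewhere in the project; the
following is only a sketch of what make_sq_exp_kernel and weighted_neighbors_loss
could look like, with signatures inferred from the call sites above rather than
taken from the actual source.

import numpy as np

def make_sq_exp_kernel(length_scale):
    """Squared-exponential kernel between rows of two image matrices (sketch)."""
    def kernel(x, y):
        # x: (N1, D), y: (N2, D) -> pairwise squared distances, shape (N1, N2)
        sq_dists = np.sum((x[:, None, :] - y[None, :, :]) ** 2, axis=2)
        return np.exp(-sq_dists / (2.0 * length_scale ** 2))
    return kernel

def weighted_neighbors_loss(train_data, valid_data, kernel):
    """Mean NLL of kernel-smoothed label predictions on the validation set (sketch)."""
    train_images, train_labels = train_data   # labels assumed one-hot
    valid_images, valid_labels = valid_data
    weights = kernel(valid_images, train_images)                # (N_valid, N_train)
    weights = weights / np.sum(weights, axis=1, keepdims=True)  # normalize rows
    pred_probs = np.dot(weights, train_labels)                  # (N_valid, N_classes)
    return -np.mean(np.sum(valid_labels * np.log(pred_probs), axis=1))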
Example #3
def run():
    (train_images, train_labels), (val_images, val_labels), (test_images, test_labels) \
        = load_data_subset(N_train_data, N_val_data, N_test_data)

    batch_idxs = BatchList(N_train_data, batch_size)
    parser, _, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weights = parser.N

    hyperparser = WeightsParser()
    hyperparser.add_weights('log_L2_reg', (N_weights, ))
    metas = np.zeros(hyperparser.N)
    print "Number of hyperparameters to be trained:", hyperparser.N

    npr.seed(0)
    hyperparser.set(metas, 'log_L2_reg', log_L2_reg_scale + np.ones(N_weights))

    def indexed_loss_fun(x, meta_params, idxs):  # To be optimized by SGD.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg'))
        return loss_fun(x,
                        X=train_images[idxs],
                        T=train_labels[idxs],
                        L2_reg=L2_reg)

    def meta_loss_fun(x, meta_params):  # To be optimized in the outer loop.
        L2_reg = np.exp(hyperparser.get(meta_params, 'log_L2_reg'))
        log_prior = -meta_L2_reg * np.dot(L2_reg.ravel(), L2_reg.ravel())
        return loss_fun(x, X=val_images, T=val_labels) - log_prior

    def test_loss_fun(x):  # To measure actual performance.
        return loss_fun(x, X=test_images, T=test_labels)

    log_alphas = np.full(N_iters, log_alpha_0)
    betas = np.full(N_iters, beta_0)

    v0 = npr.randn(N_weights) * velocity_scale
    x0 = npr.randn(N_weights) * np.exp(log_param_scale)

    output = []
    for i in range(N_meta_iter):
        # sgd2 (project code) runs the inner SGD and returns the final weights
        # plus reverse-mode hypergradients such as 'dMd_meta'.
        results = sgd2(indexed_loss_fun, meta_loss_fun, batch_idxs, N_iters,
                       x0, v0, np.exp(log_alphas), betas, metas)

        learning_curve = results['learning_curve']
        validation_loss = results['M_final']
        test_loss = test_loss_fun(results['x_final'])
        output.append((learning_curve, validation_loss, test_loss,
                       parser.get(np.exp(hyperparser.get(metas, 'log_L2_reg')),
                                  ('weights', 0))))
        metas -= results['dMd_meta'] * meta_stepsize  # hypergradient descent step
        print "Meta iteration {0} Valiation loss {1} Test loss {2}"\
            .format(i, validation_loss, test_loss)
    return output
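Example #3 learns one log-L2 penalty per network weight by descending the
hypergradient 'dMd_meta' returned by sgd2. The WeightsParser it uses to address
named slices of a flat vector is project code; a minimal sketch consistent with
the add_weights/get/set calls above (an assumption, not the original class):

import numpy as np

class WeightsParser(object):
    """Address named, shaped slots inside one flat parameter vector (sketch)."""
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0  # total number of scalars registered so far

    def add_weights(self, name, shape):
        size = int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(self.N, self.N + size), shape)
        self.N += size

    def get(self, vect, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(vect[idxs], shape)

    def set(self, vect, name, value):
        idxs, _ = self.idxs_and_shapes[name]
        vect[idxs] = np.ravel(value)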
Example #5
import os
import pickle

import numpy as np

# `L` (image side length) and load_data_subset are assumed to come from the
# surrounding project.
def build_test_images():
    if os.path.isfile('test_images.pkl'):
        with open('test_images.pkl', 'rb') as f:  # pickle requires binary mode
            all_images = pickle.load(f)
    else:
        vert_stripes = np.zeros((L, L))
        vert_stripes[:, ::4] = 1.0
        vert_stripes[:, 1::4] = 1.0
        horz_stripes = np.zeros((L, L))
        horz_stripes[::4, :] = 1.0
        horz_stripes[1::4, :] = 1.0
        mnist_imgs = load_data_subset(4)[0][0]
        all_images = np.concatenate((vert_stripes.reshape(
            1, L * L), horz_stripes.reshape(1, L * L), mnist_imgs),
                                    axis=0)
        with open('test_images.pkl', 'wb') as f:  # pickle requires binary mode
            pickle.dump(all_images, f)

    return all_images
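A quick usage sketch, assuming L = 28 (MNIST-sized images) and that
load_data_subset returns flattened images: the cached array stacks two synthetic
stripe patterns on top of four MNIST digits.

all_images = build_test_images()
print(all_images.shape)  # expected: (6, L * L) -- 2 stripe patterns + 4 MNIST images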
Example #7
def run():
    (train_images, train_labels),\
    (valid_images, valid_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg'] = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types,
                                             init_log_param_scale)
    hyperparams['log_alphas'] = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas'] = np.full(N_iters, init_invlogit_betas)

    def indexed_loss_fun(w, log_L2_reg, i):
        idxs = batch_idxs[i % len(batch_idxs)]
        # One penalty per weight group, broadcast over that group's size
        # (comprehension index renamed to j to avoid shadowing the iter index i).
        partial_vects = [
            np.full(parser[name].size, np.exp(log_L2_reg[j]))
            for j, name in enumerate(parser.names)
        ]
        L2_reg_vect = np.concatenate(partial_vects, axis=0)
        return loss_fun(w,
                        X=train_images[idxs],
                        T=train_labels[idxs],
                        L2_reg=L2_reg_vect)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss_grad(hyperparam_vect, i):
        learning_curve = []

        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(
                    loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [
            np.full(parser[name].size,
                    np.exp(cur_hyperparams['log_param_scale'][j]))
            for j, name in enumerate(parser.names)
        ]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        # sgd3 (project code) runs the inner SGD and returns reverse-mode
        # hypergradients w.r.t. the meta-parameters, step sizes and momenta.
        results = sgd3(indexed_loss_fun,
                       valid_loss_fun,
                       W0,
                       V0,
                       alphas,
                       betas,
                       log_L2_reg,
                       callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg'] = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        # Chain rule: W0 = exp(log_param_scale) * noise, so the gradient
        # w.r.t. log_param_scale is the per-group sum of W0 * dM/dW0.
        hypergrads['log_param_scale'] = [
            np.sum(weights_grad[name]) for name in parser.names
        ]
        hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas'] = (
            results['dMd_betas'] * d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss']
    meta_results = {field: [] for field in add_fields + hyperparams.names}

    def meta_callback(hyperparam_vect, i):
        print "Meta iter {0}".format(i)
        x = all_x[-1]
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        log_L2_reg = cur_hyperparams['log_L2_reg']
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])

        meta_results['train_loss'].append(train_loss_fun(x))
        meta_results['valid_loss'].append(valid_loss_fun(x))
        meta_results['tests_loss'].append(tests_loss_fun(x))

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback,
                            N_meta_iter, meta_alpha)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None  # No need to pickle zeros
    return meta_results, parser
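Example #7 stores the momentum schedule as unconstrained 'invlogit_betas' and
squashes it into (0, 1) with logit, while d_logit supplies the chain-rule factor
for the hypergradient. That usage is only consistent if logit here is the
logistic sigmoid (despite the name), so, under that assumption:

import numpy as np

def logit(x):
    # Maps unconstrained values into (0, 1): the logistic sigmoid
    # (an assumption inferred from the usage above).
    return 1.0 / (1.0 + np.exp(-x))

def d_logit(x):
    s = logit(x)
    return s * (1.0 - s)  # derivative of the sigmoid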
Example #8
def run():
    (train_images, train_labels),\
    (valid_images, valid_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg']      = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)

    def indexed_loss_fun(w, log_L2_reg, i):
        idxs = batch_idxs[i % len(batch_idxs)]
        partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[j]))
                         for j, name in enumerate(parser.names)]
        L2_reg_vect = np.concatenate(partial_vects, axis=0)
        return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg_vect)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []

    def hyperloss(hyperparam_vect, i):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)  # unused in this sgd5 variant
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size,
                                     np.exp(cur_hyperparams['log_param_scale'][j]))
                             for j, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas     = np.exp(cur_hyperparams['log_alphas'])
        betas      = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        # grad, kylist and getval are assumed to come from the project's
        # autodiff library; sgd5 unrolls the inner SGD differentiably.
        W_opt = sgd5(grad(indexed_loss_fun), kylist(W0, alphas, betas, log_L2_reg), callback)
        all_x.append(getval(W_opt))
        all_learning_curves.append(learning_curve)
        return valid_loss_fun(W_opt)

    hyperloss_grad = grad(hyperloss)

    add_fields = ['train_loss', 'valid_loss', 'tests_loss']
    meta_results = {field : [] for field in add_fields + hyperparams.names}
    def meta_callback(hyperparam_vect, i):
        x = all_x[-1]
        cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
        log_L2_reg = cur_hyperparams['log_L2_reg']
        for field in cur_hyperparams.names:
            meta_results[field].append(cur_hyperparams[field])

        meta_results['train_loss'].append(train_loss_fun(x))
        meta_results['valid_loss'].append(valid_loss_fun(x))
        meta_results['tests_loss'].append(tests_loss_fun(x))

    final_result = rms_prop(hyperloss_grad, hyperparams.vect, meta_callback, N_meta_iter, meta_alpha)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
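This variant obtains the hypergradient by differentiating straight through the
unrolled inner optimizer (hyperloss_grad = grad(hyperloss)) instead of consuming
the reverse-mode results dict that sgd3 returns in Example #7. Both hand the
gradient function to rms_prop; a minimal RMSProp loop matching the call sites
here, assuming this signature (the project's optimizer may differ):

import numpy as np

def rms_prop(grad_fun, x, callback, num_iters, step_size, gamma=0.9, eps=1e-8):
    """Minimize x; grad_fun(x, i) returns the gradient at meta-iteration i (sketch)."""
    avg_sq_grad = np.ones(len(x))  # running average of squared gradients
    for i in range(num_iters):
        g = grad_fun(x, i)
        avg_sq_grad = gamma * avg_sq_grad + (1.0 - gamma) * g ** 2
        x = x - step_size * g / (np.sqrt(avg_sq_grad) + eps)
        callback(x, i)
    return x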
Example #9
def run():
    (train_images, train_labels),\
    (valid_images, valid_labels),\
    (tests_images, tests_labels) = load_data_subset(N_train, N_valid, N_tests)
    batch_idxs = BatchList(N_train, batch_size)
    N_iters = N_epochs * len(batch_idxs)
    parser, pred_fun, loss_fun, frac_err = make_nn_funs(layer_sizes)
    N_weight_types = len(parser.names)
    hyperparams = VectorParser()
    hyperparams['log_L2_reg']      = np.full(N_weight_types, init_log_L2_reg)
    hyperparams['log_param_scale'] = np.full(N_weight_types, init_log_param_scale)
    hyperparams['log_alphas']      = np.full(N_iters, init_log_alphas)
    hyperparams['invlogit_betas']  = np.full(N_iters, init_invlogit_betas)

    def train_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=train_images, T=train_labels)

    def valid_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=valid_images, T=valid_labels)

    def tests_loss_fun(w, log_L2_reg=0.0):
        return loss_fun(w, X=tests_images, T=tests_labels)

    all_learning_curves = []
    all_x = []
    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        def indexed_loss_fun(w, log_L2_reg, j):
            # Draws a fresh random minibatch each inner step, reseeded for
            # reproducibility; the commented-out line is the deterministic
            # cyclic alternative used in the previous examples.
            # idxs = batch_idxs[j % len(batch_idxs)]
            npr.seed(1000 * ii + j)
            idxs = npr.randint(N_train, size=len(batch_idxs))
            partial_vects = [np.full(parser[name].size, np.exp(log_L2_reg[k]))
                             for k, name in enumerate(parser.names)]
            L2_reg_vect = np.concatenate(partial_vects, axis=0)
            return loss_fun(w, X=train_images[idxs], T=train_labels[idxs], L2_reg=L2_reg_vect)

        npr.seed(ii)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size,
                                     np.exp(cur_hyperparams['log_param_scale'][j]))
                             for j, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg']      = results['dMd_meta']
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                         for name in parser.names]
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect

    add_fields = ['train_loss', 'valid_loss', 'tests_loss', 'iter_num']
    meta_results = {field : [] for field in add_fields + hyperparams.names}
    def meta_callback(hyperparam_vect, i):
        if i % N_meta_thin == 0:
            print "Meta iter {0}".format(i)
            x = all_x[-1]
            cur_hyperparams = hyperparams.new_vect(hyperparam_vect.copy())
            log_L2_reg = cur_hyperparams['log_L2_reg']
            for field in cur_hyperparams.names:
                meta_results[field].append(cur_hyperparams[field])

            meta_results['train_loss'].append(train_loss_fun(x))
            meta_results['valid_loss'].append(valid_loss_fun(x))
            meta_results['tests_loss'].append(tests_loss_fun(x))
            meta_results['iter_num'].append(i)

    final_result = rms_prop(hyperloss_grad, hyperparams.vect,
                            meta_callback, N_meta_iter, meta_alpha, meta_gamma)
    meta_results['all_learning_curves'] = all_learning_curves
    parser.vect = None # No need to pickle zeros
    return meta_results, parser
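Examples #7 through #9 all route their hyperparameters through a VectorParser,
which presents one flat vector (.vect, the thing handed to rms_prop) as a dict
of named arrays. A sketch consistent with the __getitem__/__setitem__/new_vect/
copy/names usage above (an assumed interface, not the project's implementation):

import numpy as np
from collections import OrderedDict

class VectorParser(object):
    """View one flat vector as an ordered dict of named, shaped arrays (sketch)."""
    def __init__(self):
        self.idxs_and_shapes = OrderedDict()
        self.vect = np.zeros(0)

    @property
    def names(self):
        return list(self.idxs_and_shapes.keys())

    def __getitem__(self, name):
        idxs, shape = self.idxs_and_shapes[name]
        return np.reshape(self.vect[idxs], shape)

    def __setitem__(self, name, value):
        value = np.atleast_1d(np.asarray(value, dtype=float))
        if name not in self.idxs_and_shapes:
            start = self.vect.size
            self.idxs_and_shapes[name] = (slice(start, start + value.size),
                                          value.shape)
            self.vect = np.concatenate((self.vect, value.ravel()))
        else:
            idxs, _ = self.idxs_and_shapes[name]
            self.vect[idxs] = value.ravel()

    def new_vect(self, vect):
        other = VectorParser()
        other.idxs_and_shapes = self.idxs_and_shapes.copy()
        other.vect = vect
        return other

    def copy(self):
        return self.new_vect(self.vect.copy())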