Example #1
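The test below relies on helpers that are not shown here: `npr` is `numpy.random`, `sgd3` is the reversible SGD-with-momentum routine whose hypergradients are being checked, and `nd` and `BatchList` are small utilities. The following is a rough sketch of what `nd` (central finite differences with respect to each positional argument) and `BatchList` (a list of index batches with an `all_idxs` attribute) could look like; these definitions are assumptions made to keep the snippet self-contained, not the project's actual code.

import numpy as np
import numpy.random as npr

class BatchList(list):
    # Hypothetical helper: splits range(N_total) into index batches of size
    # N_batch and exposes all indices via .all_idxs, matching how the test uses it.
    def __init__(self, N_total, N_batch):
        start = 0
        while start < N_total:
            self.append(np.arange(start, min(start + N_batch, N_total)))
            start += N_batch
        self.all_idxs = np.arange(N_total)

def nd(fun, *args, eps=1e-4):
    # Hypothetical helper: central-difference gradients of fun with respect to
    # each positional argument, returned in the same order as the arguments.
    grads = []
    for i in range(len(args)):
        arg = np.asarray(args[i], dtype=float)
        grad = np.zeros_like(arg)
        for j in range(arg.size):
            perturbed = [np.array(a, dtype=float) for a in args]
            perturbed[i].flat[j] += eps
            f_plus = fun(*perturbed)
            perturbed[i].flat[j] -= 2 * eps
            f_minus = fun(*perturbed)
            grad.flat[j] = (f_plus - f_minus) / (2 * eps)
        grads.append(grad)
    return grads
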
def test_sgd3():
    N_weights = 5
    W0 = 0.1 * npr.randn(N_weights)
    V0 = 0.1 * npr.randn(N_weights)
    N_data = 12
    batch_size = 4
    num_epochs = 3
    batch_idxs = BatchList(N_data, batch_size)
    alphas = 0.1 * npr.rand(len(batch_idxs) * num_epochs)
    betas = 0.5 + 0.2 * npr.rand(len(batch_idxs) * num_epochs)
    meta = 0.1 * npr.randn(N_weights * 2)

    A = npr.randn(N_data, N_weights)

    def loss_fun(W, meta, i=None):
        idxs = batch_idxs.all_idxs if i is None else batch_idxs[i % len(batch_idxs)]
        sub_A = A[idxs, :]
        return np.dot(np.dot(W + meta[:N_weights] + meta[N_weights:],
                             np.dot(sub_A.T, sub_A)), W)

    def meta_loss_fun(w, meta):
        return np.dot(w, w) + np.dot(meta, meta)

    def full_loss(W0, V0, alphas, betas, meta):
        result = sgd3(loss_fun, meta_loss_fun, W0, V0, alphas, betas, meta)
        return loss_fun(result['x_final'], meta)

    def meta_loss(W0, V0, alphas, betas, meta):
        result = sgd3(loss_fun, meta_loss_fun, W0, V0, alphas, betas, meta)
        return meta_loss_fun(result['x_final'], meta)

    # Check sgd3's analytic hypergradients of the meta-loss against numerical derivatives.
    result = sgd3(loss_fun, meta_loss_fun, W0, V0, alphas, betas, meta)
    d_an = (result['dMd_x'], result['dMd_v'], result['dMd_alphas'],
            result['dMd_betas'], result['dMd_meta'])
    d_num = nd(meta_loss, W0, V0, alphas, betas, meta)
    for i, (an, num) in enumerate(zip(d_an, d_num)):
        assert np.allclose(an, num, rtol=1e-3, atol=1e-4), \
            "Type {0}, diffs are: {1}".format(i, an - num)

    # Same check with the training loss itself used as the meta-objective.
    result = sgd3(loss_fun, loss_fun, W0, V0, alphas, betas, meta)
    d_an = (result['dMd_x'], result['dMd_v'], result['dMd_alphas'],
            result['dMd_betas'], result['dMd_meta'])
    d_num = nd(full_loss, W0, V0, alphas, betas, meta)
    for i, (an, num) in enumerate(zip(d_an, d_num)):
        assert np.allclose(an, num, rtol=1e-3, atol=1e-4), \
            "Type {0}, diffs are: {1}".format(i, an - num)
Example #2
    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        params_curve = []

        def callback(x, i):
            params_curve.append(x)
            learning_curve.append(loss_fun(x))

        def indexed_loss_fun(w, log_L2_reg, j):
            return loss_fun(w)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = init_params
        V0 = cur_hyperparams['V0']
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = 0.0
        results = sgd3(indexed_loss_fun,
                       loss_fun,
                       W0,
                       V0,
                       alphas,
                       betas,
                       log_L2_reg,
                       callback=callback)
        hypergrads = hyperparams.copy()
        # V0 is not tuned here, so its hypergradient is zeroed out.
        hypergrads['V0'] = results['dMd_v'] * 0
        # Chain rule through the reparameterizations:
        # alphas = exp(log_alphas) and betas = logit(invlogit_betas).
        hypergrads['log_alphas'] = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas'] = (
            results['dMd_betas'] * d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        all_param_curves.append(params_curve)
        return hypergrads.vect
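In this snippet the step sizes are stored in log space and the momentum decays in an unconstrained "invlogit" parameterization, so the raw hypergradients are pushed back through those transforms by the chain rule (the `* alphas` and `* d_logit(...)` factors above). A minimal sketch of squashing helpers consistent with that usage follows, where `logit` behaves like a sigmoid mapping the unconstrained value into (0, 1) and `d_logit` is its derivative; these definitions are assumptions, not necessarily the library's exact code.

import numpy as np

def logit(x):
    # Assumed definition: sigmoid-style squashing so that
    # betas = logit(invlogit_betas) lies in (0, 1).
    return 1.0 / (1.0 + np.exp(-x))

def d_logit(x):
    # Derivative of the squashing function, used to convert dMd_betas
    # into a gradient with respect to invlogit_betas.
    s = logit(x)
    return s * (1.0 - s)
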
Example #3
    def hyperloss_grad(hyperparam_vect, i):
        learning_curve = []
        def callback(x, i):
            if i % len(batch_idxs) == 0:
                learning_curve.append(loss_fun(x, X=train_images, T=train_labels))

        npr.seed(i)
        N_weights = parser.vect.size
        V0 = np.zeros(N_weights)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        layer_param_scale = [np.full(parser[name].size, 
                                     np.exp(cur_hyperparams['log_param_scale'][i]))
                             for i, name in enumerate(parser.names)]
        W0 = npr.randn(N_weights) * np.concatenate(layer_param_scale, axis=0)
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = cur_hyperparams['log_L2_reg']
        results = sgd3(indexed_loss_fun, valid_loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['log_L2_reg']      = results['dMd_meta']
        # Chain rule: W0 = randn * exp(log_param_scale), so dW0/dlog_param_scale = W0
        # elementwise, and each layer's scale gradient is the sum of W0 * dMd_x
        # over that layer's weights.
        weights_grad = parser.new_vect(W0 * results['dMd_x'])
        hypergrads['log_param_scale'] = [np.sum(weights_grad[name])
                                         for name in parser.names]
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        return hypergrads.vect
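`hyperloss_grad` only returns the hypergradient vector; some outer loop has to apply it. A minimal sketch of such a loop under assumed names (`meta_alpha` and `num_meta_iters` are illustrative, not taken from the snippet), doing plain gradient descent on the flat hyperparameter vector:

meta_alpha = 0.05      # assumed meta learning rate
num_meta_iters = 50    # assumed number of outer iterations
hyperparam_vect = hyperparams.vect.copy()
for meta_iter in range(num_meta_iters):
    hyperparam_vect = hyperparam_vect - meta_alpha * hyperloss_grad(hyperparam_vect, meta_iter)
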
Example #4
    def hyperloss_grad(hyperparam_vect, ii):
        learning_curve = []
        params_curve = []
        def callback(x, i):
            params_curve.append(x)
            learning_curve.append(loss_fun(x))

        def indexed_loss_fun(w, log_L2_reg, j):
            return loss_fun(w)

        cur_hyperparams = hyperparams.new_vect(hyperparam_vect)
        W0 = np.ones(N_weights) * init_param_scale
        V0 = cur_hyperparams['V0']
        alphas = np.exp(cur_hyperparams['log_alphas'])
        betas = logit(cur_hyperparams['invlogit_betas'])
        log_L2_reg = 0.0
        results = sgd3(indexed_loss_fun, loss_fun, W0, V0,
                       alphas, betas, log_L2_reg, callback=callback)
        hypergrads = hyperparams.copy()
        hypergrads['V0']              = results['dMd_v']
        hypergrads['log_alphas']      = results['dMd_alphas'] * alphas
        hypergrads['invlogit_betas']  = (results['dMd_betas'] *
                                         d_logit(cur_hyperparams['invlogit_betas']))
        all_x.append(results['x_final'])
        all_learning_curves.append(learning_curve)
        all_param_curves.append(params_curve)
        return hypergrads.vect
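These snippets treat `hyperparams` (and `parser`) as a vector-parser object with a flat `.vect`, dict-style access by name, and `copy()` / `new_vect()` methods. Below is a toy stand-in with that interface, written only to make the examples runnable in isolation; it is not the actual class the snippets use.

import numpy as np

class SimpleVectorParser(object):
    # Toy stand-in: stores named sub-arrays in one flat vector and
    # slices them back out by name.
    def __init__(self):
        self.idxs = {}
        self.vect = np.zeros(0)

    def __setitem__(self, name, value):
        value = np.atleast_1d(np.asarray(value, dtype=float)).ravel()
        if name in self.idxs:
            start, stop = self.idxs[name]
            self.vect[start:stop] = value
        else:
            start = self.vect.size
            self.idxs[name] = (start, start + value.size)
            self.vect = np.concatenate([self.vect, value])

    def __getitem__(self, name):
        start, stop = self.idxs[name]
        return self.vect[start:stop]

    @property
    def names(self):
        return list(self.idxs)

    def copy(self):
        new = SimpleVectorParser()
        new.idxs = dict(self.idxs)
        new.vect = self.vect.copy()
        return new

    def new_vect(self, vect):
        # Same layout, different underlying numbers.
        new = self.copy()
        new.vect = np.asarray(vect, dtype=float).copy()
        return new

# Example layout similar to the hyperparameters used above:
hyperparams = SimpleVectorParser()
hyperparams['V0'] = np.zeros(10)
hyperparams['log_alphas'] = np.log(0.1) * np.ones(20)
hyperparams['invlogit_betas'] = np.zeros(20)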