Example No. 1
# Assumed imports, following the hypergrad code base (stripped from the source page):
#   import numpy as np
#   import numpy.random as npr
#   from funkyyak import grad, kylist
#   from hypergrad.nn_utils import BatchList, VectorParser
#   from hypergrad.optimizers import sgd_parsed
#   from hypergrad.util import nd  # numerical differencing, for gradient checks
def test_sgd_parser():
    N_weights = 6
    W0 = 0.1 * npr.randn(N_weights)
    N_data = 12
    batch_size = 4
    num_epochs = 4
    batch_idxs = BatchList(N_data, batch_size)

    parser = VectorParser()
    parser.add_shape('first',  [2,])
    parser.add_shape('second', [1,])
    parser.add_shape('third',  [3,])
    N_weight_types = 3

    # One learning rate (alpha) and one momentum decay (beta) per step and per weight group.
    alphas = 0.1 * npr.rand(len(batch_idxs) * num_epochs, N_weight_types)
    betas = 0.5 + 0.2 * npr.rand(len(batch_idxs) * num_epochs, N_weight_types)
    meta = 0.1 * npr.randn(N_weights * 2)

    A = npr.randn(N_data, N_weights)
    def loss_fun(W, meta, i=None):
        # i selects a minibatch; i=None evaluates on the full data set.
        idxs = batch_idxs.all_idxs if i is None else batch_idxs[i % len(batch_idxs)]
        sub_A = A[idxs, :]
        # Quadratic form: (W + both halves of meta)^T (sub_A^T sub_A) W.
        return np.dot(np.dot(W + meta[:N_weights] + meta[N_weights:], np.dot(sub_A.T, sub_A)), W)

    def full_loss(params):
        (W0, alphas, betas, meta) = params
        # Run SGD with the per-group schedules, then score the final weights.
        result = sgd_parsed(grad(loss_fun), kylist(W0, alphas, betas, meta), parser)
        return loss_fun(result, meta)

    d_num = nd(full_loss, (W0, alphas, betas, meta))  # numerical gradients
    d_an_fun = grad(full_loss)
    d_an = d_an_fun([W0, alphas, betas, meta])        # analytic gradients
    for i, (an, num) in enumerate(zip(d_an, d_num[0])):
        assert np.allclose(an, num, rtol=1e-3, atol=1e-4), \
            "Type {0}, diffs are: {1}".format(i, an - num)
Example No. 2
def make_parabola(d):
    parser = VectorParser()
    parser.add_shape('weights', d)
    # Curvatures span exp(-3)..exp(3), giving a deliberately ill-conditioned bowl.
    dimscale = np.exp(np.linspace(-3, 3, d))
    offset = npr.randn(d)

    # X, T, and L2_reg are unused; they only match the signature of the real training losses.
    def loss(w, X=0.0, T=0.0, L2_reg=0.0):
        return np.dot((w - offset) * dimscale, (w - offset))

    return parser, loss
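
make_parabola builds an axis-aligned quadratic bowl: the minimum sits at offset, and the per-dimension curvatures span exp(-3) to exp(3), so the condition number is about e**6 (roughly 400). A quick self-contained check with the loss inlined, so VectorParser is not needed:

import numpy as np
import numpy.random as npr

d = 4
dimscale = np.exp(np.linspace(-3, 3, d))
offset = npr.randn(d)
loss = lambda w: np.dot((w - offset) * dimscale, (w - offset))

assert np.isclose(loss(offset), 0.0)      # the minimum is at offset
assert loss(offset + npr.randn(d)) > 0.0  # and the loss is positive elsewhere
print(dimscale[-1] / dimscale[0])         # condition number: e**6 ~ 403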
Example No. 3
def make_toy_funs():
    parser = VectorParser()
    parser.add_shape('weights', 2)

    def rosenbrock(x):
        return sum(100.0*(x[1:]-x[:-1]**2.0)**2.0 + (1-x[:-1])**2.0)

    def loss(W_vect, X=0.0, T=0.0, L2_reg=0.0):
        # Squash through hypergrad's logit (a logistic sigmoid) to cap the loss below 500.
        return 500 * logit(rosenbrock(W_vect) / 500)

    return parser, loss
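
In hypergrad's nn_utils, logit is (despite its name) the logistic sigmoid 1 / (1 + exp(-x)), so 500 * logit(rosenbrock(w) / 500) squashes the Rosenbrock value smoothly into (0, 500), keeping diverging optimizer runs bounded. A self-contained sketch of that saturation, with the sigmoid defined locally as an assumption:

import numpy as np

def logit(x):
    # Assumed definition, matching hypergrad's nn_utils.
    return 1 / (1 + np.exp(-x))

def rosenbrock(x):
    return sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0)

loss = lambda w: 500 * logit(rosenbrock(w) / 500)

print(loss(np.array([1.0, 1.0])))    # 250.0: sigmoid(0) = 0.5 at the Rosenbrock minimum
print(loss(np.array([-5.0, 30.0])))  # ~497: large Rosenbrock values saturate toward 500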
Example No. 4
def make_toy_funs():
    parser = VectorParser()
    parser.add_shape('weights', 2)

    def rosenbrock(w):
        x = w[1:]
        y = w[:-1]
        # Standard Rosenbrock plus a 200*y term that tilts the valley floor.
        return sum(100.0 * (x - y**2.0)**2.0 + (1 - y)**2.0 + 200.0 * y)

    def loss(W_vect, X=0.0, T=0.0, L2_reg=0.0):
        return 800 * logit(rosenbrock(W_vect) / 500)

    return parser, loss
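
Relative to the previous example, the 200.0 * y term tilts the Rosenbrock valley so there is a consistent slope along the otherwise nearly flat valley floor, and the 800 factor (with the same /500 squash) raises the ceiling of the saturated loss from 500 to 800.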
Example No. 5
        m = b1t * g + (1 - b1t) * m             # first moment: moving average of gradients
        v = b2 * (g**2) + (1 - b2) * v          # second moment: moving average of squared gradients
        mhat = m / (1 - (1 - b1)**(i + 1))      # correct the bias from zero initialization
        vhat = v / (1 - (1 - b2)**(i + 1))
        x -= step_size * mhat / (np.sqrt(vhat) + eps)

    return x
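
The fragment above is the tail of an Adam update loop: exponential moving averages of the gradient and its square, bias correction for their zero initialization, then the step. A self-contained sketch of the whole loop, using a fixed b1 in place of the fragment's per-step b1t (presumably a scheduled variant of b1) and keeping the convention above that b1 and b2 weight the *new* gradient:

import numpy as np

def adam(grad_fun, x, num_iters=100, step_size=0.1,
         b1=0.1, b2=0.01, eps=1e-4):
    m = np.zeros_like(x)
    v = np.zeros_like(x)
    for i in range(num_iters):
        g = grad_fun(x)
        m = b1 * g + (1 - b1) * m           # first moment (gradient mean)
        v = b2 * (g**2) + (1 - b2) * v      # second moment (uncentered variance)
        mhat = m / (1 - (1 - b1)**(i + 1))  # undo the bias toward zero
        vhat = v / (1 - (1 - b2)**(i + 1))
        x = x - step_size * mhat / (np.sqrt(vhat) + eps)
    return x

x_opt = adam(lambda x: 2 * x, np.array([3.0, -2.0]), num_iters=500)
print(x_opt)  # approaches [0, 0], the minimum of f(x) = x.x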


# --
# Make NN functions

# `layer_sizes` is defined earlier in the script (e.g. [784, 200, 10] for MNIST);
# each consecutive pair of sizes gets one weight matrix and one bias row.
parser = VectorParser()
for i, shape in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
    parser.add_shape(('weights', i), shape)
    parser.add_shape(('biases', i), (1, shape[1]))


def pred_fun(W_vect, X):
    """Outputs normalized log-probabilities."""
    W = parser.new_vect(W_vect)
    cur_units = X
    N_iter = len(layer_sizes) - 1
    for i in range(N_iter):
        cur_W = W[('weights', i)]
        cur_B = W[('biases', i)]
        cur_units = np.dot(cur_units, cur_W) + cur_B
        if i == (N_iter - 1):
            # Final layer: subtract logsumexp to get normalized log-probabilities.
            cur_units = cur_units - logsumexp(cur_units, axis=1)
        else:
            # Hidden layers: nonlinearity (assumed tanh; the source cuts off here).
            cur_units = np.tanh(cur_units)
    return cur_units
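
VectorParser above carves one flat parameter vector into named, shaped views: new_vect wraps a flat vector, and indexing by name returns the corresponding reshaped slice. A minimal sketch of that behavior, inferred from the usage in this snippet (MiniParser is hypothetical, not the real hypergrad class):

import numpy as np

class MiniParser:
    def __init__(self):
        self.idxs_and_shapes = {}
        self.N = 0  # total number of scalars registered so far

    def add_shape(self, name, shape):
        size = int(np.prod(shape))
        self.idxs_and_shapes[name] = (slice(self.N, self.N + size), shape)
        self.N += size

    def new_vect(self, vect):
        outer = self
        class Vect:
            def __getitem__(self, name):
                idxs, shape = outer.idxs_and_shapes[name]
                return vect[idxs].reshape(shape)
        return Vect()

# Usage mirroring the network setup above.
p = MiniParser()
p.add_shape(('weights', 0), (3, 2))
p.add_shape(('biases', 0), (1, 2))
W = p.new_vect(np.arange(8.0))
print(W[('weights', 0)].shape)  # (3, 2)
print(W[('biases', 0)])         # [[6. 7.]]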