Example #1
def test_scalar_value():
    npr.seed(1)

    for ii in range(NUM_TRIALS):
        np_X = npr.randn()

        X = kayak.Parameter(np_X)
        out = kayak.L1Norm(X)

        assert close_float(out.value, np.abs(np_X))
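These tests lean on a few module-level helpers that are not shown on this page. A minimal sketch of what that preamble might look like is below, assuming standard imports; NUM_TRIALS, MAX_GRAD_DIFF, and close_float are hypothetical stand-ins for whatever the test module actually defines.

import numpy as np
import numpy.random as npr

import kayak

# Hypothetical stand-ins for the test module's helpers.
NUM_TRIALS = 10        # assumed number of randomized repetitions per test
MAX_GRAD_DIFF = 1e-7   # assumed tolerance for kayak.util.checkgrad

def close_float(a, b, tol=1e-6):
    # True (elementwise) when two floats or arrays agree within tol.
    return np.abs(a - b) < tol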
Example #2
def test_matrix_value():
    npr.seed(7)

    for ii in range(NUM_TRIALS):
        np_X = npr.randn(10, 20)
        wt = np.exp(npr.randn())

        X = kayak.Parameter(np_X)
        out = kayak.L1Norm(X, weight=wt)

        assert close_float(out.value, wt * np.sum(np.abs(np_X)))
Example #3
def test_scalar_value_2():
    npr.seed(3)

    for ii in range(NUM_TRIALS):
        np_X = npr.randn()
        wt = np.exp(npr.randn())

        X = kayak.Parameter(np_X)
        out = kayak.L1Norm(X, weight=wt)

        assert close_float(out.value, wt * np.abs(np_X))
Example #4
def test_scalar_grad():
    npr.seed(2)

    for ii in range(NUM_TRIALS):
        while True:
            np_X = npr.randn()
            if np.abs(np_X) > 0.1:
                break

        X = kayak.Parameter(np_X)
        out = kayak.L1Norm(X)

        assert close_float(out.grad(X), np.sign(np_X))
        assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF
Example #5
def test_vector_grad():
    npr.seed(6)

    for ii in range(NUM_TRIALS):
        while True:
            # Draw a vector, redrawing until every entry is safely away
            # from zero, where the L1 norm is not differentiable.
            np_X = npr.randn(10)
            if np.all(np.abs(np_X) > 0.1):
                break
        wt = np.exp(npr.randn())

        X = kayak.Parameter(np_X)
        out = kayak.L1Norm(X, weight=wt)

        assert np.all(close_float(out.grad(X), wt * np.sign(np_X)))
        assert kayak.util.checkgrad(X, out) < MAX_GRAD_DIFF
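The quantity these gradient tests check is the identity d/dX [ w * sum(|X|) ] = w * sign(X), which holds wherever no entry of X is zero. The snippet below verifies that identity with a plain numpy central-difference check; it is an independent illustration, not kayak's own checkgrad.

import numpy as np
import numpy.random as npr

def weighted_l1(X, w):
    # Weighted L1 norm: w * sum(|X_i|).
    return w * np.sum(np.abs(X))

npr.seed(0)
X = npr.randn(5)
X[np.abs(X) < 0.1] += 0.5          # keep entries away from the kink at zero
w = np.exp(npr.randn())
eps = 1e-6

numeric = np.array([(weighted_l1(X + eps * np.eye(5)[i], w) -
                     weighted_l1(X - eps * np.eye(5)[i], w)) / (2.0 * eps)
                    for i in range(5)])
analytic = w * np.sign(X)
print(np.allclose(numeric, analytic, atol=1e-4))   # expect True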
Example #6
def train(inputs, targets, batch_size, learn_rate, momentum, l1_weight,
          l2_weight, dropout, improvement_thresh):

    # Create a batcher object.
    batcher = kayak.Batcher(batch_size, inputs.shape[0])

    # Inputs and targets need access to the batcher.
    X = kayak.Inputs(inputs, batcher)
    T = kayak.Targets(targets, batcher)

    # Put some dropout regularization on the inputs
    H = kayak.Dropout(X, dropout)

    # Weights and biases, with random initializations.
    W = kayak.Parameter(0.1 * npr.randn(inputs.shape[1], 10))
    B = kayak.Parameter(0.1 * npr.randn(1, 10))

    # Nothing fancy here: inputs times weights, plus bias, then softmax.
    Y = kayak.LogSoftMax(kayak.ElemAdd(kayak.MatMult(H, W), B))

    # The training loss is negative multinomial log likelihood.
    loss = kayak.MatAdd(kayak.MatSum(kayak.LogMultinomialLoss(Y, T)),
                        kayak.L2Norm(W, l2_weight), kayak.L1Norm(W, l1_weight))

    # Use momentum for the gradient-based optimization.
    mom_grad_W = np.zeros(W.shape)

    best_loss = np.inf
    best_epoch = -1

    # Loop over epochs.
    for epoch in range(100):

        # Track the total loss.
        total_loss = 0.0

        # Loop over batches -- using batcher as iterator.
        for batch in batcher:

            # Draw new random dropouts
            H.draw_new_mask()

            # Compute the loss of this minibatch by asking the Kayak
            # object for its value.
            total_loss += loss.value

            # Now ask the loss for its gradient in terms of the
            # weights and the biases -- the two things we're trying to
            # learn here.
            grad_W = loss.grad(W)
            grad_B = loss.grad(B)

            # Use momentum on the weight gradient.
            mom_grad_W *= momentum
            mom_grad_W += (1.0 - momentum) * grad_W

            # Now make the actual parameter updates.
            W.value -= learn_rate * mom_grad_W
            B.value -= learn_rate * grad_B

        print("Epoch: %d, total loss: %f" % (epoch, total_loss))

        if not np.isfinite(total_loss):
            print("Training diverged. Stopping early.")
            break

        if total_loss < best_loss:
            best_loss = total_loss
            best_epoch = epoch
        elif (epoch - best_epoch) > improvement_thresh:
            print("No improvement for %d epochs. Aborting." %
                  (epoch - best_epoch))
            break

    # After we've trained, we return a small convenience function.
    # It simply replaces the inputs in the graph defined above and
    # runs a forward pass to produce the outputs.  The result is a
    # function handle that can be called with a numpy array and
    # produces predictions for novel data, using the parameters we
    # just learned.
    def predict(x):
        X.value = x
        H.reinstate_units()
        return Y.value

    return predict
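A usage sketch for the train function above, on synthetic data. Every shape and hyperparameter value below is an assumption chosen for illustration, not something taken from the original source.

import numpy as np
import numpy.random as npr

npr.seed(0)
N, D, K = 500, 20, 10
inputs = npr.randn(N, D)
labels = npr.randint(K, size=N)
targets = np.zeros((N, K))
targets[np.arange(N), labels] = 1.0    # one-hot targets

predict = train(inputs, targets,
                batch_size=64, learn_rate=0.01, momentum=0.9,
                l1_weight=1e-4, l2_weight=1e-4, dropout=0.25,
                improvement_thresh=5)

# Y is a LogSoftMax node, so predict() returns log-probabilities.
pred_labels = np.argmax(predict(inputs), axis=1)
print("training accuracy: %.3f" % np.mean(pred_labels == labels))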
Example #7
kyk_B2 = kayak.Parameter(npr.randn(1, P))

# Second layer multiplication.
kyk_out = kayak.Dropout(kayak.HardReLU(
    kayak.ElemAdd(kayak.MatMult(kyk_H1, kyk_W2), kyk_B2)),
                        drop_prob=0.5,
                        batcher=batcher)

# Elementwise Loss.
kyk_el_loss = kayak.L2Loss(kyk_out, kyk_targets)

# Sum the losses.
kyk_loss = kayak.MatSum(kyk_el_loss)

# Roll in the weight regularization.
kyk_obj = kayak.ElemAdd(kyk_loss, kayak.L1Norm(kyk_W1, weight=100.0),
                        kayak.L1Norm(kyk_W2, weight=100.0))

print "W2:", kayak.util.checkgrad(kyk_W2, kyk_obj)
print "B2:", kayak.util.checkgrad(kyk_B2, kyk_obj)
print "W1:", kayak.util.checkgrad(kyk_W1, kyk_obj)
print "B1:", kayak.util.checkgrad(kyk_B1, kyk_obj)

t0 = time.time()
for ii in range(10):

    for batch in batcher:
        val = kyk_obj.value
        grad_W1 = kyk_obj.grad(kyk_W1)
        grad_B1 = kyk_obj.grad(kyk_B1)
        grad_W2 = kyk_obj.grad(kyk_W2)
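The excerpt above stops partway through the batch loop. Below is a sketch of how the body might finish: the remaining gradient plus a plain gradient-descent step. The learn_rate value and the update rule are assumptions for illustration, not the rest of the original script.

        # Assumed continuation, not part of the original excerpt:
        # finish the remaining gradient and take a plain gradient step.
        grad_B2 = kyk_obj.grad(kyk_B2)

        learn_rate = 0.001                 # assumed value
        kyk_W1.value -= learn_rate * grad_W1
        kyk_B1.value -= learn_rate * grad_B1
        kyk_W2.value -= learn_rate * grad_W2
        kyk_B2.value -= learn_rate * grad_B2

print("elapsed: %f seconds" % (time.time() - t0))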
Example #8
def train(inputs, targets, batch_size, learn_rate, momentum, l1_weight, l2_weight, dropout):

    # Create a batcher object.
    batcher = kayak.Batcher(batch_size, inputs.shape[0])

    # Inputs and targets need access to the batcher.
    X = kayak.Inputs(inputs, batcher)
    T = kayak.Targets(targets, batcher)

    # Weights and biases, with random initializations.
    W = kayak.Parameter(0.1 * npr.randn(inputs.shape[1], 10))
    B = kayak.Parameter(0.1 * npr.randn(1, 10))

    # Dropout on the inputs, then inputs times weights, plus bias, then softmax.
    dropout_layer = kayak.Dropout(X, dropout, batcher=batcher)
    Y = kayak.LogSoftMax(kayak.ElemAdd(kayak.MatMult(dropout_layer, W), B))

    # The training loss is negative multinomial log likelihood.
    loss = kayak.MatAdd(kayak.MatSum(kayak.LogMultinomialLoss(Y, T)),
                        kayak.L2Norm(W, l2_weight),
                        kayak.L1Norm(W, l1_weight))

    # Use momentum for the gradient-based optimization.
    mom_grad_W = np.zeros(W.shape)

    # Loop over epochs.
    for epoch in range(10):

        # Track the total loss and the overall gradient.
        total_loss   = 0.0
        total_grad_W = np.zeros(W.shape)

        # Loop over batches -- using batcher as iterator.
        for batch in batcher:
            # Compute the loss of this minibatch by asking the Kayak
            # object for its value.
            total_loss += loss.value

            # Now ask the loss for its gradient in terms of the
            # weights and the biases -- the two things we're trying to
            # learn here.
            grad_W = loss.grad(W)
            grad_B = loss.grad(B)
            
            # Use momentum on the weight gradient.
            mom_grad_W = momentum*mom_grad_W + (1.0-momentum)*grad_W

            # Now make the actual parameter updates.
            W.value -= learn_rate * mom_grad_W
            B.value -= learn_rate * grad_B

            # Keep track of the gradient to see if we're converging.
            total_grad_W += grad_W

        # print(epoch, total_loss, np.sum(total_grad_W**2))

    # After we've trained, we return a small convenience function.
    # It replaces the data held by the Kayak input object 'X' (the
    # features used here for logistic regression), puts the batcher
    # into test mode so that dropout is not applied, and then runs a
    # forward pass.  The result is a function handle that can be
    # called with a numpy array and produces predictions for novel
    # data, using the parameters we just learned.
    
    def compute_predictions(x):
        X.data = x
        batcher.test_mode()
        return Y.value

    return compute_predictions
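As with the earlier training example, here is a hedged usage sketch: synthetic data is split into a training set and a held-out set, and the LogSoftMax output is exponentiated to recover class probabilities. All shapes and hyperparameter values are illustrative assumptions.

import numpy as np
import numpy.random as npr

npr.seed(1)
N, D, K = 600, 20, 10
X_all = npr.randn(N, D)
y_all = npr.randint(K, size=N)
T_all = np.zeros((N, K))
T_all[np.arange(N), y_all] = 1.0       # one-hot targets

X_tr, T_tr = X_all[:500], T_all[:500]  # simple train / held-out split
X_te, y_te = X_all[500:], y_all[500:]

compute_predictions = train(X_tr, T_tr,
                            batch_size=64, learn_rate=0.01, momentum=0.9,
                            l1_weight=1e-4, l2_weight=1e-4, dropout=0.25)

log_probs = compute_predictions(X_te)   # LogSoftMax output
probs = np.exp(log_probs)               # back to class probabilities
print("held-out accuracy: %.3f" % np.mean(np.argmax(probs, axis=1) == y_te))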