def online_mlp_784_500_10():
    HUs = 500
    # 6.0 forces float division; a bare 6 / (inputs + HUs) truncates to 0
    # under Python 2 and zeroes out the Glorot-style scaling.
    w = shared(rand(HUs, inputs) * numpy.sqrt(6.0 / (inputs + HUs)))
    b = shared(zeros(HUs))
    v = shared(zeros(outputs, HUs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
            dot(tanh(dot(sx, w.T) + b), v.T).dimshuffle('x', 0),
            c,
            sy.dimshuffle('x'))
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train = function([], [],
            updates={
                w: w - lr * gw,
                b: b - lr * gb,
                v: v - lr * gv,
                c: c - lr * gc,
                si: (si + 1) % n_examples})
    theano.printing.debugprint(train, file=open('foo_train', 'w'))  # text mode: debugprint writes str
    t = time.time()
    train.fn(n_calls=n_examples)
    dt = time.time() - t
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    reportmodel('mlp_784_500_10_hack', 1, dt)
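
Note: these snippets are excerpts and lean on module-level context they do not show: shared, function, grad, tanh, and dot from Theano, plus the globals inputs, outputs, lr, n_examples, rand, zeros, data_x, and data_y. Below is a minimal sketch of plausible definitions; the concrete values (MNIST-like shapes, learning rate, dataset size) are illustrative assumptions, not the original benchmark's configuration, and the harness helpers reportmodel and GlobalBenchReporter are not sketched here.

import numpy
import theano
from theano import shared, function
from theano.tensor import grad, tanh, dot
from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias

inputs, outputs = 784, 10   # MNIST-like dimensions (assumed)
lr = 0.01                   # learning rate (assumed)
n_examples = 6000           # dataset size (assumed)

def rand(*shape):
    # Uniform in [-1, 1), so rand(...) * sqrt(6.0 / fan) gives the usual
    # Glorot-style interval.
    return 2 * numpy.random.rand(*shape) - 1

def zeros(*shape):
    return numpy.zeros(shape)

# The whole dataset lives in shared variables. Indexing a shared matrix with
# the shared scalar si (data_x[si]) yields a symbolic row vector, which is why
# the snippets use dimshuffle('x', 0) / dimshuffle('x') to restore a leading
# batch axis of size 1 before calling the fused softmax op.
data_x = shared(numpy.random.rand(n_examples, inputs))
data_y = shared(numpy.random.randint(0, outputs, n_examples).astype('int32'))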
Example #2
def online_mlp_784_500_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    HUs = 500
    w = shared(rand(HUs, inputs) * numpy.sqrt(6.0 / (inputs + HUs)))
    b = shared(zeros(HUs))
    v = shared(zeros(outputs, HUs))
    c = shared(zeros(outputs))
    si = shared(0)  # current training example index
    sx = data_x[si]
    sy = data_y[si]

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(tanh(dot(sx, w.T) + b), v.T).dimshuffle('x', 0), c,
        sy.dimshuffle('x'))
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train = function(
        [], [],
        updates={
            w: w - lr * gw,
            b: b - lr * gb,
            v: v - lr * gv,
            c: c - lr * gc,
            si: (si + 1) % n_examples
        })
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, "mlp_784_500_10_hack")
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
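
The trick shared by all of the online_mlp_* examples is that the compiled training function takes no inputs at all: the shared integer si selects one training row symbolically, and the same updates that apply the gradient step also advance si modulo n_examples, so repeatedly calling train() sweeps through the dataset with no per-call data transfer from Python. A minimal sketch of that pattern in isolation (the names here are illustrative):

import numpy
import theano

data = theano.shared(numpy.arange(10.0))
si = theano.shared(0)
# Each call returns data[si] computed from the pre-update si, then advances
# si with wraparound, exactly like the si update in the examples above.
step = theano.function([], data[si], updates={si: (si + 1) % 10})
assert [float(step()) for _ in range(3)] == [0.0, 1.0, 2.0]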
Example #3
def online_mlp_784_1000_1000_1000_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6.0 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6.0 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6.0 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]

    si = shared(0)  # current training example index
    sx = data_x[si]
    sy = data_y[si]
    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(h2, v).dimshuffle('x', 0), c, sy.dimshuffle('x'))
    cost = nll.mean()
    gparams = grad(cost, params)
    updates = [(p, p - lr * gp) for p, gp in zip(params, gparams)]
    updates += [(si, (si + 1) % n_examples)]
    train = function([], [], updates=updates)
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train,
                                          "mlp_784_1000_1000_1000_10_hack")
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
Example #4
def online_mlp_784_500_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    HUs = 500
    w = shared(rand(HUs, inputs) * numpy.sqrt(6.0 / (inputs + HUs)))
    b = shared(zeros(HUs))
    v = shared(zeros(outputs, HUs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
            dot(tanh(dot(sx, w.T) + b), v.T).dimshuffle('x', 0),
            c,
            sy.dimshuffle('x'))
    cost = nll.mean()
    gw, gb, gv, gc = grad(cost, [w, b, v, c])
    train = function([], [],
            updates={
                w: w - lr * gw,
                b: b - lr * gb,
                v: v - lr * gv,
                c: c - lr * gc,
                si: (si + 1) % n_examples})
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, "mlp_784_500_10_hack")
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
Example #5
def online_mlp_784_1000_1000_1000_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6.0 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6.0 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6.0 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]

    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
            dot(h2, v).dimshuffle('x', 0),
            c,
            sy.dimshuffle('x'))
    cost = nll.mean()
    gparams = grad(cost, params)
    updates = [(p, p - lr * gp) for p, gp in zip(params, gparams)]
    updates += [(si, (si + 1) % n_examples)]
    train = function([], [], updates=updates)
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train,
                                          "mlp_784_1000_1000_1000_10_hack")
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
Example #6
def test_bug_2009_07_17_borrowed_output():
    """Regression test for a bug where output was borrowed by mistake."""
    a = theano.tensor.dmatrix()
    b = theano.tensor.dmatrix()
    # The output should *NOT* be borrowed.
    g = theano.function([a, b],
            theano.Out(theano.tensor.dot(a, b), borrow=False))
    
    x = numpy.zeros((1, 2))
    y = numpy.ones((2, 5))
    
    z = g(x, y)
    print(z)         # Should be zero.
    x.fill(1)
    print(g(x, y))   # Should be non-zero.
    print(z)         # Should still be zero.
    assert numpy.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore when at this revision.
    # Thus, a new test (below) was added that exhibits the same issue. Note
    # that it may better be moved into the test_nnet.py test file if it turns
    # out the bug was caused by 'crossentropy_softmax_argmax_1hot_with_bias',
    # and was not a more general issue.
    test_output_activation_no_bias = theano.tensor.dmatrix()
    test_b2 = theano.tensor.dvector()
    test_target = theano.tensor.ivector()
    nll_softmax_argmax = (
            crossentropy_softmax_argmax_1hot_with_bias(
                test_output_activation_no_bias,
                test_b2,
                test_target))
    output = nll_softmax_argmax[1]
    g = theano.function([test_output_activation_no_bias, test_b2, test_target],
            theano.Out(output, borrow=False))
    
    a = numpy.zeros((1, 5))
    b = numpy.ones(5)
    c = numpy.zeros(1, dtype=numpy.int32)
    
    z = g(a, b, c)
    z_backup = copy.copy(z)
    id_z = id(z)
    print('Output z after first call: %s' % (z, ))
    a[0, 0] = 1
    id_other = id(g(a, b, c))
    print('Output z after second call: %s' % (z, ))
    # Ensure that calling the function again returns a pointer towards a new
    # array.
    assert id_z != id_other
    # Just to be 100% sure, ensure that z was not altered.
    assert (z == z_backup).all()
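
The property this test exercises can be shown without the fused op: with borrow=False (the default for outputs) every call to a compiled function returns a freshly allocated array, while borrow=True permits Theano to hand back its internal output buffer, so a later call may overwrite an array the caller still holds. A minimal sketch of the contract (nothing here is specific to this test):

import numpy
import theano
import theano.tensor as T

a = T.dmatrix()
f_copy = theano.function([a], theano.Out(2 * a, borrow=False))
f_borrow = theano.function([a], theano.Out(2 * a, borrow=True))

x = numpy.ones((2, 2))
z1 = f_copy(x)
z2 = f_copy(x)
assert z1 is not z2   # borrow=False: a fresh array on every call

w = f_borrow(x)
f_borrow(numpy.zeros((2, 2)))
# borrow=True only *permits* buffer reuse, but after the second call w can
# no longer be assumed to still hold 2 * ones((2, 2)).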
Example #7
def test_bug_2009_07_17_borrowed_output():
    # Regression test for a bug where output was borrowed by mistake.
    a = theano.tensor.dmatrix()
    b = theano.tensor.dmatrix()
    # The output should *NOT* be borrowed.
    g = theano.function([a, b],
            theano.Out(theano.tensor.dot(a, b), borrow=False))

    x = np.zeros((1, 2))
    y = np.ones((2, 5))

    z = g(x, y)
    print(z)         # Should be zero.
    x.fill(1)
    print(g(x, y))   # Should be non-zero.
    print(z)         # Should still be zero.
    assert np.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore when at this revision.
    # Thus, a new test (below) was added that exhibits the same issue. Note
    # that it may better be moved into the test_nnet.py test file if it turns
    # out the bug was caused by 'crossentropy_softmax_argmax_1hot_with_bias',
    # and was not a more general issue.
    test_output_activation_no_bias = theano.tensor.dmatrix()
    test_b2 = theano.tensor.dvector()
    test_target = theano.tensor.ivector()
    nll_softmax_argmax = (
            crossentropy_softmax_argmax_1hot_with_bias(
                test_output_activation_no_bias,
                test_b2,
                test_target))
    output = nll_softmax_argmax[1]
    g = theano.function([test_output_activation_no_bias, test_b2, test_target],
            theano.Out(output, borrow=False))

    a = np.zeros((1, 5))
    b = np.ones(5)
    c = np.zeros(1, dtype=np.int32)

    z = g(a, b, c)
    z_backup = copy.copy(z)
    id_z = id(z)
    print('Output z after first call: %s' % (z, ))
    a[0, 0] = 1
    id_other = id(g(a, b, c))
    print('Output z after second call: %s' % (z, ))
    # Ensure that calling the function again returns a pointer towards a new
    # array.
    assert id_z != id_other
    # Just to be 100% sure, ensure that z was not altered.
    assert (z == z_backup).all()
Example #8
def online_mlp_784_10():
    v = shared(zeros(outputs, inputs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
            dot(sx, v.T).dimshuffle('x', 0),
            c,
            sy.dimshuffle('x'))
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    train = function([], [],
            updates={
                v: v - lr * gv,
                c: c - lr * gc,
                si: (si + 1) % n_examples})
    theano.printing.debugprint(train, file=open('foo_train', 'w'))
    t = time.time()
    train.fn(n_calls=n_examples)
    dt = time.time() - t
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    reportmodel('mlp_784_10_hack', 1, dt)
    if 1:
        t = time.time()
        for i in range(n_examples):
            train()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack2', 1, dt)
    if 1:
        t = time.time()
        fn = train.fn
        for i in range(n_examples):
            fn()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack3', 1, dt)
Example #9
def online_mlp_784_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    v = shared(zeros(outputs, inputs))
    c = shared(zeros(outputs))
    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
            dot(sx, v.T).dimshuffle('x', 0),
            c,
            sy.dimshuffle('x'))
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    train = function([], [],
            updates={
                v: v - lr * gv,
                c: c - lr * gc,
                si: (si + 1) % n_examples})
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, 'mlp_784_10_hack')
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    if 1:
        t = time.time()
        for i in range(n_examples):
            train()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack2', 1, dt)
    if 1:
        t = time.time()
        fn = train.fn
        for i in range(n_examples):
            fn()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack3', 1, dt)
Example #10
def online_mlp_784_10():
    assert False, "This is old stuff not up to date that you probably don't need"
    v = shared(zeros(outputs, inputs))
    c = shared(zeros(outputs))
    si = shared(0)  # current training example index
    sx = data_x[si]
    sy = data_y[si]

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
        dot(sx, v.T).dimshuffle('x', 0), c, sy.dimshuffle('x'))
    cost = nll.mean()
    gv, gc = grad(cost, [v, c])
    train = function([], [],
                     updates={
                         v: v - lr * gv,
                         c: c - lr * gc,
                         si: (si + 1) % n_examples
                     })
    #theano.printing.debugprint(train, file=open('foo_train', 'wb'))
    GlobalBenchReporter.simple_eval_model(train, 'mlp_784_10_hack')
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    if 1:
        t = time.time()
        for i in range(n_examples):
            train()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack2', 1, dt)
    if 1:
        t = time.time()
        fn = train.fn
        for i in range(n_examples):
            fn()
        dt = time.time() - t
        reportmodel('mlp_784_10_hack3', 1, dt)
Example #11
def online_mlp_784_1000_1000_1000_10():
    w0 = shared(rand(inputs, 1000) * numpy.sqrt(6.0 / (inputs + 1000)))
    b0 = shared(zeros(1000))
    w1 = shared(rand(1000, 1000) * numpy.sqrt(6.0 / (1000 + 1000)))
    b1 = shared(zeros(1000))
    w2 = shared(rand(1000, 1000) * numpy.sqrt(6.0 / (1000 + 1000)))
    b2 = shared(zeros(1000))
    v = shared(zeros(1000, outputs))
    c = shared(zeros(outputs))
    params = [w0, b0, w1, b1, w2, b2, v, c]

    si = shared(0)    # current training example index
    sx = data_x[si]
    sy = data_y[si]
    h0 = tanh(dot(sx, w0) + b0)
    h1 = tanh(dot(h0, w1) + b1)
    h2 = tanh(dot(h1, w2) + b2)

    nll, p_y_given_x, _argmax = crossentropy_softmax_argmax_1hot_with_bias(
            dot(h2, v).dimshuffle('x', 0),
            c,
            sy.dimshuffle('x'))
    cost = nll.mean()
    gparams = grad(cost, params)
    updates = [(p, p - lr * gp) for p, gp in zip(params, gparams)]
    updates += [(si, (si + 1) % n_examples)]
    train = function([], [], updates=updates)
    theano.printing.debugprint(train, file=open('foo_train', 'w'))
    t = time.time()
    train.fn(n_calls=n_examples)
    dt = time.time() - t
    try:
        train.fn.update_profile(train.profile)
    except AttributeError:
        pass
    reportmodel('mlp_784_1000_1000_1000_10_hack', 1, dt)
Example #12
w2 = TT.dmatrix()
b2 = TT.dvector()

from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias
from theano.compile.function_module import function

xw1 = theano.dot(w1.T, x.T).T
h = ACTIVATION_FUNCTION(xw1 + b1)

if HLAYERS == 2:
    xwh = theano.dot(wh.T, h.T).T
    h = ACTIVATION_FUNCTION(xwh + bh)

#zero = tensor.zeros_like(x[0,:])
(kl, softmax,
 argmax) = crossentropy_softmax_argmax_1hot_with_bias(theano.dot(h, w2), b2,
                                                      targety)

if HLAYERS == 2:
    validatefn = function([x, targety, w1, b1, wh, bh, w2, b2],
                          [kl, softmax, argmax, xw1, xwh],
                          mode=COMPILE_MODE)
    (gw1, gb1, gwh, gbh, gw2, gb2) = TT.grad(kl, [w1, b1, wh, bh, w2, b2])
    trainfn = function([x, targety, w1, b1, wh, bh, w2, b2], [
        kl, softmax, argmax, xw1, xwh,
        theano.compile.io.Out(gw1, borrow=True), gb1, gwh, gbh, gw2, gb2
    ],
                       mode=COMPILE_MODE)
else:
    validatefn = function([x, targety, w1, b1, w2, b2],
                          [kl, softmax, argmax, xw1],
                          mode=COMPILE_MODE)
    (gw1, gb1, gw2, gb2) = TT.grad(kl, [w1, b1, w2, b2])
    trainfn = function([x, targety, w1, b1, w2, b2], [
        kl, softmax, argmax, xw1,
        theano.compile.io.Out(gw1, borrow=True), gb1, gw2, gb2
    ],
                       mode=COMPILE_MODE)
Example #13
b2 = TT.dvector()

from theano.tensor import nnet  # needed for nnet.sigmoid in the unnormalized branch
from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias
from theano.compile.function_module import function

xw1 = theano.dot(w1.T, x.T).T
h = ACTIVATION_FUNCTION(xw1 + b1)

if HLAYERS == 2:
    xwh = theano.dot(wh.T, h.T).T
    h = ACTIVATION_FUNCTION(xwh + bh)

#zero = tensor.zeros_like(x[0,:])

if HYPERPARAMETERS["locally normalize"]:
    (kl, softmax, argmax) = crossentropy_softmax_argmax_1hot_with_bias(theano.dot(h, w2), b2, targety)
else:
    # Unnormalized branch: per-class sigmoids with a summed binary
    # cross-entropy (the name 'softmax' is kept only for interface parity
    # with the normalized branch).
    prey = theano.dot(h, w2) + b2
    softmax = nnet.sigmoid(prey)
    kl = -TT.mean(TT.sum(targety * TT.log(softmax)
                         + (1 - targety) * TT.log(1 - softmax), axis=1),
                  axis=0)
    argmax = TT.argmax(softmax)

if HLAYERS == 2:
    validatefn = function([x, targety, w1, b1, wh, bh, w2, b2],
                          [kl, softmax, argmax, xw1, xwh],
                          mode=COMPILE_MODE)
    (gw1, gb1, gwh, gbh, gw2, gb2) = TT.grad(kl, [w1, b1, wh, bh, w2, b2])
    trainfn = function([x, targety, w1, b1, wh, bh, w2, b2], [
        kl, softmax, argmax, xw1, xwh,
        theano.compile.io.Out(gw1, borrow=True), gb1, gwh, gbh, gw2, gb2
    ],
                       mode=COMPILE_MODE)
else:
    validatefn = function([x, targety, w1, b1, w2, b2],
                          [kl, softmax, argmax, xw1],
                          mode=COMPILE_MODE)
    (gw1, gb1, gw2, gb2) = TT.grad(kl, [w1, b1, w2, b2])
    trainfn = function([x, targety, w1, b1, w2, b2], [
        kl, softmax, argmax, xw1,
        theano.compile.io.Out(gw1, borrow=True), gb1, gw2, gb2
    ],
                       mode=COMPILE_MODE)
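
For reference, here is what the fused op used throughout these examples computes. Up to speed and numerical stability (the fused version also provides a hand-written gradient), crossentropy_softmax_argmax_1hot_with_bias(act, bias, y) is equivalent to composing a softmax, the negative log-likelihood at each target index, and a row-wise argmax by hand. A sketch with made-up shapes:

import numpy
import theano
import theano.tensor as T
from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias

act = T.dmatrix('act')   # pre-softmax activations, one row per example
bias = T.dvector('bias')
y = T.ivector('y')       # integer class labels (the "1hot" index form)

nll_f, sm_f, am_f = crossentropy_softmax_argmax_1hot_with_bias(act, bias, y)

# The same three quantities from separate ops:
sm = T.nnet.softmax(act + bias)
nll = -T.log(sm)[T.arange(y.shape[0]), y]
am = T.argmax(act + bias, axis=1)

f = theano.function([act, bias, y], [nll_f, nll, sm_f, sm, am_f, am])
a = numpy.random.randn(3, 5)
b = numpy.random.randn(5)
t = numpy.asarray([0, 2, 4], dtype='int32')
nll1, nll2, sm1, sm2, am1, am2 = f(a, b, t)
assert numpy.allclose(nll1, nll2) and numpy.allclose(sm1, sm2)
assert (am1 == am2).all()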