b2 = TT.dvector()

from theano.tensor.nnet import crossentropy_softmax_argmax_1hot_with_bias
from theano.tensor import nnet   # for nnet.sigmoid in the unnormalized branch
from theano.compile.function_module import function

# Forward pass through one or two hidden layers.
xw1 = theano.dot(w1.T, x.T).T
h = ACTIVATION_FUNCTION(xw1 + b1)
if HLAYERS == 2:
    xwh = theano.dot(wh.T, h.T).T
    h = ACTIVATION_FUNCTION(xwh + bh)
#zero = tensor.zeros_like(x[0,:])

if HYPERPARAMETERS["locally normalize"]:
    # Softmax output with fused cross-entropy loss and per-example argmax.
    (kl, softmax, argmax) = crossentropy_softmax_argmax_1hot_with_bias(
        theano.dot(h, w2), b2, targety)
else:
    # Independent sigmoid outputs; "kl" here is actually the mean summed
    # binary cross-entropy against the targets.
    prey = theano.dot(h, w2) + b2
    softmax = nnet.sigmoid(prey)
    kl = -TT.mean(TT.sum(targety * TT.log(softmax)
                         + (1 - targety) * TT.log(1 - softmax), axis=1), axis=0)
    argmax = TT.argmax(softmax, axis=1)  # per-example argmax, matching the branch above

# Compile a validation function and a training function; the training
# function additionally returns the gradients w.r.t. all parameters.
if HLAYERS == 2:
    validatefn = function([x, targety, w1, b1, wh, bh, w2, b2],
                          [kl, softmax, argmax, xw1, xwh], mode=COMPILE_MODE)
    (gw1, gb1, gwh, gbh, gw2, gb2) = TT.grad(kl, [w1, b1, wh, bh, w2, b2])
    trainfn = function([x, targety, w1, b1, wh, bh, w2, b2],
                       [kl, softmax, argmax, xw1, xwh,
                        theano.compile.io.Out(gw1, borrow=True),
                        gb1, gwh, gbh, gw2, gb2],
                       mode=COMPILE_MODE)
else:
    validatefn = function([x, targety, w1, b1, w2, b2],
                          [kl, softmax, argmax, xw1], mode=COMPILE_MODE)
    (gw1, gb1, gw2, gb2) = TT.grad(kl, [w1, b1, w2, b2])
    trainfn = function([x, targety, w1, b1, w2, b2],
                       [kl, softmax, argmax, xw1,
                        theano.compile.io.Out(gw1, borrow=True),
                        gb1, gw2, gb2],
                       mode=COMPILE_MODE)
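# Usage sketch (illustrative, not from the original file): how the compiled
# trainfn above might be driven with numpy data. All sizes and values here
# are hypothetical; the guard restricts the sketch to the single-hidden-layer,
# unnormalized branch, where targety is a one-hot dmatrix.
if HLAYERS == 1 and not HYPERPARAMETERS["locally normalize"]:
    import numpy
    rng = numpy.random.RandomState(0)
    nin, nhid, nout = 5, 3, 4                 # hypothetical layer sizes
    xval  = rng.randn(2, nin)                 # minibatch of 2 examples
    tval  = numpy.eye(nout)[[1, 3]]           # one-hot targets for classes 1 and 3
    w1val = rng.randn(nin, nhid) * 0.1
    b1val = numpy.zeros(nhid)
    w2val = rng.randn(nhid, nout) * 0.1
    b2val = numpy.zeros(nout)
    # Eight outputs, in the order given to function(...) above.
    (klv, softmaxv, argmaxv, xw1v,
     gw1v, gb1v, gw2v, gb2v) = trainfn(xval, tval, w1val, b1val, w2val, b2val)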
b1R = TT.dvector('b1')
w2R = TT.dmatrix('w2')
b2R = TT.dvector('b2')

import pylearn.algorithms.cost as cost
import theano.sparse as TS        # for structured_dot on the sparse input
from theano.compile.function_module import function

# Forward pass: sparse input xR through the first layer via structured_dot.
#xw1R = theano.dot(w1R.T, xR.T).T
xw1R = TS.structured_dot(w1R.T, xR.T).T
#print w1R.type
#print xR.type
hR = ACTIVATION_FUNCTION(xw1R + b1R)
yR = nnet.sigmoid(theano.dot(hR, w2R).T + b2R)
loss = cost.KL_divergence(targR, yR)

# Compile a forward-only function and a training function that also
# returns the gradients w.r.t. all parameters.
fn = function([xR, targR, w1R, b1R, w2R, b2R], [yR, loss], mode=COMPILE_MODE)
(gw1, gb1, gw2, gb2) = TT.grad(loss, [w1R, b1R, w2R, b2R])
trainfn = function([xR, targR, w1R, b1R, w2R, b2R],
                   [yR, loss, theano.compile.io.Out(gw1, borrow=True),
                    gb1, gw2, gb2, hR],
                   mode=COMPILE_MODE)
#print type(hR), type(yR)

print "TRAINING"
nex = xinstances.shape[0]
for epoch in range(EPOCHS):
    print "Epoch #", epoch
    for j in range(nex):
        #print "Example #", j
        x = xinstances[j,:]
        #print "x", x.todense()
        #print x.indices
        targety = targets[j,:]
        #print "target y", targety
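        # Continuation sketch (not from the original file): the section ends
        # before the parameter update, so nothing below is the author's code.
        # A plain SGD step using trainfn's outputs might look like this,
        # assuming w1, b1, w2, b2 are the numpy parameter arrays initialized
        # outside this section, and LR is a hypothetical learning rate.
        LR = 0.01  # hypothetical learning rate, not defined in the original
        yv, lossv, gw1v, gb1v, gw2v, gb2v, hv = \
            trainfn(x, targety, w1, b1, w2, b2)
        # Step each parameter against its gradient.
        w1 -= LR * gw1v
        b1 -= LR * gb1v
        w2 -= LR * gw2v
        b2 -= LR * gb2v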